o
    	Ti1Q                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZ d dlmZ d dlmZ d	d
lm Z  ddl!m"Z" ddl#m$Z$m%Z% e rod dl&m'Z' e rvd dl(Z(G dd deZ)dS )    N)Path)CallableOptionalUnion)Dataset)
DataLoader)AutoModelForCausalLMAutoTokenizerBaseImageProcessorDataCollatorDataCollatorForLanguageModelingDataCollatorForSeq2SeqFeatureExtractionMixinPreTrainedModelPreTrainedTokenizerBaseProcessorMixinTrainerTrainingArgumentsis_wandb_available)EvalLoopOutput)is_peft_available   )PPODecorators   )IterativeSFTConfig)generate_model_cardget_comet_experiment_url)	PeftModelc                       s  e Zd ZdZddgZ							d(deeef deee	e
f  dee d	eeeeeef f  d
eeeeeef  deejjejjjf deeejejgejf  deeegef  f fddZdede	defddZdejdejdejfddZedeej  deej  deej  dee dee f
ddZ!e"# 					d)deeej   deeej   deeej   deee  deee  f
ddZ$dd  Z% fd!d"Z&			d*d#ee d$ee d%eeee df fd&d'Z'  Z(S )+IterativeSFTTrainerat	  
    The IterativeSFTTrainer can be used to finetune models with methods that requires some steps between optimization.

    Args:
        model (`Union[str, PreTrainedModel]`):
            Model to be trained. Can be either:

            - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a
              path to a *directory* containing model weights saved using
              [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
              using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
              `args.model_init_kwargs`.
            - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
        args ([`IterativeSFTConfig`], *optional*, defaults to `None`):
            Configuration for this trainer. If `None`, a default configuration is used.
        data_collator (`DataCollator`, *optional*):
            Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
            Will default to [`~transformers.default_data_collator`] if no `processing_class` is provided, an instance
            of [`~transformers.DataCollatorWithPadding`] otherwise if the processing_class is a feature extractor or
            tokenizer.
        eval_dataset (`datasets.Dataset`):
            The dataset to use for evaluation.
        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
            Processing class used to process the data. If `None`, the processing class is loaded from the model's name
            with [`~transformers.AutoTokenizer.from_pretrained`].
        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
            The optimizer and scheduler to use for training.
        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
            The function to use to preprocess the logits before computing the metrics.
        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
            The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
            metric values.
    trlziterative-sftNNNmodelargsdata_collatoreval_datasetprocessing_class
optimizerspreprocess_logits_for_metricscompute_metricsc	              
      s  t |tr|n|jj}	|d u r|	dd }
t|
 d}nt |tr=t |ts=| }|j|d< |	d tdi |}|d u rFt
|	}|jd urUt |tsUtd t |tr`| ||}t rlt |trld| _nd| _|| _t|jd	d| _|d u r| jrt|d
dd| _nt| jdd| _n|| _|j| _|j| _|j| _t j||| j|||||d t| jdr| j | j! | "| j#j$ | j%&| j| j'| j(\| _| _'| _(| jdkrdnd| j_)t| dst*d| jt+_d S )N/z-IterativeSFT	hub_tokenpush_to_hub_tokenzYou passed model_init_kwargs to the `IterativeSFTConfig`, but your model is already instantiated. The `model_init_kwargs` will be ignored.TFis_encoder_decoder   )label_pad_token_idpad_to_multiple_of)mlm)r!   r"   r#   r$   r%   r(   r&   r'   add_model_tagskeep_endleftrightacceleratorzXYour `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`. ),
isinstancestrconfig_name_or_pathsplitr   r   to_dictr+   popr	   from_pretrainedmodel_init_kwargswarningswarn_create_model_from_pathr   r   is_peft_modelr%   getattrr-   r   r#   r   
max_lengthtruncation_modeoptimize_device_cachesuper__init__hasattrr!   r3   
_tag_namescreate_optimizer_and_schedulerr"   	max_stepsr7   prepare	optimizerlr_schedulertruncation_sideAttributeErrorr   )selfr!   r"   r#   r$   r%   r&   r'   r(   model_id
model_name	dict_args	__class__r8   U/home/ubuntu/.local/lib/python3.10/site-packages/trl/trainer/iterative_sft_trainer.pyrK   [   sl   





zIterativeSFTTrainer.__init__
model_pathreturnc                 C   s   |j pi }tj|fi |S )z0Creates a model from a path or model identifier.)rA   r   r@   )rU   r\   r"   rA   r8   r8   r[   rD      s   
z+IterativeSFTTrainer._create_model_from_path	input_idsattention_masklabelsc                    s   |d u rdd |D } j r4 dd t|||D  jj}|dd  d|d |d  jjk< n dd t||D  jj} j	d urw j
dkr] fd	d
| D }|S  j
dkro fdd
| D }|S td j
 |S )Nc                 S   s   g | ]}t |qS r8   )torch	ones_like).0idsr8   r8   r[   
<listcomp>       z<IterativeSFTTrainer.prepare_model_inputs.<locals>.<listcomp>c                 S   s   g | ]\}}}|||d qS )r^   r_   r`   r8   )rc   rd   attlabr8   r8   r[   re      s    
decoder_input_idsr.   r`   c                 S   s   g | ]	\}}||d qS ))r^   r_   r8   )rc   rd   rh   r8   r8   r[   re      s    
keep_startc                    s    i | ]\}}||d  j  qS NrG   rc   kvrU   r8   r[   
<dictcomp>   s     z<IterativeSFTTrainer.prepare_model_inputs.<locals>.<dictcomp>r4   c                    s"   i | ]\}}|| j  d  qS rl   rm   rn   rq   r8   r[   rr      s   " zUnknown truncation mode: )r-   r#   ziptor!   devicer?   r%   pad_token_idrG   rH   items
ValueError)rU   r^   r_   r`   
input_datar8   rq   r[   prepare_model_inputs   s4   





z(IterativeSFTTrainer.prepare_model_inputstextstexts_labelsc                 C   s|  |du rs|du r=t ddg| |gD ]*\}}t|ts%t| dt| t|d tjs;td| dt|d  qnzt g d| ||gD ]*\}}t|ts[t| dt| t|d tjsqtd| dt|d  qGnDt|tstd	t| t|d tstd
t|d  |durt|tstdt| t|d tstdt|d  | ||||fS )a  
        Check if the input data is valid for training.

        Args:
            input_ids (list[`torch.LongTensor`]):
                List of tensors containing the input_ids
            attention_mask (list[`torch.LongTensor`]):
                List of tensors containing the attention_mask
            labels (list[`torch.FloatTensor`]):
                List of tensors containing the labels
            texts (list[`str`]):
                List of string containing the text input.
            texts_labels (list[`str`]):
                List of string containing the text labels.

        Returns:
            `tuple`: The input data.
        Nr^   r`   z! must be a list of tensors - got r   zElements in z must be tensors - got rg   z''text' must be a list of strings - got z)Elements in 'text' must be strings - got z.'text_labels' must be a list of strings - got z0Elements in 'text_labels' must be strings - got )rs   r9   listrx   typera   Tensorr:   )r^   r_   r`   r{   r|   nametensor_listr8   r8   r[   _step_safety_checker   s8   



z(IterativeSFTTrainer._step_safety_checkerc              	      s  j   jjdkrtdjj_	jj_
|du r'|du r'td|dur5|dur5tdt |du rD|du rDjrDtd|durN|dd nd}|durZ|dd nd}|durf|dd nd}|durr|dd nd}|dur~|dd nd}|||||\}}}}}|durj|jdddd	}|d
 |d }}|durj|jdddd	d
 }|du r|}|||}t| }i }|| fdd}	t|}
|
d t|
jjd|	d}t|D ]\} jj m  fdd|D }j |}jj dkr|! }|" }j#| jj$r8jj%dur8j&j ' jj% j()  j(*  j+durMj+)  j jd7  _ j	|7  _	,  W d   n	1 skw   Y  qdS )a  
        Run an optimisation step given a list of input_ids, attention_mask, and labels or a list of text and
        text_labels.

        Args:
            input_ids (list[`torch.LongTensor`]):
                List of tensors containing the input_ids (if not provided, text will be used)
            attention_mask (list[`torch.LongTensor`], , *optional*):
                List of tensors containing the attention_mask
            labels (list[`torch.FloatTensor`], *optional*):
                List of tensors containing the labels (if set to None, will default to input_ids)
            texts (list[`str`], *optional*):
                List of strings containing the text input (if not provided, input_ids will directly be used)
            texts_labels (list[`str`], *optional*):
                List of strings containing the text labels (if set to None, will default to text)

        Returns:
            `dict[str, Any]`: A summary of the training statistics
        r   g        Nz@Step should include `input_ids` or `texts` as keyword arguments.ztBoth `input_ids` and `texts` argument are provided. `input_ids` will be ignored. Please provide only one of the two.zNo 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed.Tpt)rG   
truncationpaddingreturn_tensorsr^   r_   c                    sF   t  }| d D ]  dv r t fdd| D jj| < q|S )Nr   rg   c                    s   g | ]}|  qS r8   r8   )rc   dkeyr8   r[   re   i  s    z>IterativeSFTTrainer.step.<locals>.collator.<locals>.<listcomp>)dictra   stackrt   r!   ru   )datareturn_dictrq   r   r[   collatore  s   &z*IterativeSFTTrainer.step.<locals>.collatorra   )
batch_sizeshuffle
collate_fnc                    s   i | ]}| | qS r8   r8   )rc   ro   )batchr8   r[   rr   x  rf   z,IterativeSFTTrainer.step.<locals>.<dictcomp>r   )-r!   trainstateglobal_stepra   tensorrt   r"   ru   tr_loss_globalstep_last_loggedrx   rB   rC   UserWarningr-   r   r%   rG   rz   r}   keysupdater   	from_dict
set_formatr   per_device_train_batch_size	enumerater7   
accumulatecompute_lossn_gpumeandetachbackwardsync_gradientsmax_grad_normclip_grad_norm_
parametersrQ   step	zero_gradrR   _maybe_log_save_evaluate)rU   r^   r_   r`   r{   r|   model_inputsmodel_inputs_names
batch_dictr   
batch_datastep_dataloader_losstr_loss_stepr8   )r   rU   r[   r     s   









zIterativeSFTTrainer.stepc                 C   s   | j jd ur| jj| j j dkr| jjdkr| | j | j jd urf| jj| j j dkrh| jjdkrji }| | j	 
 }|  j| j8  _t|| jj| j  d|d< |  |d< | jj| _| | d S d S d S d S )Nr      r   learning_rate)r"   
eval_stepsr   r   evaluater$   logging_steps_nested_gatherr   r   itemroundr   _get_learning_ratelog)rU   logstr_loss_scalarr8   r8   r[   r     s     
z,IterativeSFTTrainer._maybe_log_save_evaluatec                    sL   | j jd u rt| j jj}n	| j jdd }| j|d t || d S )Nr)   r*   )rW   )	r"   hub_model_idr   
output_dirr   r=   create_model_cardrJ   _save_checkpoint)rU   r!   trialrW   rY   r8   r[   r     s
   z$IterativeSFTTrainer._save_checkpointrW   dataset_nametagsc              
   C   s   |   sdS t| jjdrtj| jjjs| jjj}nd}|du r&t }nt	|t
r/|h}nt|}t| jjdr?|d || j t||| j||t rXtjdurXtjjndt dd}|tj| jjd dS )a  
        Creates a draft of a model card using the information available to the `Trainer`.

        Args:
            model_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the model.
            dataset_name (`str` or `None`, *optional*, defaults to `None`):
                Name of the dataset used for training.
            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
                Tags to be associated with the model card.
        Nr<   unsloth_versionunslothzIterative SFT)
base_modelrW   r   r   r   	wandb_url	comet_urltrainer_namez	README.md)is_world_process_zerorL   r!   r;   ospathisdirr<   setr9   r:   addr   rM   r   r   r   wandbrunurlr   savejoinr"   r   )rU   rW   r   r   r   
model_cardr8   r8   r[   r     s0    

z%IterativeSFTTrainer.create_model_card)NNNNr    NN)NNNNN)NNN))__name__
__module____qualname____doc__rM   r   r:   r   r   r   r   r   r   r   r   r
   r   r   tuplera   optim	OptimizerrR   LambdaLRr   r   r   rK   rD   rz   staticmethodr}   
LongTensorr   r   empty_device_cacher   r   r   r   __classcell__r8   r8   rY   r[   r   6   s    "
	^ 5


r   )*r   rB   pathlibr   typingr   r   r   ra   datasetsr   torch.utils.datar   transformersr   r	   r
   r   r   r   r   r   r   r   r   r   r   transformers.trainer_utilsr   transformers.utilsr   corer   iterative_sft_configr   utilsr   r   peftr   r   r   r8   r8   r8   r[   <module>   s$   <