o
    ॵi                     @   s   d dl Z d dlmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d
dlmZ d
dlmZ eje	jdG dd deZeje	jdG dd deZdS )    N)TupleUnion)nn)Trainersbuild_metric)Model
TorchModel)Preprocessor)Config)ModeKeys   )TRAINERS)EpochBasedTrainer)module_namec                       sd   e Zd ZdZ fddZdd ZdefddZd	ee	j
ef fd
dZd	eeef fddZ  ZS )NlpEpochBasedTrainera  Add code to adapt with nlp models.

    This trainer will accept the information of labels&text keys in the cfg, and then initialize
    the nlp models/preprocessors with this information.

    Labels&text key information may be carried in the cfg like this:

    >>> cfg = {
    >>>     ...
    >>>     "dataset": {
    >>>         "train": {
    >>>             "first_sequence": "text1",
    >>>             "second_sequence": "text2",
    >>>             "label": "label",
    >>>             "labels": [1, 2, 3, 4],
    >>>         },
    >>>         "val": {
    >>>             "first_sequence": "text3",
    >>>             "second_sequence": "text4",
    >>>             "label": "label2",
    >>>         },
    >>>     }
    >>> }

    To view some actual finetune examples, please check the test files listed below:
    tests/trainers/test_finetune_sequence_classification.py
    tests/trainers/test_finetune_token_classification.py
    c                    s4   d | _ d | _d | _d | _d | _t j|i | d S N)label2idid2label
num_labels
train_keys	eval_keyssuper__init__)selfargskwargs	__class__ S/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/nlp_trainer.pyr   2   s   zNlpEpochBasedTrainer.__init__c                 C   s   z |j jj}dd t|D | _dd t|D | _t|| _W n	 ty)   Y nw dd }||	d| _
||	d| _t| jdkrK| j
| _d S d S )	Nc                 S   s   i | ]\}}||qS r   r   .0idxlabelr   r   r    
<dictcomp>=       z7NlpEpochBasedTrainer.prepare_labels.<locals>.<dictcomp>c                 S   s   i | ]\}}||qS r   r   r!   r   r   r    r%   >   r&   c                 S   sD   | d urt | dd t | dd t | dd d}ni }dd | D S )Nfirst_sequencesecond_sequencer$   )r'   r(   r$   c                 S   s   i | ]\}}|d ur||qS r   r   )r"   kvr   r   r    r%   M   s    zSNlpEpochBasedTrainer.prepare_labels.<locals>.build_dataset_keys.<locals>.<dictcomp>)getattritems)cfg
input_keysr   r   r    build_dataset_keysC   s   


z?NlpEpochBasedTrainer.prepare_labels.<locals>.build_dataset_keyszdataset.trainzdataset.valr   )datasettrainlabels	enumerater   r   lenr   AttributeErrorsafe_getr   r   )r   r-   r2   r/   r   r   r    prepare_labels:   s   
z#NlpEpochBasedTrainer.prepare_labelsr-   c                 C   sf   | j d ur
|  |}| | t|jds1t|jds1| jd ur&| j|jd< | jd ur1| j|jd< |S )Nr   r   )cfg_modify_fnr7   hasattrmodelr   r   )r   r-   r   r   r    rebuild_configT   s   




z#NlpEpochBasedTrainer.rebuild_configreturnc                 C   sb   | j du ri nd| j i}tj| jfd| ji|}t|tjs't|dr'|j	S t|tjr/|S dS )z Instantiate a pytorch model and return.

        By default, we will create a model using config from configuration file. You can
        override this method in a subclass.

        Nr   cfg_dictr:   )
r   r   from_pretrained	model_dirr-   
isinstancer   Moduler9   r:   )r   
model_argsr:   r   r   r    build_model`   s   z NlpEpochBasedTrainer.build_modelc                 C   s   | j du ri nd| j i}tj| jf| jtjd|| jtjdd}tj| jf| jtjd|| j	tjdd}||fS )zBuild the preprocessor.

        User can override this method to implement custom logits.

        Returns: The preprocessor instance.

        Nr   )r=   preprocessor_modeT)modeuse_fast)
r   r
   r>   r?   r-   r   TRAINr   EVALr   )r   
extra_argstrain_preprocessoreval_preprocessorr   r   r    build_preprocessorq   s<   


z'NlpEpochBasedTrainer.build_preprocessor)__name__
__module____qualname____doc__r   r7   r   r;   r   r   rA   r	   rC   r   r
   rL   __classcell__r   r   r   r    r      s    r   c                   @   s   e Zd ZdddZdS )VecoTrainerNc                    s  ddl m} |durddlm} |||  | j  tj| _	i }| j
du r2| j| j| j	| jd| _
d}d}t| j
|rH| j
| t| j
j}	 | j| j
fi | jjdi | _| j| _d	d
 | jD }|D ]}| |_qh| | j| t|D ]#\}	}
d| d|vri |d| d< |
 |d| d | j|	 < qy|d7 }||k r| j
| nnqI| jD ]$fdd
| D }|d  D ] t fdd
|D | < qq|S )z1Veco evaluates the datasets one by one.

        r   )VecoDatasetN)LoadCheckpointHook)	model_cfgrE   preprocessorr   T
dataloaderc                 S   s   g | ]}t |qS r   r   r"   metricr   r   r    
<listcomp>       z(VecoTrainer.evaluate.<locals>.<listcomp>zeval_dataset[]c                       g | ]}|  qS r   r   )r"   m)metric_namer   r    rZ      r[   c                    r]   r   r   rX   )keyr   r    rZ      r[   ) 1modelscope.msdatasets.dataset_cls.custom_datasetsrS   modelscope.trainers.hooksrT   load_checkpointr:   evalr   rH   _modeeval_datasetbuild_dataset_from_cfgr-   rK   r@   switch_datasetr4   datasets_build_dataloader_with_dataset
evaluationgeteval_dataloaderdata_loadermetricstrainerevaluation_loopr3   evaluatevalueskeysnpaverage)r   checkpoint_pathrS   rT   metric_valuesr#   dataset_cntmetric_classesr^   m_idx
metric_clsall_metricsr   )r`   r_   r    rr      sb   



zVecoTrainer.evaluater   )rM   rN   rO   rr   r   r   r   r    rR      s    rR   )ostypingr   r   numpyru   torchr   modelscope.metainfor   modelscope.metrics.builderr   modelscope.models.baser   r	   modelscope.preprocessorsr
   modelscope.utils.configr   modelscope.utils.constantr   baser   rp   r   register_modulenlp_base_trainerr   nlp_veco_trainerrR   r   r   r   r    <module>   s    ~