o
    ॵi%                     @   sJ  d dl Z d dlZd dlmZmZmZmZmZ d dlZd dlm	Z
 d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, ddl-m.Z.m/Z/m0Z0 dd Z1dd Z2ej3ej4dG dd deZ5dS )    N)CallableDictOptionalTupleUnion)distributed)nn)Dataset)Trainers)Model
TorchModel)convert_models_to_fp32)	MsDataset)Preprocessor)CLIPPreprocessor)EpochBasedTrainer)TRAINERS)	merge_cfg
update_cfg)build_optimizer)Config)DEFAULT_MODEL_REVISION
ConfigKeysInvokeModeKeys	ModelFile
ThirdParty   )get_lossget_optimizer_paramsget_schedulec                 C   s    d| v pd| v pd| v pd| v S )Nbnlnbiaslogit_scale nr%   r%   e/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/multi_modal/clip/clip_trainer.pyexclude   s    r)   c                 C   s
   t |  S )N)r)   r&   r%   r%   r(   include    s   
r*   )module_namec                       s   e Zd Zdddddddddedfdeeeeje	f  dee	 dee
 dee
 deee
ee	e
f f  d	eeeef  d
eeeef  deeeee	ef f  deejjejjjf dee	 def fddZdd Z  ZS )CLIPTrainerN)NN*   modelcfg_filecfg_modify_fnarg_parse_fndata_collatortrain_dataseteval_datasetpreprocessor
optimizersmodel_revisionseedc                    s  t |tr,|tjd }|d ur|tj | ||
|| _|d u r+tj	
| jtj}n|d us4J dtj	|| _t|| _|| _t| j | | j| _d|v r\| j|d  t| j| _| j}tj||
tjd}t| d|vs~t|d dkr|jj}n|d }|jj}t tj!dd}t"#t||jj$j%|  }||jj& |jj'_(|	d d u rt)|* }dd	 |D }d
d	 |D }t+||}|dd||d dg|d |d |d f|d d}t,||jj-|d}n|	d }|	d d u rt.||jj'}n|	d }||f}	t/0 }t/0 }|1t tj!dd| _2|1t tj!dd| _3|jj4| _4d|vrD|jdd rD|jj5|d< d|vrW|jddrW|jj6|d< |d u rzt7jt8|t9j:|j;|j<d dt7j=t8|t9j>|j;|j<d di}|j?| _@tA| j@dr| j@d dd}|t7j B| |t7j= B| | j@d dd}|t7j C| |t7j= C| |jj$j%| | _DtE jFd|||||||||	|d
| d S )Nz?Config file should not be None if model is not from pretrained!cfg_options)revision
invoked_bywork_dirr   
WORLD_SIZEr   c                 S   "   g | ]\}}t |r|jr|qS r%   )r)   requires_grad.0r'   pr%   r%   r(   
<listcomp>j       z(CLIPTrainer.__init__.<locals>.<listcomp>c                 S   r>   r%   )r*   r?   r@   r%   r%   r(   rC   n   rD   g        )paramsweight_decayrF   lrbeta1beta2eps)rE   rG   betasrJ   )cfgdefault_args
LOCAL_RANKlauncheruse_fp16Fimage_resolution)	model_dirmode	tokenizer
resolution
column_mapimgtext)
r.   r/   r0   r1   r2   r3   r4   r5   r6   r8   r%   )G
isinstancestrgetr   KEYpopget_or_download_model_dirrR   ospathjoinr   CONFIGURATIONdirnamer   	from_filerL   r0   r   rebuild_configmerge_from_dictr   r   from_pretrainedr   TRAINERr   lentrainr<   pretrained_model
model_nameintenvironmathceil
dataloaderbatch_size_per_gpu
max_epochslr_schedulernum_train_stepslistnamed_parametersr   r   	optimizerr    r   CrossEntropyLosscudaloss_imgloss_txtloss_cfgrO   rP   r   r   r   TRAINrT   
model_infovalEVALdatasetdataset_cfghasattrset_input_img_keyset_input_text_keyglobal_batch_sizesuper__init__)selfr.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   kwargsthird_partyrL   r<   rl   
world_sizeepoch_stepsrw   gain_or_bias_paramsrest_paramsoptimizer_hparamsoptimizer_argsrx   rt   r{   r|   img_key_nametext_key_name	__class__r%   r(   r   '   s   







zCLIPTrainer.__init__c                    s.  |   tj|d< ||}t|| j| j| j}d|i}d|vrdg}tg }|D ] |	 fdd|
 D  q)i }|D ],}	||	d }
|
d urjt rat ra|
j }
t|
t  |	|	|
 i q>t|d|}|jjj  |d< t| j|d< | j	| n| j	|d  || _d S )	NrS   losslog_varsc                    s   g | ]} |v r|qS r%   r%   )rA   keykey_pr%   r(   rC      s    z*CLIPTrainer.train_step.<locals>.<listcomp>moduler$   r   )rj   r   r~   forwardr   r{   r|   r}   setupdatekeysr[   distis_availableis_initializeddataclone
all_reducediv_get_world_sizeitemgetattr
clip_modelr$   rm   r   
log_buffertrain_outputs)r   r.   inputsmodel_outputsr   r   default_keys_pattern
match_keysr   r   valueunwrapped_modelr%   r   r(   
train_step   sF   


zCLIPTrainer.train_step)__name__
__module____qualname__r   r   r   r   r   ModulerZ   r   r   r   r	   r   r   torchoptim	Optimizerrt   _LRSchedulerrm   r   r   __classcell__r%   r%   r   r(   r,   $   sV    

	

 #r,   )6ro   r_   typingr   r   r   r   r   r   r   r   r   torch.utils.datar	   modelscope.metainfor
   modelscope.models.baser   r   (modelscope.models.multi_modal.clip.modelr    modelscope.msdatasets.ms_datasetr   modelscope.preprocessors.baser   $modelscope.preprocessors.multi_modalr   modelscope.trainersr   modelscope.trainers.builderr   "modelscope.trainers.default_configr   r   %modelscope.trainers.optimizer.builderr   modelscope.utils.configr   modelscope.utils.constantr   r   r   r   r   r   clip_trainer_utilsr   r   r    r)   r*   register_moduleclip_multi_modal_embeddingr,   r%   r%   r%   r(   <module>   s0    