o
    ॵiY                     @   s`  d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlmZ	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lm Z m!Z! d dl"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z. d dl/m0Z0 d dl1m2Z2 d dl3m4Z4m5Z5 e(j6ej7dG dd de&Z8G dd dZ9dS )    N)Dict)EasyDict)DistributedDataParallel)Trainers)	Evaluater)	inference)FeatureLoss)build_ddp_modelbuild_local_model)cosine_scheduler	ema_model)build_dataloaderbuild_dataset)BaseTrainer)TRAINERS)save_checkpoint)DEFAULT_MODEL_REVISION	ModelFile
ThirdParty)
get_logger)MeterBuffer)get_ranksynchronize)module_namec                       st   e Zd Zddddefdededededef
 fdd	Zd
d Zdd Z	ddede	ee
f fddZdd Z  ZS )ImageDetectionDamoyoloTrainerNTmodelcfg_fileload_pretrain
cache_pathmodel_revisionc           
         s   |dur+| tj}|dur|tj | |||| _|du r*tj| jt	j
| _n|dur3|dus7J d|durE|| _|durE|| _t | j | j}	tj| j|	jjj|	jj_|rwd|v rj|d |	j_ntj| j| jjj|	j_d| jv r| |	}	d|v r|d |	j_d|v r|d |	j_d|v r|d |	j_d|v r|d |	j_d	|v r|d	 |	j_d
|v r|d
 |	j_d|v r|d |	j_d|v r|d |	jj_d|v r|d |	j_ d|v r|d |	j!_"d|v r|d |	j!_#|	jj| _t$| j| _%|	| _dS )a   High-level finetune api for Damoyolo.

        Args:
            model: Model id of modelscope models.
            cfg_file: Path to configuration file.
            load_pretrain: Whether load pretrain model for finetune.
                if False, means training from scratch.
            cache_path: cache path of model files.
            model_revision: the git version of model on modelhub.
            gpu_ids: the id list of gpu.
            batch_size: total batch size.
            max_epochs: maximum number of training epoch.
            train_image_dir: the directory of training image.
            val_image_dir: the directory of validation image.
            train_ann: the path of train set annotation file.
            val_ann: the path of val set annotation file.
            num_classes: class number.
            base_lr_per_img: learning rate per image.
                The final learning rate is base_lr_per_img*batch_size.
            pretrain_model: the path of pretrained model.
            work_dir: the directory of work folder.
            exp_name: the name of experiment.
            third_party: in which third party library this function is called.
        Nz;cfg_file and cache_path is needed, if model is not providedpretrain_model	frameworkgpu_ids
batch_size
max_epochstrain_image_dirval_image_dir	train_annval_annnum_classesbase_lr_per_imgwork_direxp_name)&getr   KEYpopget_or_download_model_dirr   ospathjoinr   CONFIGURATIONr   super__init__cfgr   backbonestructure_filetrainfinetune_pathweights_config_transformr"   r#   total_epochsdatasetr%   r&   r'   r(   headr)   r*   miscs
output_dirr,   len
world_size)
selfr   r   r   r   r   argskwargsthird_partyr7   	__class__ k/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/cv/image_detection_damoyolo_trainer.pyr6   )   sr    






z&ImageDetectionDamoyoloTrainer.__init__c                 C   s8   t j| tjdd||d t|d d }|| d S )Nncclztcp://127.0.0.1:12344)init_methodrankrD   )torchcuda
set_devicedistinit_process_groupDamoyoloTrainerr:   )rE   
local_rankrD   r7   trainerrK   rK   rL   _train   s   z$ImageDetectionDamoyoloTrainer._trainc                 C   sR   t | jjjdkrtj| j| j| j| jfdd d S t| jd d }|jdd d S )N   T)nprocsrF   r3   r   rV   )	rC   r7   r:   r"   mpspawnrX   rD   rU   )rE   rW   rK   rK   rL   r:      s   

z#ImageDetectionDamoyoloTrainer.traincheckpoint_pathreturnc                 O   s(   |d ur	|| j j_t| j }|  d S N)r7   testr^   r   evaluate)rE   r^   rF   rG   	evaluaterrK   rK   rL   rb      s   

z&ImageDetectionDamoyoloTrainer.evaluatec                 C   s  t i }|jj|_|jjj|j_|jj|j_|j|_|j	|_	|j|_|j
|_|jj|j_|jj
|j_|jjj|j_|jjj|j_|jjj|j_t|jj|jjj |j_|jjj|jj |j_|jjj|j_|jjj|j_|jj|j_|d d= |d d= |d d= |d d= |S )Nr:   rA   lr_scheduler	optimizer
dataloader)easydictr:   rA   rf   workers_per_gpunum_workersr+   rB   r   r?   
evaluationra   preprocessoraugmentrd   warmup_start_lrmin_lr_ratiowarmup_epochsrC   r"   batch_size_per_gpur#   re   lrr*   momentumweight_decayr$   r>   )rE   config
new_configrK   rK   rL   r=      s6   




z/ImageDetectionDamoyoloTrainer._config_transformr`   )__name__
__module____qualname__r   strboolr6   rX   r:   r   floatrb   r=   __classcell__rK   rK   rI   rL   r   &   s4    [


	r   c                   @   sT   e Zd ZdddZdddZdd Zd	d
 Zdd ZdddZdddZ	dd Z
dS )rU   Nc                 C   s   || _ || _|| _|jj| _|jj| _d| _t| j jj	dkr#d| _
nd| _
t|jjd| _tj|jj|jj| _t dkrGtj| jdd ttj| jd| _| jd	| j | jd
| j  d S )NrQ   rY   TF)window_sizer   exist_okztrain_log.txtzargs info: {}zcfg value:
{})r7   tea_cfgrF   rA   rB   r,   devicerC   r:   r"   distributedr   print_interval_itersmeterr1   r2   r3   	file_namer   makedirsr   loggerinfoformat)rE   r7   rF   r   rK   rK   rL   r6      s    


zDamoyoloTrainer.__init__Fc                 C   s   t ||jj|jjd|jjjd}t ||jj|jjdd}t	
t|d |jj }t||jj|jj| j|jj|jjdd|d	}t||jj|jj|jjdd|d}|||fS )	NT)is_trainmosaic_mixupF)r   r       )r#   start_epochr>   ri   r   size_divr   )r#   ri   r   r   r   )r   r?   r%   r'   r:   rl   r   r&   r(   mathceilrC   r#   r   r   r>   rA   ri   ra   )rE   r7   r   train_datasetval_datasetiters_per_epochtrain_loader
val_loaderrK   rK   rL   get_data_loader   sN   

	zDamoyoloTrainer.get_data_loaderc	           	      C   sl   || _ || _|| _ || | _|| | _|| | _|| | _| j| j| j k| _|| | _|| | _|| _	d S r`   )
r   r>   
start_itertotal_iterswarmup_itersno_aug_itersno_augeval_interval_itersckpt_interval_itersr   )	rE   r   r   r>   ro   no_aug_epochseval_interval_epochsckpt_interval_epochsr   rK   rK   rL   setup_iters  s   






zDamoyoloTrainer.setup_itersc           	      C   sP  g g g }}}| j  D ]9\}}t|dr#t|jtjr#||j t|tjs-d|v r4||j	 qt|drFt|j	tjrF||j	 q| j
r| j D ]9\}}t|dret|jtjre||j t|tjsod|v rv||j	 qOt|drt|j	tjr||j	 qOtjj|d|dd}|||d |d|i || _| jS )	NbiasbnweightgMbP?T)rq   rr   nesterov)paramsrs   r   )r   named_moduleshasattr
isinstancer   nn	ParameterappendBatchNorm2dr   distillfeature_lossrP   optimSGDadd_param_groupre   )	rE   rr   rs   bn_groupweight_group
bias_groupkvre   rK   rK   rL   build_optimizer  sB   zDamoyoloTrainer.build_optimizerc              
      s`  t  j j _ jrtj j _ jd urdd _	d _
t  j j _ j  tjtj jd}d|v rC jj|d dd nd|v rQ jj|d dd t jjj jjjdd j _nd	 _	d  _
  jjj jjj _ jjjd ur jd
 jjj   j jjj d _ d _!n. jjj"d ur j# jjj"dd}| _ | _! jd$ j  nd _ d _! jd  jjj%rވ jd t& j jjj' _&nd  _& ( j j\ _) _*} +| j! jjj, jjj- jjj. jj/j0 jj/j1 jj/j2 t3 jjj4 jjj5 jjj6 j7 j8 j9 jjj: _;d jjj<v  _= jr;t> j| _n jd _ jd  j  t?? }t?? }t@ j)D ]\}\}}	}
 jA| } j;B|} jjCD ]}||d< qq| j} fdd|	D }	t?? } j	r j||	dd\}}|d }tD   j||	dd}W d    n	1 sw   Y  dtEF|tEjG tH j)  d d d }| || }||7 }||d< n
 ||	}|d } jI  |J   j
d urtjKjL jM  j
dd  jN   j&d ur j&O| j |}t?? }dd |P D } jQjOd3|| || |d | |d  j7 j8 kra j=ra jd!  j)jRSd	  jT _U jT _Vd	 _=|d  j2 dkr߈ j7|d  } jQd" jW| }d#$tXjYtZ|d$}d%$ j d  j,|d  jT  jT} jQ[d&}d'\d(d |P D } jQ[d)}d'\d*d |P D } jd+$||| jQd j]d,$|j^j_d |j^j_d- |   jQ`  |d  jV dkr jad. j d  |d/ |d  jU dkrt?bd0  c| jjdje  j  tf  |d  jT dkr% j d  _ qZ jad1|d2 d S )4NT   map_locationr   )strict
state_dictcwd)	distillerFzfinetune from r   )need_optimizerzResume Training from Epoch: {}zStart Training...z8Enable ema model! Ema model will be evaluated and saved.r   rQ   zTraining start...rq   c                    s   g | ]}|  jqS rK   )tor   ).0targetrE   rK   rL   
<listcomp>  s    z)DamoyoloTrainer.train.<locals>.<listcomp>)stu
total_loss)tearY      gdistill_loss)max_norm	norm_typec                 S   s   i | ]	\}}||  qS rK   )item)r   _name_vrK   rK   rL   
<dictcomp>  s    z)DamoyoloTrainer.train.<locals>.<dictcomp>)	iter_time
model_timerq   z--->turn OFF mosaic aug now!r   zETA: {})secondszepoch: {}/{}, iter: {}/{}lossz, c                 S      g | ]\}}d  ||jqS )z
{}: {:.1f}r   avgr   r   r   rK   rK   rL   r         timec                 S   r   )z{}: {:.3f}sr   r   rK   rK   rL   r     r   z{}, {}, {}, lr: {:.3e}z, size: ({:d}, {:d}), {}   zepoch_%d_ckpt.pthr[   g~jth?zlatest_ckpt.pth)	ckpt_namerV   rK   )gr
   r7   r   r   r   r   SyncBatchNormconvert_sync_batchnormr   r   	grad_clip	tea_modelevalrP   loadrF   tea_ckptload_state_dictr   neckout_channelsr   r   r   r:   rr   rs   re   r;   r   r   load_pretrain_detectorepochr   resume_pathresume_modelr   emar   ema_momentumr   r   r   r   r>   ro   r   rA   r   r   r   r   r*   r#   rn   r   r   r   rm   rd   rl   r   r	   r   	enumerater   get_lrparam_groupsno_gradr   cospirC   	zero_gradbackwardutilsclip_grad_norm_
parametersstepupdateitemsr   batch_sampler
set_mosaicr   r   r   
global_avgdatetime	timedeltaintget_filtered_meterr3   latesttensorsshapeclear_meters	save_ckptsleeprb   r?   r(   r   )rE   rV   r   resume_epochitersiter_start_timeiter_end_time	data_iterinpstargetsidscur_iterrq   param_groupmodel_start_timeoutputsfpn_outsr   fpn_outs_teadistill_weightr   outputs_array
left_iterseta_secondseta_strprogress_str
loss_meterloss_str
time_metertime_strrK   r   rL   r:   9  sZ  


















zDamoyoloTrainer.trainc                 C   s   |dkrL| j d ur| j j}nt| jtr| jj}n| j}tj| j|}| j	
d| d| jd i}| jr@|j| j d t||| j|dd d S d S )Nr   zSave weights to {}r   rY   )r   T)r   filenamere   meta	with_meta)r   r   r   DDPmoduler1   r2   r3   r   r   r   r   r   r   r   r   r   r   re   )rE   r   rV   update_best_ckpt
save_modelr!  rK   rK   rL   r    s&   



zDamoyoloTrainer.save_ckptc                 C   s   |}t j|| jd}d|v r| j|d  nd|v r#| j|d  |rOd|v r1| j|d  | jrOd|v rC| j|d d  nd|v rO| j|d  d|v r[|d d }|S d|v rc|d }|S )Nr   r   r   re   r!  r   r   )rP   r   r   r   r   re   r   r   )rE   r   load_optimizerckpt_file_pathckptr  rK   rK   rL   r     s,   
zDamoyoloTrainer.resume_modelc                 C   sv   | j d ur
| j j}n| j}t|tr|j}tj| j| j	d}|dkr*tj
|dd | jD ]}t||| j|d q-d S )Nr   r   Tr~   )r   output_folder)r   r   r   r#  r$  r1   r2   r3   rB   r,   r   r   r   r   )rE   rV   r(   	evalmodelr*  data_loader_valrK   rK   rL   rb   '  s$   



zDamoyoloTrainer.evaluater`   )F)rv   rw   rx   r6   r   r   r   r:   r  r   rb   rK   rK   rK   rL   rU      s    

(% 
C
rU   ):r   r   r1   r   typingr   rP   torch.distributedr   rS   torch.multiprocessingmultiprocessingr\   torch.nnr   rg   r   torch.nn.parallelr   r#  modelscope.metainfor   Cmodelscope.models.cv.tinynas_detection.damo.apis.detector_evaluaterr   Cmodelscope.models.cv.tinynas_detection.damo.apis.detector_inferencer   Kmodelscope.models.cv.tinynas_detection.damo.base_models.losses.distill_lossr   >modelscope.models.cv.tinynas_detection.damo.detectors.detectorr	   r
   1modelscope.models.cv.tinynas_detection.damo.utilsr   r   :modelscope.msdatasets.dataset_cls.custom_datasets.damoyolor   r   modelscope.trainers.baser   modelscope.trainers.builderr   modelscope.utils.checkpointr   modelscope.utils.constantr   r   r   modelscope.utils.loggerr   modelscope.utils.metricr   modelscope.utils.torch_utilsr   r   register_moduletinynas_damoyolor   rU   rK   rK   rK   rL   <module>   s:    