o
    ॵi2                     @   sT  d dl Z d dlZd dlZd dlZd dlZd dlmZmZmZ d dl	Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z&m'Z'm(Z(m)Z) e$ Z*dZ+dZ,dZ-dZ.dZ/dZ0dZ1dZ2dZ3dZ4dZ5ej6ej7dG dd deZ8dS )    N)CallableDictOptional)nn)optim)Trainers)Model
TorchModel)KWSDataLoader
KWSDataset)BaseTrainer)TRAINERS)update_conf)DEFAULT_MODEL_REVISION	ModelFile)	to_device)create_device)
get_logger)get_dist_infoget_local_rank	init_dist	is_masterbasetrain_easybasetrain_normalbasetrain_hardfinetune_easyfinetune_normalfinetune_hard
checkpointg?g333333?g333333?g      ?)module_namec                       s   e Zd ZdZeeeeee	fZ
ddedfdededee dee dee dee f fd	d
ZdejfddZdd Zdd Zdd Zdd Zdedeeef fddZdd Z  ZS )KWSFarfieldTrainerz
./work_dirNmodelwork_dircfg_filearg_parse_fnmodel_revisioncustom_confc                    s|  t |tr| ||| _|d u rtj| jtj}n|d us"J dtj	|| _t
 || |dd }|r=|| jj_| jjj| _|  | _|| _|dd d urYt|d  t \}	}
|
dk| _|dd}| jrtt }d| }t|| _| jjdkr| j| j d	|vrt| jjd	sJ d
| jjj| _n|d	 | _|dd | _|dd | _ | jd u r| jjj!| _| j d u r| jj"j#| _ | jjj$}|dd | _%| j%d u r|j&| _%t'| _(d|v r|d | _(|j)| _*|dd}|d | _+d|v rtj| j|d }t,-|| _nI| j+dkrTtj| jt. d| j+dd}t//|}t0|dkr>t12d|d  t,-|d | _nt0|dkrMt3d| dt4d| | jjj5j6}t78| j9 || _5t:; | _<d | _=tj| jd>| j?| _@g | _A| jBD ]$}tj| j|}tj| j| d}tC||||  | jAD| qtEF| jtG tEF| jtH tEF| jtI f| _Jd S )Nz?Config file should not be None if model is not from pretrained!num_synlauncher   devicegpuzcuda:cuda
max_epochsz1max_epochs is missing from the configuration filetrain_iters_per_epochval_iters_per_epochworkerssingle_rate
next_epoch	model_binr   _04dz*.pthz!Loading model from checkpoint: %sz$Failed to load checkpoint file like z. File not found!z7Expecting one but multiple checkpoint files are found: z{}.log.jsonz.conf)K
isinstancestrget_or_download_model_dir	model_dirospathjoinr   CONFIGURATIONdirnamesuper__init__getcfgr!   r'   _num_classesbuild_modelr"   r   r   _distr   r   r*   typetohasattrtrainr-   _max_epochs_train_iters
_val_itersr.   
evaluationr/   
dataloader_threadsworkers_per_gpuBASETRAIN_RATIO_single_ratebatch_size_per_gpu_batch_size_current_epochtorchloadCKPT_PREFIXgloblenloggerinfoFileNotFoundErrorAssertionError	optimizerlrr   Adam
parametersr   CrossEntropyLossloss_fndata_valformat	timestampjson_log_path
conf_files	conf_keysr   appendmathfloor
EASY_RATIONORMAL_RATIO
HARD_RATIOstages)selfr!   r"   r#   r$   r%   r&   kwargsr'   r4   
world_sizedevice_name
local_rankdataloader_configr2   model_bin_fileckpt_file_pattern
ckpt_filesr`   conf_keytemplate_file	conf_file	__class__ b/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/audio/kws_farfield_trainer.pyr@   2   s   
	




















zKWSFarfieldTrainer.__init__returnc                 C   sB   t j| j| jdd}t|trt|dr|jS t|tj	r|S dS )z Instantiate a pytorch model and return.

        By default, we will create a model using config from configuration file. You can
        override this method in a subclass.

        T)cfg_dicttrainingr!   N)
r   from_pretrainedr9   rB   r6   r	   rH   r!   r   Module)rr   r!   r   r   r   rD      s   
zKWSFarfieldTrainer.build_modelc                 O   s   | j s|   td tj }d}t| jD ]\}}||7 }|| j }| 	|| qtj | }td
| d  d S )NzStart training...r   zTotal time spent: {:.2f} hours
      @)re   gen_valr[   r\   datetimenow	enumeraterq   rU   	run_stagerf   total_seconds)rr   argsrs   	totaltimenext_stage_head_epochstage	num_epochepochs_to_runr   r   r   rI      s   




zKWSFarfieldTrainer.trainc              
   C   s8  |dkrt d| d dS t d| d | | j|d  | j|d d  \}}t|}t|D ]}|  jd7  _tj	 }t d	| j d
}d}	t| j
D ]e}
t|\}}t|d}t|| j}t|| j}| j  | |}| t|d| jf|}t| s|  | j  || 7 }|	d7 }	d| j| j|
d | j
| }t | | | qR||	 }| d}d| j||}t | | | dt| j||}tj !| j"|}t d|  t#| j| tj	 | }t d| j|$ d  q4|%  |&  t d| d dS )z
        Run training stages with correspond data

        Args:
            stage: id of stage
            epochs_to_run: the number of epoch to run in this stage
        r   zInvalid epoch number, stage z exit!NzStarting stage z...   r)   zStart epoch %d...        r   z8Epoch: {:04d}/{:04d}, batch: {:04d}/{:04d}, loss: {:.4f} z<Evaluate epoch: {:04d}, loss_train: {:.4f}, loss_val: {:.4f}z/{}_{:04d}_loss_train_{:.4f}_loss_val_{:.4f}.pthzSave model to z%Epoch {:04d} time spent: {:.2f} hoursr   zStage z is finished.)'r[   warningr\   create_dataloaderri   iterrangerU   r   r   rK   nextrV   reshaper   r*   r_   	zero_gradr!   rd   rC   npisnanitembackwardsteprf   rJ   	_dump_logevaluaterX   r:   r;   r<   r"   saver   stoprelease)rr   r   r   datasetrN   itr4   	epochtimeloss_train_epochvalidbatchsbifeatlabelpredictlosstrain_resultloss_val_epoch
val_result	ckpt_name	save_pathr   r   r   r      sr   









zKWSFarfieldTrainer.run_stagec           	      C   s6  t j| jd}| jdkr4td t|d}t	|| _
W d   n1 s(w   Y  td dS td | | jd | jd	 \}}t|}g | _
t| jD ]}td
| t|\}}t|d}| j
||g qS|  |  t|d}t| j
| W d   n1 sw   Y  td dS )z)
        generate validation set
        zval_dataset.binr   zStart loading validation set...rbNzFinish loading validation set!z"Start generating validation set...r      zIterating validation data %dr   wbz!Finish generating validation set!)r:   r;   r<   r"   rU   r[   r\   openpicklerW   re   r   ri   r   r   rL   r   rV   r   rk   r   r   dump)	rr   val_dump_filefr   rN   r   r   r   r   r   r   r   r      s2   



zKWSFarfieldTrainer.gen_valc                 C   s8   t ||| j| j| j}t|| j| jd}|  ||fS )N)	batchsize
numworkers)r   rO   rR   rC   r
   rT   start)rr   	base_pathfinetune_pathr   rN   r   r   r   r     s   

z$KWSFarfieldTrainer.create_dataloadercheckpoint_pathc           	   	   O   s   t d d}t 4 | jD ](\}}t|| j}t|| j}| |}| t	|d| j
f|}|| 7 }qW d    n1 sBw   Y  t d || j S )NzStart validation...r   r   zFinish validation.)r[   r\   rV   no_gradre   r   r*   r!   rd   r   rC   r   rL   )	rr   r   r   rs   r   r   r   r   r   r   r   r   r   $  s   





zKWSFarfieldTrainer.evaluatec                 C   sP   t  r&t| jd}|| |d W d    d S 1 sw   Y  d S d S )Nza+
)r   r   rh   write)rr   msgr   r   r   r   r   6  s   
"zKWSFarfieldTrainer._dump_log)__name__
__module____qualname__DEFAULT_WORK_DIRBASETRAIN_CONF_EASYFINETUNE_CONF_EASYBASETRAIN_CONF_NORMALFINETUNE_CONF_NORMALBASETRAIN_CONF_HARDFINETUNE_CONF_HARDrj   r   r7   r   r   dictr@   r   r   rD   rI   r   r   r   r   floatr   r   __classcell__r   r   r~   r   r    +   s@    iC

r    )9r   rY   rl   r:   r   typingr   r   r   numpyr   rV   r   r   modelscope.metainfor   modelscope.modelsr   r	   7modelscope.msdatasets.dataset_cls.custom_datasets.audior
   r   modelscope.trainers.baser   modelscope.trainers.builderr   "modelscope.utils.audio.audio_utilsr   modelscope.utils.constantr   r   modelscope.utils.data_utilsr   modelscope.utils.devicer   modelscope.utils.loggerr   modelscope.utils.torch_utilsr   r   r   r   r[   r   r   r   r   r   r   rX   rn   ro   rp   rQ   register_modulespeech_dfsmn_kws_char_farfieldr    r   r   r   r   <module>   sF    