o
    ॵi7                     @   s   d dl mZ d dlZd dlmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ ejd	krGejjZe  e Zejd
dG dd deZ								dddZdddZdS )    N)DictOptional)snapshot_download)CsanmtForTranslation)BaseTrainer)TRAINERS)	ModelFile)
get_loggerz2.0zcsanmt-translation)module_namec                       sZ   e Zd Zddedef fddZdd Zdd	 Z	dd
ee deee	f fddZ
  ZS )CsanmtTranslationTrainerNmodelcfg_filec           	         sv  |  |}t  || _t|tj| _|d u rt|tj	}t
 | i | _|   tjdd}d|j_tj|d| _tjtjd d gdd| _tjtjd d gdd| _i | _tj | _t| jfi | j| _| j| j| jd}| j| tjjt | jd d	| _ | j! &}t"#d
| j  t$| j}tj%| j| |&t'  W d    d S 1 sw   Y  d S )NT)allow_soft_placement)configsource_wids)dtypeshapenametarget_wids)inputlabelkeep_checkpoint_max)max_to_keepzloading model from )(get_or_download_model_dirtfreset_default_graph	model_dirospjoinr   TF_CHECKPOINT_FOLDER
model_pathCONFIGURATIONsuper__init__params_override_params_from_fileConfigProtogpu_optionsallow_growthSession_sessionplaceholderint64r   r   outputtraincreate_global_stepglobal_stepr   r   updateSaverglobal_variablesmodel_saver
as_defaultloggerinfoget_pretrained_variables_mapinit_from_checkpointrunglobal_variables_initializer)	selfr   r   argskwargs	tf_configr-   sesspretrained_variables_map	__class__ f/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/nlp/csanmt_translation_trainer.pyr#      sJ   

"z!CsanmtTranslationTrainer.__init__c                 C   s  | j d d | jd< | j d d | jd< | j d d | jd< | j d d | jd< | j d d | jd< | j d d | jd< | j d d | jd< | j d d	 | jd	< | j d d
 | jd
< | j d d | jd< | j d d | jd< | j d d | jd< | j d d | jd< | j d d | jd< | j d d | jd< d| jd< d| jd< d| jd< | j d d | jd< | j d d | jd< | j d d d | jd< | j d d d | jd< | j d d | jd< | j d d | jd< | j d d  | jd < | j d d! | jd!< | j d d" | jd"< | j d d# | jd#< | j d d$ | jd$< | j d d% | jd%< | j d d& | jd&< | j d d' | jd'< | j d d( | jd(< | j d d) | jd)< | j d d | jd< | j d d* | jd*< | j d d+ | jd+< | j d d, | jd,< | j d d- | jd-< | j d d. | jd.< | j d d/ | jd/< | j d d0 | jd0< | j d d1 | jd1< | j d d2 | jd2< | j d3 d4 | jd4< | j d3 d5 | jd5< | j d3 d6 | jd6< | j d d7 | jd7< d S )8Nr   hidden_sizefilter_size	num_headsnum_encoder_layersnum_decoder_layerslayer_preproclayer_postproc$shared_embedding_and_softmax_weightsshared_source_target_embeddinginitializer_scaleposition_info_typemax_relative_disnum_semantic_encoder_layerssrc_vocab_sizetrg_vocab_sizeg        attention_dropoutresidual_dropoutrelu_dropoutdataset	train_src	train_trg	src_vocabfile	vocab_src	trg_vocab	vocab_trgr.   num_gpuswarmup_stepsupdate_cycler   
confidence	optimizer
adam_beta1
adam_beta2adam_epsilongradient_clip_normlearning_rate_decayinitializerlearning_ratetrain_batch_size_wordsscale_l1scale_l2train_max_lennum_of_epochssave_checkpoints_stepsnum_of_sampleseta
evaluation	beam_sizelp_ratemax_decoded_trg_lenseed)cfgr$   )r<   rD   rD   rE   r%   E   s   
















z3CsanmtTranslationTrainer._override_params_from_filec                 O   s  t d t| j| jd }t| j| jd }t| j| jd }t| j| jd }d}d}| j }		 |d7 }|| jd	 krDntj	d
t
|f  t||||| jd | jd | jd dkrg| jd ndd|	|d
}
|
\}}zQ	 |	||g\}}|d7 }| j|| j|i}| jj| j|d}|d }t d|| || jd  dkrtj	dt
|f  | jd }| jj|	|tj d qt tjjy   tj	d|  Y nw q8tj	dt
ttt f  W d    d S 1 sw   Y  d S )NzBegin csanmt trainingrY   rZ   r]   r_   r   T   rp   z%s: Epoch %irl   ro   r`   )batch_size_wordsmax_lenr`   is_trainsessionepoch)	feed_dictlossz Iteration: {}, step loss: {:.6f}rq   z%s: Saving model on step: %d.z
model.ckpt)r0   zepoch %d end!z'%s: NMT training completed at time: %s.)r6   r7   r   r   r   r$   r*   r5   r   logging__name__input_fnr:   r   r   r-   formatr4   saver.   get_global_steperrorsOutOfRangeErrortimeasctime	localtime)r<   r=   r>   rY   rZ   r]   r_   r   	iteration
tf_sessiontrain_input_fnfeatureslabelsfeatures_batchlabels_batchr   sess_outputs	loss_stepck_pathrD   rD   rE   r.      s   


0"zCsanmtTranslationTrainer.traincheckpoint_pathreturnc                 O   s   dS )a  evaluate a dataset

        evaluate a dataset via a specific model from the `checkpoint_path` path, if the `checkpoint_path`
        does not exist, read from the config file.

        Args:
            checkpoint_path (Optional[str], optional): the model path. Defaults to None.

        Returns:
            Dict[str, float]: the results about the evaluation
            Example:
            {"accuracy": 0.5091743119266054, "f1": 0.673780487804878}
        NrD   )r<   r   r=   r>   rD   rD   rE   evaluate   s   z!CsanmtTranslationTrainer.evaluateN)r   
__module____qualname__strr#   r%   r.   r   r   floatr   __classcell__rD   rD   rB   rE   r      s    +EA
r      d         rz   Tc                    s  t jjt jj|t jt jjjt jt jjjdddt jjt jj|t jt jjjt jt jjjdddt j	
| }t j	
|}t j	j||f}|jdd ddd}|jd	d ddd}|jfd
dddd}|	rfdd} fdd} fdd}|fdd}|t j	jj|||d}n|j| d gd gfd}t j	|}t t jj|j | \}}|	r|
|j |dkr|
t   ||fS )N)	key_dtype	key_indexvalue_dtypevalue_indexrz   )num_oov_bucketsc                 S   s   t | gt |gfS r   )r   string_splitsrctrgrD   rD   rE   <lambda>   s    zinput_fn.<locals>.<lambda>
   )num_parallel_callsi@B c                 S   s   | j |j fS r   )valuesr   rD   rD   rE   r     s    c                    s     |  |fS r   )lookupr   )r[   r^   rD   rE   r     s    c                    sL     d  }t t j| d| t j|d| }t jt |t jdS )Nrz   r   r   )r   maximumsizecastminimumr,   )src_datatrg_databucket_width	bucket_id)r|   num_bucketsrD   rE   key_func	  s   zinput_fn.<locals>.key_funcc                    s   |j  d gd gfdS )Npadded_shapes)padded_batch)
unused_keywindowed_data)r{   rD   rE   reduce_func  s   zinput_fn.<locals>.reduce_funcc                    s8    d  }| d7 }   | |  }t j|t jdS )Nrz   r   )r   r   r,   )keyr   r   )r{   r|   r   r`   rD   rE   window_size_func  s   z"input_fn.<locals>.window_size_funcc                    s$   t t j| d kt j|d kS )Nr   )r   logical_andr   r   )r|   rD   rE   r     s    )r   r   r   r   )r   r   StaticVocabularyTableTextFileInitializerstringTextFileIndex
WHOLE_LINEr,   LINE_NUMBERdataTextLineDatasetDatasetzipmapprefetchfilterapplyexperimentalgroup_by_windowr   make_initializable_iteratoradd_to_collection	GraphKeysTABLE_INITIALIZERSrj   get_nextr:   tables_initializer)src_filetrg_filesrc_vocab_filetrg_vocab_filer   r|   
batch_sizer{   r`   r}   r~   r   src_datasettrg_datasetsrc_trg_datasetr   r   r   iteratorr   r   rD   )r{   r|   r   r`   r[   r^   rE   r      s   
r   c                    s   t jt j| }|  d u r tfddt  D }nt fddt  D }g }ttt	dd t  t  }i }t j
ddd) |D ]\}}|| }	|	  }
|
| kri||	 |	||< qLW d    |S 1 suw   Y  |S )	Nc                    s6   g | ]}|j d d  v r|j |j d d fqS ):r   r   split).0var)saved_shapesrD   rE   
<listcomp>8  s
    z0get_pretrained_variables_map.<locals>.<listcomp>c                    sL   g | ]"  j d d v rt fddD r j  j d d fqS )r   r   c                 3   s    | ]}| j vV  qd S r   )r   )r   scoper   rD   rE   	<genexpr>>  s
    
z:get_pretrained_variables_map.<locals>.<listcomp>.<genexpr>)r   r   all)r   ignore_scoper   r   rE   r   <  s     c                 S   s   | j dd S )Nr   r   r   )xrD   rD   rE   r   D  s    z.get_pretrained_variables_map.<locals>.<lambda> T)reuse)r   r.   NewCheckpointReaderlatest_checkpointget_variable_to_shape_mapsortedr3   dictr   r   variable_scope	get_shapeas_listappend)checkpoint_file_pathr   reader	var_namesrestore_varsname2varrestore_mapvar_namesaved_var_namecurr_var	var_shaperD   r   rE   r8   3  s@   



r8   )r   r   r   r   rz   TNNr   )os.pathpathr   r   typingr   r   
tensorflowr    modelscope.hub.snapshot_downloadr   modelscope.models.nlpr   modelscope.trainers.baser   modelscope.trainers.builderr   modelscope.utils.constantr   modelscope.utils.loggerr	   __version__compatv1disable_eager_executionr6   register_moduler   r   r8   rD   rD   rD   rE   <module>   s6   

 K
U