o
    ॵi                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ ddlmZ ddlmZ eje
jdG dd deZdS )    N)Union)DeepSpeedEngine)mpu)nn)Trainers)
TorchModel)DistributedPlug)BertLayerNorm)TextGenerator)ModeKeys   )TRAINERS)NlpEpochBasedTrainer)module_namec                   @   sd   e Zd Zdeejef fddZdeejef fddZdd Z	dd	 Z
d
d Zdd Zdd ZdS )PlugTrainerreturnc                 C   sb   t tjdd}tjdd}tjdd}t| j|f||d| jj}| j| |j_|jS )N
LOCAL_RANKMASTER_ADDRz	127.0.0.1MASTER_PORT29500)	master_ipmaster_port)	intosenvirongetr   	model_dircfgmodelunwrap_module)selfrankr   r   r    r#   X/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/nlp/plug_trainer.pybuild_model   s   zPlugTrainer.build_modelc                 C   s   ddl m} ||S )Nr   )DistributedDataParallel) modelscope.utils.nlp.distributedr&   )r!   r   DDPr#   r#   r$   to_parallel#   s   zPlugTrainer.to_parallelc                 C   s   dg i}g dd}|  D ]?}t|ttjjfr*|d dd t|j	 D  q|d dd t|j
 D  |d dd t|j
 D  q||fS )Nparams        )r*   weight_decayc                 S   s   g | ]}|d ur|qS )Nr#   ).0pr#   r#   r$   
<listcomp>-   s
    zIPlugTrainer._get_params_for_weight_decay_optimization.<locals>.<listcomp>c                 S   s4   g | ]\}}|d urd|vrd|vr|dkr|qS )N
mask_scoremaskbiasr#   r-   nr.   r#   r#   r$   r/   2   s    c                 S   s$   g | ]\}}|d ur|dkr|qS )Nr2   r#   r3   r#   r#   r$   r/   7   s
    )modules
isinstancer	   torchr   	LayerNormextendlist_parametersvaluesitems)r!   moduleweight_decay_paramsno_weight_decay_paramsmodule_r#   r#   r$   )_get_params_for_weight_decay_optimization'   s   


z5PlugTrainer._get_params_for_weight_decay_optimizationc                 C   sV  | j \}}| jjdd }|d ur|di }ddlm} | j}|jjj	j
}|jjj	jj}|jjjj}	g }
|
t| |7 }
|
t| |7 }
|
t| |	7 }
|
D ]}|d D ]
}t|dsbd|_qXqR||
|j|jd}| jjd	d }|d ur|d usJ |di }dd
lm} | j}|||j|j| ||jdd}|| _|| _| j| j||fS )N	optimizeroptionsr   )DeepSpeedCPUAdamr*   model_parallelF)lrr,   lr_scheduler)AnnealingLRr   )start_lrwarmup_iter	num_itersdecay_style	last_iter)
optimizersr   trainr   popdeepspeed.ops.adamrE   r   r>   bert
embeddingsencoderlayerdecoderr:   rB   hasattrrF   rG   r,   &modelscope.models.nlp.plug.AnnealingLRrI   	max_iterswarmuprM   rC   rH   )r!   rC   rH   optimizer_cfgoptim_optionsrE   r   rT   layers
dec_layersparam_groupsparam_groupparamlr_scheduler_cfg
lr_optionsrI   rL   r#   r#   r$   create_optimizer_and_scheduler>   s^   

z*PlugTrainer.create_optimizer_and_schedulerc           	      C   s   |  \}}d}ttj|||f|jd|d||}tj|  tj|jd}d|||k< tj|tj|jd}|	d
|}|||fS )N   )device)dtyperg   r+   r   )sizer7   trilonesrg   viewfloatarangelong	unsqueeze	expand_as)	r!   data	eod_token
batch_size
seq_lengthatt_mask_batchattention_mask	loss_maskposition_idsr#   r#   r$   _get_masks_and_position_idsn   s(   

z'PlugTrainer._get_masks_and_position_idsc              	   C   s   t j| _t| jjdd}|d d d d df  }|d d d dd f  }| |d\}}}t| jjdd r=| }||d d |d	 ||||d
\}	}
t	
|
  |}|d}t|d| |  }d|i| _| j| j d S )Ncheckpoint_activationsTlabelsr   rf   r   fp16	input_idsrw   )r{   loss)r   TRAIN_modegetattrr   rP   
contiguousrz   halfr   vocab_parallel_cross_entropyrm   rl   r7   sumtrain_outputs
log_bufferupdate)r!   r   inputsr{   
tgt_tokens
tgt_labelstgt_attention_maskdec_loss_maskry   _outputlossesr   r#   r#   r$   
train_step   s6   

	

zPlugTrainer.train_stepc                 C   sr  t | jtr| jj}n| j}|  | | jjj}|d jd }t	|| j
jd }t  |d  }|d  }|d  }|d d dd f  }	|d |g}
||
}|d }|	   }g |d< g |d< t|D ]8}|| d }d	|||d k< |   }| j
j|| d
d}| j
j|d
d}|d | |d | qnW d    |S 1 sw   Y  |S )Nr~   r   rw   r|   rf   predictionspredstgtsd   T)skip_special_tokens)r6   r   r   r>   evalr    configoriginal_vocab_sizeshaper
   eval_preprocessornlp_tokenizerr7   no_gradro   byter   translate_batchcpunumpytolistrangedecodeappend)r!   rr   r   
vocab_sizert   beam_generatortokenspadding_mask
target_idstarget_labelsencoder_inputsresult	pred_listtarget_listipred_idsgold_stringpred_stringr#   r#   r$   evaluation_step   sL   




zPlugTrainer.evaluation_stepN)__name__
__module____qualname__r   r   Moduler   r%   r)   rB   re   rz   r   r   r#   r#   r#   r$   r      s    0r   )r   typingr   r7   	deepspeedr   megatron_utilr   r   modelscope.metainfor   modelscope.models.baser   modelscope.models.nlp.plugr   #modelscope.models.nlp.plug.backboner	   $modelscope.models.nlp.plug.generatorr
   modelscope.utils.constantr   baser   nlp_trainerr   register_modulenlp_plug_trainerr   r#   r#   r#   r$   <module>   s     