o
    ॵi                     @   s   d dl Z d dlmZ d dlmZmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ ejejdG dd deZdS )    N)deepcopy)AnyDictListUnion)nn)Trainers)Model
TorchModel)GPT3ForTextGeneration)TRAINERS)NlpEpochBasedTrainer)build_parallel)Config)is_megatron_initialized)module_namec                       s   e Zd Zdef fddZdeejef fddZ	dd Z
d	d
 Zdedeeef deeef fddZdedeeef deeef fddZdefddZ  ZS )GPT3Trainercfgc                    s&   t  |}ttjdd|j_|S )NRANKr   )superrebuild_configintosenvirongetmodelrank)selfr   	__class__ X/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/nlp/gpt3_trainer.pyr      s   zGPT3Trainer.rebuild_configreturnc                 C   s   | j dd d ur!t| j d }|t|tj gd t|S td|dtj gd}t	 rCddl
m} |tj | d t|S )	Nparallel)module
device_idsDistributedDataParallelT)typer$   find_unused_parametersr%   r   )mpu)output_deviceprocess_group)r   r   r   updatedicttorchcudacurrent_devicer   r   megatron_utilr)   get_data_parallel_group)r   r   dp_cfgr)   r    r    r!   to_parallel   s&   
zGPT3Trainer.to_parallelc                 C   s   | j j}|| S N)eval_preprocessor	tokenizer
detokenizetolist)r   tokensr7   r    r    r!   _decode3   s   zGPT3Trainer._decodec                 C   s<   | j r| jjn| j}|  d|v r| ||S | ||S )N
inputs_len)_distr   r$   eval_generate_eval_forward_eval)r   datar   r    r    r!   evaluation_step7   s
   zGPT3Trainer.evaluation_stepr   rA   c                    sd   |j ddd ||}|d } fddt|d |D |d<  fd	dt|d
 |D |d< |S )N   g        )top_ktop_pprompts_lenc                    s"   g | ]\}}  ||d  qS r5   r;   .0seqskip_lenr   r    r!   
<listcomp>G   s    z.GPT3Trainer._generate_eval.<locals>.<listcomp>	sequencespredsc                    s&   g | ]\}}  ||d  d qS )rC   NrG   rH   rL   r    r!   rM   K   s    labelstgts)r,   generatezip)r   r   rA   resultrF   r    rL   r!   r?   @   s   




zGPT3Trainer._generate_evalc                 C   s
   | |S r5   )forward)r   r   rA   r    r    r!   r@   Q   s   
zGPT3Trainer._forward_evalc                 C   s   t j| j| j| jjdS )N)cfg_dictmegatron_cfg)r	   from_pretrained	model_dirr   megatronrL   r    r    r!   build_modelU   s   zGPT3Trainer.build_model)__name__
__module____qualname__r   r   r   r   Moduler
   r4   r;   rB   r   r   strr   r?   r@   r[   __classcell__r    r    r   r!   r      s     	





r   )r   copyr   typingr   r   r   r   r.   r   modelscope.metainfor   modelscope.models.baser	   r
   modelscope.models.nlpr   modelscope.trainers.builderr   modelscope.trainers.nlp_trainerr   $modelscope.trainers.parallel.builderr   modelscope.utils.configr   modelscope.utils.megatron_utilsr   register_modulegpt3_trainerr   r    r    r    r!   <module>   s   