o
    ߥi*                     @   s   d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ ddlmZ ddlmZ e ZG dd deZdS )    )DictN)mpuprint_rank_0)FP16_Module)
functional)
TorchModel)Tensor)
get_logger)init_megatron_util)pre_load   )	PlugModel)PlugNLGConfigc                       sx   e Zd ZdZ fddZdddZedded	 fd
dZ									dddZ	dde
eef fddZ  ZS )DistributedPlugaF  
    The wapper class of PLUG Model to initialize parallel environment, load model weights, generate sentences.
    Parameters:
        model_dir (`str`, *required*):
            Path to model damo/nlp_plug_text-generation_27B.
        The model structure in model_dir should be like this:
        model_dir
            |_ config.json
            |_ configuration.json
            |_ ds_zero-offload_10B_config.json
            |_ vocab.txt
            |_ model <-- an empty directory

        Model binaries shall be downloaded separately to populate the model directory, so that
        the model directory would contain the following binaries:
            |_ model
                |_ mp_rank_00_model_states.pt
                |_ mp_rank_01_model_states.pt
                |_ mp_rank_02_model_states.pt
                |_ mp_rank_03_model_states.pt
                |_ mp_rank_04_model_states.pt
                |_ mp_rank_05_model_states.pt
                |_ mp_rank_06_model_states.pt
                |_ mp_rank_07_model_states.pt
        rank (`int`, *required*):
            Used to identify different GPUs in a tensor parallel environment. eg. The rank of GPU #0 is 0, and the
            model file `mp_rank_00_model_states.pt` will be loaded on this GPU.
        world_size (`int`, *required*, defaults to 8):
            The parallel size in total.
        model_parallel_size (`int`, *required*, defaults to 8):
            The parallel size of model(tensor parallel).
        master_ip (`str`, *required*):
            The master IP, can usually be set to `"127.0.0.1"`, used as part of
            [`~torch.distributed.init_process_group`] method parameter `init_method`.
            `init_method` = `"tcp://{master_ip}:{master_port}"`
        master_port (`str`, *required*):
            The master port, can usually be set to `"29500"`, used as part of
            [`~torch.distributed.init_process_group`] method parameter `init_method`.
            `init_method` = `"tcp://{master_ip}:{master_port}"`
        seed (`int`, *optional*, defaults to 42):
            Random seed to control sampling.
    c                    sP   t  j|fi | || _|| _t|| _t||d d| _| j	dd| _
d S )N)	model_dirrankr   model)path_load_tag)super__init__r   	model_cfgr   from_pretrainedconfigr
   	iterationinitialize_modelr   )selfr   r   kwargs	__class__ _/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/nlp/plug/distributed_plug.pyr   @   s   zDistributedPlug.__init__r   c              	   C   sf  t d t| j}t dkr#tdt t	dd |
 D  | jjr/| jjr/|  |tj  | jjrt|}| jjr^|jjjjj  |jjjjj  |jjjjj  | jjrk|jjjjj  | jjr| D ]\}}d|v r|  qstt | j|d}|jj  }|D ]}||! vrt d|  qt d	|  q|jjj"|d
d |S )zBuild the model.z3Building Plug model. It will take a few minutes ...r   z5 > number of parameters on model parallel rank {}: {}c                 S   s   g | ]}|  qS r   )nelement).0pr   r   r    
<listcomp>T   s    z4DistributedPlug.initialize_model.<locals>.<listcomp>	LayerNorm)tagz
Skip key: zLoading key: F)strict)#r   r   r   r   get_data_parallel_rankloggerinfoformatget_tensor_model_parallel_ranksum
parameters	deepspeedfp16halfcudatorchcurrent_devicer   fp32_embeddingmoduler   bert
embeddingsword_embeddingsfloatposition_embeddingstoken_type_embeddingsfp32_tokentypesfp32_layernormnamed_modulesr   r   
state_dictkeysload_state_dict)r   r   r   name_module
load_model
model_dictkeyr   r   r    r   K   sJ   
z DistributedPlug.initialize_modelr           Infc           	      C   s   |dkr| t | |d d k }|| |< |dkr`| |  d  } t j| dd\}}t jtj|dddd}||k}|d	d df 	 |d	dd f< d|d
< || }|| |< | dd } | S )Nr   ).NrH   r   T)
descendingrJ   dim.).r   )
r3   topkviewsize
contiguoussortcumsumFsoftmaxclone)	logitstop_ktop_pfilter_valueindices_to_removesorted_logitssorted_indicescumulative_probssorted_indices_to_remover   r   r    top_k_logitsy   s(   
zDistributedPlug.top_k_logitsNFTc                 C   s   | j |||||||||	|
d
S )N)checkpoint_activationsis_infersequence_outputparallel_outputr   )r   input_tokenstoken_type_idsattention_masktarget_tokensposition_idsdecode_attention_maskra   rb   rc   rd   r   r   r    forward   s   zDistributedPlug.forward   inputc                 G   s  t j }|d jd }|d dd |}|d |}|d |}| j  t 	 ( g }	g }
d}d }| j
j}d}||k r0|d dkr|dkr|
| ||kjd	d
d }|t|
 dkr{t |d | t j|
gddd  }nt j|
|d ||t|
 < |dk}|d |}g }
d }t j|dgt|
t j|d}| j|d ||||d	|dd	\}}}|d d dd d f }|| jd  }| j|| jd | jd d}tj|dd}t j|dd}|d  }||krd}d|d< |dkrt|	ttd|d krn%|dkr|d7 }q@t j||gdd}|
| |	| |d7 }||k sEg }|	D ]}|rG|d dkrG|dkrGq4|| q4d|iW  d    S 1 s]w   Y  d S )N	input_idsr   r   rJ   dec_input_idsrh   f   rm   T)as_tuplei   i )dtypedeviceF)rb   rc   rd   temperaturerX   rY   )rX   rY   rL   )num_samplesd   g?generate_context)r3   r2   r4   shaperO   rQ   tor   evalno_gradr   original_vocab_sizeappendnonzerolencat
LongTensorfulllongr   r`   rT   rU   multinomialitemintmax)r   rn   
out_lengthr   rt   
batch_sizetokensrp   rh   all_generate_tokensgenerate_tokenscounterrc   
vocab_sizesep_token_idxstartrj   _rW   	log_probsprev
prev_tokenrx   tokenr   r   r    generate   s   










:
&zDistributedPlug.generatere   )	NNNNNFFNT)rm   )__name__
__module____qualname____doc__r   r   staticmethodr:   r`   rl   r   strr   r   __classcell__r   r   r   r    r      s"    +
. 
 r   )typingr   r3   megatron_utilr   r   megatron_util.fp16r   torch.nnr   rT   modelscope.modelsr   modelscope.models.baser   modelscope.utils.loggerr	   modelscope.utils.megatron_utilsr
   $modelscope.utils.nlp.load_checkpointr    r   configurationr   r)   r   r   r   r   r    <module>   s   