o
    Ti^                     @   s   d dl mZmZmZ d dlZd dlmZ ddlm	Z	 ddl
mZmZ ddlT ddlT ddlmZ dd	lmZmZ dd
lmZ G dd deZdS )    )IterableOptionalTupleN   )
empty_from)ActivationType	DtypeEnum)*)RaggedBatchWrapper   )OPTNonTransformerContainerOPTTransformerContainer)instantiate_embedc                   @   s  e Zd ZU dZee ed< 	 eee  ed< 	 	 e	de
fddZ	 e	de
fddZe	de
fd	d
Ze	de
fddZe	de
fddZe	de
fddZe	de
fddZe	defddZe	defddZe	defddZe	defddZe	dee fddZ	 d.ddZ	 d edejfd!d"Z d#e
d$ejd%ejd&ede!ejejf f
d'd(Z"d%ejd&edejfd)d*Z#d+edejfd,d-Z$dS )/OPTInferenceModelzL
    Inference model implementation for ragged batching for OPT models.
    _non_transformer_transformerreturnc                 C      | j jS N)_configmax_seq_lengthself r   j/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/opt/model.pymax_sequence_length(      z%OPTInferenceModel.max_sequence_lengthc                 C   r   r   )r   num_hidden_layersr   r   r   r   
num_layers0   r   zOPTInferenceModel.num_layersc                 C   r   r   )r   hidden_sizer   r   r   r   	model_dim4   r   zOPTInferenceModel.model_dimc                 C   r   r   )r   
vocab_sizer   r   r   r   r!   8   r   zOPTInferenceModel.vocab_sizec                 C   s   | j | j S r   )r    n_headsr   r   r   r   	head_size<   s   zOPTInferenceModel.head_sizec                 C   r   r   )r   num_attention_headsr   r   r   r   r"   @   r   zOPTInferenceModel.n_headsc                 C   r   r   )r   ffn_dimr   r   r   r   intermediate_dimD   r   z"OPTInferenceModel.intermediate_dimc                 C   s0   | j jtjkr
tjS | j jtjkrtjS td)Nz Only fp16 and bf16 are supported)	r   torch_dtypetorchfloat16r   fp16bfloat16bf16NotImplementedErrorr   r   r   r   activation_dtypeH   s
   z"OPTInferenceModel.activation_dtypec                 C      t jS r   )r   RELUr   r   r   r   mlp_activation_fnQ      z#OPTInferenceModel.mlp_activation_fnc                 C   r/   r   )NormTypeEnum	LayerNormr   r   r   r   	norm_typeU   r2   zOPTInferenceModel.norm_typec                 C   r/   r   )PositionalEmbeddingTypenoner   r   r   r   positional_embedding_typeY   r2   z+OPTInferenceModel.positional_embedding_typec                 C   s   d S r   r   r   r   r   r   positional_embedding_config]   s   z-OPTInferenceModel.positional_embedding_configNc                 C   s.   t | jjj| j| jddd}t|| j| _dS )z
        Performs setup and creates embedding DSModule. Since OPT includes trained
        positional embeddings, we will override the base model implementation.
        T   )
max_tokensresidual_dtypeembedding_dimpositional_embeddingpositional_offsetN)DSEmbeddingsConfig_engine_configstate_managermax_ragged_batch_sizer.   r    r   embed)r   embed_configr   r   r   make_embedding_layere   s   
z&OPTInferenceModel.make_embedding_layerragged_batchc                 C   sB   |  || jj| jj}|jd | jkrtd|j d| j |S )NzEmbedding output shape z does not match model_dim )rD   r   word_embword_emb_posshaper    
ValueError)r   rG   rD   r   r   r   _forward_embedw   s   z OPTInferenceModel._forward_embed	layer_idxresidualhidden_statesragged_batch_infoc                 C   s  | j | }| j|}| j||j|jd}| |||}| j||j|j	d}| j
dkr3tj|| jd | j|||j|jd\}}| j||j|jd}| j||j|jd}| j
dkratj|| jd || jd kr| j |d  }| j|||j|jd\}}||fS || ||fS )N)br   groupbeta)r   rB   	get_cacheqkvqkv_wqkv_battnattn_out
attn_out_w
attn_out_btp_sizedist
all_reduce_base_mp_groupnormmlp_norm_gammamlp_norm_betamlp_1mlp_1_wmlp_1_bmlp_2mlp_2_wmlp_2_br   attn_norm_gammaattn_norm_betaadd_)r   rN   rO   rP   rQ   
cur_paramskv_cachenext_paramsr   r   r   _forward_transformer_layer~   s4   




	
z,OPTInferenceModel._forward_transformer_layerc                 C   s   | j || jj|| jj| jjd}| jdkrNt| j| j|jd |jd f}t| j	|jd | j
f}tj||| jd ||ddd|jd | j
 |S |S )N)gammarV   r   r   rS   r:   )unembedr   word_unembedfinal_norm_wfinal_norm_br_   r   _comm_logitsrK   _return_logitsr!   r`   all_gather_into_tensorrb   copy_permutereshape)r   rP   rQ   logitscomm_bufferfull_logitsr   r   r   _forward_unembed   s   
 $z"OPTInferenceModel._forward_unembedwrapped_batchc                 C   s`   |  |}| j|d | jd j| jd jd\}}t| jD ]}| ||||\}}q| ||S )Nr   rU   )	rM   rc   r   rl   rm   ranger   rr   r   )r   r   rO   rP   rN   r   r   r   forward   s   





zOPTInferenceModel.forward)r   N)%__name__
__module____qualname____doc__r   r   __annotations__r   r   propertyintr   r   r    r!   r#   r"   r&   r   r.   r   r1   r3   r5   r6   r8   RotateHalfConfigr9   rF   r
   r(   TensorrM   r   rr   r   r   r   r   r   r   r      sV   
 

'r   )typingr   r   r   r(   deepspeed.commcommr`   	allocatorr   inference_utilsr   r   model_implementationsmodules.configsraggedr
   	containerr   r   modules.heuristicsr   DSTransformerModelBaser   r   r   r   r   <module>   s   