o
    Ti                     @   s   d dl mZmZmZ d dlZd dlmZ ddlm	Z	 ddl
mZmZ ddlT ddlT ddlT ddlmZ dd	lmZmZ G d
d deZdS )    )IterableOptionalTupleN   )
empty_from)ActivationType	DtypeEnum)*)RaggedBatchWrapper   )MistralNonTransformerContainerMistralTransformerContainerc                   @   s  e Zd ZU dZee ed< 	 eee  ed< 	 	 e	de
fddZ	 e	de
fddZe	de
fd	d
Ze	de
fddZe	de
fddZe	de
fddZe	de
fddZe	de
fddZe	defddZe	defddZe	defddZe	defddZe	dee fddZ	 dedejfd d!Z d"e
d#ejd$ejd%ede!ejejf f
d&d'Z"d$ejd%edejfd(d)Z#d*edejfd+d,Z$d-S ).MistralInferenceModelzP
    Inference model implementation for ragged batching for Mistral models.
    _non_transformer_transformerreturnc                 C      | j jS N)_configmax_seq_lengthself r   n/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/mistral/model.pymax_sequence_length(      z)MistralInferenceModel.max_sequence_lengthc                 C   r   r   )r   num_hidden_layersr   r   r   r   
num_layers0   r   z MistralInferenceModel.num_layersc                 C   r   r   )r   hidden_sizer   r   r   r   	model_dim4   r   zMistralInferenceModel.model_dimc                 C   r   r   )r   
vocab_sizer   r   r   r   r    8   r   z MistralInferenceModel.vocab_sizec                 C   s   | j | j S r   )r   n_headsr   r   r   r   	head_size<   s   zMistralInferenceModel.head_sizec                 C   r   r   )r   num_attention_headsr   r   r   r   r!   @   r   zMistralInferenceModel.n_headsc                 C   r   r   )r   intermediate_sizer   r   r   r   intermediate_dimD   r   z&MistralInferenceModel.intermediate_dimc                 C   r   r   )r   num_key_value_headsr   r   r   r   
n_heads_kvH   r   z MistralInferenceModel.n_heads_kvc                 C   s0   | j jtjkr
tjS | j jtjkrtjS td)Nz Only fp16 and bf16 are supported)	r   torch_dtypetorchfloat16r   fp16bfloat16bf16NotImplementedErrorr   r   r   r   activation_dtypeL   s
   z&MistralInferenceModel.activation_dtypec                 C   sT   | j j }|dkrtjS |dkrtjS |dkrtjS |dkr"tjS td| d)NgelurelugegelusiluzActivation z not supported)r   
hidden_actlowerr   GEGLUReGLUSiGLUr.   )r   
activationr   r   r   mlp_activation_fnU   s   z'MistralInferenceModel.mlp_activation_fnc                 C      t jS r   )NormTypeEnumRMSNormr   r   r   r   	norm_typec      zMistralInferenceModel.norm_typec                 C   r;   r   )PositionalEmbeddingTyperotate_halfr   r   r   r   positional_embedding_typeg   r?   z/MistralInferenceModel.positional_embedding_typec                 C   s   t | jjdS )N)
theta_base)RotateHalfConfigr   
rope_thetar   r   r   r   positional_embedding_configk   s   z1MistralInferenceModel.positional_embedding_configragged_batchc                 C   s<   |  || jj}|jd | jkrtd|j d| j |S )z
        Performs the embedding lookup prior to running the transformer of the model.

        Arguments:
            ragged_batch (RaggedBatchWrapper): The batch to embed.

        Returns:
            torch.Tensor: The embedded batch.
        zEmbedding output shape z does not match model_dim )embedr   word_embshaper   
ValueError)r   rG   rI   r   r   r   _forward_embeds   s   
z$MistralInferenceModel._forward_embed	layer_idxresidualhidden_statesragged_batch_infoc                 C   s  | j | }| j|}| j||jdd}| |||}| j||jdd}| jdkr1t	j
|| jd | j|||jdd\}}| j||jdd}| j||jdd}| jdkr\t	j
|| jd || jd krz| j |d  }| j|||jdd\}}||fS || ||fS )aL  
        Executes one (slightly offset) layer of the transformer. This implementation does a peak-ahead
        optimization to fuse the layer norm of the next layer into the current layer.

        Arguments:
            layer_idx (int): The index of the layer to execute.
            residual (torch.Tensor): The residual tensor from the previous layer.
            hidden_states (torch.Tensor): The hidden states from the previous layer. This is the
                hidden states after pre normalization.
            ragged_batch_info (RaggedBatchWrapper): The batch metadata.
        N)br   groupbeta)r   state_manager	get_cacheqkvqkv_wattnattn_out
attn_out_wtp_sizedist
all_reduce_base_mp_groupnormmlp_norm_gammamlp_1mlp_1_wmlp_2mlp_2_wr   attn_norm_gammaadd_)r   rN   rO   rP   rQ   
cur_paramskv_cachenext_paramsr   r   r   _forward_transformer   s$   



z*MistralInferenceModel._forward_transformerc                 C   s   | j || jj|| jjd}| jdkrKt| j| j|jd |jd f}t| j|jd | j	f}t
j||| jd ||ddd|jd | j	 |S |S )z
        Performs unembedding of the hidden states to logits. This will only sample the final
        token of each sequence.
        )gammar   r   rS      )unembedr   word_unembed
final_normr^   r   _comm_logitsrK   _return_logitsr    r_   all_gather_into_tensorra   copy_permutereshape)r   rP   rQ   logitscomm_bufferfull_logitsr   r   r   _forward_unembed   s   
 $z&MistralInferenceModel._forward_unembedwrapped_batchc                 C   sX   |  |}| j|d | jd jd d\}}t| jD ]}| ||||\}}q| ||S )Nr   rU   )rM   rb   r   rh   ranger   rm   r|   )r   r}   rO   rP   rN   r   r   r   forward   s
   
zMistralInferenceModel.forwardN)%__name__
__module____qualname____doc__r   r   __annotations__r   r   propertyintr   r   r   r    r"   r!   r%   r'   r   r/   r   r:   r<   r>   r@   rB   rD   rF   r
   r)   TensorrM   r   rm   r|   r   r   r   r   r   r      sV   
 
,r   )typingr   r   r   r)   deepspeed.commcommr_   	allocatorr   inference_utilsr   r   model_implementationsmodules.configsmodules.interfacesraggedr
   	containerr   r   DSTransformerModelBaser   r   r   r   r   <module>   s   