o
    	۷i|+                     @   sT  d Z ddlmZmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZmZ ddlmZ e e!Z"dZ#dZ$G dd dej%Z&d%ddZ'G dd dej%Z(G dd deZ)G dd deZ*G dd deZ+G d d! d!eZ,G d"d# d#eZ-g d$Z.dS )&zPyTorch Phi-3 model.    )CallableOptionalN)nn   )ACT2FN)Cache)GenerationMixin)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONS)Unpack)logging)deprecate_kwarg   )MistralDecoderLayerMistralForCausalLM MistralForSequenceClassificationMistralForTokenClassificationMistralPreTrainedModeleager_attention_forwardrotate_half   )
Phi3Configz microsoft/Phi-3-mini-4k-instructr   c                       s2   e Zd Z fddZdejdejfddZ  ZS )Phi3MLPc                    sP   t    || _tj|jd|j dd| _tj|j|jdd| _t	|j
 | _d S )Nr   Fbias)super__init__configr   Linearhidden_sizeintermediate_sizegate_up_proj	down_projr   
hidden_actactivation_fn)selfr   	__class__ [/home/ubuntu/vllm_env/lib/python3.10/site-packages/transformers/models/phi3/modular_phi3.pyr   2   s
   
zPhi3MLP.__init__hidden_statesreturnc                 C   s4   |  |}|jddd\}}|| | }| |S )Nr   dim)r!   chunkr$   r"   )r%   r*   	up_statesgater(   r(   r)   forward:   s   

zPhi3MLP.forward)__name__
__module____qualname__r   torchFloatTensorr2   __classcell__r(   r(   r&   r)   r   1   s    r   c                 C   s   | |}| |}|jd }| dd|f | d|df }}|dd|f |d|df }	}
tj|| t||  |gdd}tj|	| t|	|  |
gdd}||fS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    r,   .Nr-   )	unsqueezeshaper6   catr   )qkcossinposition_idsunsqueeze_dim
rotary_dimq_rotq_passk_rotk_passq_embedk_embedr(   r(   r)   apply_rotary_pos_embC   s   


""""rI   c                       s   e Zd ZdZddedee f fddZeddd	d
		dde	j
dee	j
e	j
f dee	j
 dee dee	j dee dee	j
ee	j
 eee	j
  f fddZ  ZS )Phi3Attentionz=Multi-headed attention from 'Attention Is All You Need' paperNr   	layer_idxc                    s   t    || _|| _t|d|j|j | _|j|j | _	|j| _| jd | _
|j| _d| _|j| j d|j| j   }tj|j| j |jdd| _tj|j|dd| _d S )Nhead_dimg      Tr   Fr   )r   r   r   rK   getattrr   num_attention_headsrL   num_key_value_headsnum_key_value_groupsscalingattention_dropout	is_causalr   r   o_projqkv_proj)r%   r   rK   op_sizer&   r(   r)   r   f   s   
zPhi3Attention.__init__past_key_valuepast_key_values4.58new_nameversionr*   position_embeddingsattention_maskcache_positionkwargsr+   c                 K   s~  |j d d }g |d| jR }| |}	| jj| j }
|	dd |
f }|	d|
|
| j| j  f }|	d|
| j| j  d f }||dd}||dd}||dd}|\}}t||||\}}|d ur~|||d}|	||| j
|\}}t}| jjdkrt| jj }|| ||||f| jsdn| j| jt| jdd d	|\}}|jg |dR   }| |}||fS )
Nr,   .r   r   )r?   r>   r_   eagerg        sliding_window)dropoutrQ   rb   )r:   rL   rU   r   rN   rO   view	transposerI   updaterK   r   _attn_implementationr
   trainingrR   rQ   rM   reshape
contiguousrT   )r%   r*   r]   r^   rX   r_   r`   input_shapehidden_shapeqkv	query_posquery_states
key_statesvalue_statesr>   r?   cache_kwargsattention_interfaceattn_outputattn_weightsr(   r(   r)   r2   u   sD   

	

zPhi3Attention.forward)N)NN)r3   r4   r5   __doc__r   r   intr   r   r6   Tensortupler   
LongTensorr   r	   r2   r8   r(   r(   r&   r)   rJ   c   s*    rJ   c                       s   e Zd Zdedef fddZedddd							
				ddejde	ej de	ej
 de	e de	e de	ej
 de	eejejf  dee deeje	eejejf  f fddZ  ZS )Phi3DecoderLayerr   rK   c                    sL   t  || || _t||d| _t|| _t|j	| _
t|j	| _d S )N)r   rK   )r   r   r   rJ   	self_attnr   mlpr   Dropoutresid_pdropresid_attn_dropoutresid_mlp_dropout)r%   r   rK   r&   r(   r)   r      s   
zPhi3DecoderLayer.__init__rW   rX   rY   rZ   NFr*   r^   r@   	use_cacher_   r]   r`   r+   c              
   K   sj   |}	|  |}| jd|||||||d|\}}
|	| | }|}	| |}| |}|	| | }|S )N)r*   r^   r@   rX   r   r_   r]   r(   )input_layernormr|   r   post_attention_layernormr}   r   )r%   r*   r^   r@   rX   r   r_   r]   r`   residualself_attn_weightsr(   r(   r)   r2      s&   




zPhi3DecoderLayer.forward)NNNFNN)r3   r4   r5   r   rw   r   r   r6   rx   r   rz   r   boolry   r   r	   r7   r2   r8   r(   r(   r&   r)   r{      s8    	
r{   c                   @   s   e Zd ZdZdS )Phi3PreTrainedModelz0.0.5N)r3   r4   r5   _versionr(   r(   r(   r)   r      s    r   c                   @   s$   e Zd Z							dddZdS )Phi3ForCausalLMNTc	                 K   sb   |r| j jr|jd | j jd kr|d }
|
| j jkrd }tj| f||||||||d|	}|S )Nr   r   )	input_idsrX   r^   inputs_embedsr_   r@   r   logits_to_keep)r   rope_scalingr:    original_max_position_embeddingsr   prepare_inputs_for_generation)r%   r   rX   r^   r   r_   r@   r   r   r`   past_lengthmodel_inputsr(   r(   r)   r      s.   
z-Phi3ForCausalLM.prepare_inputs_for_generation)NNNNNTN)r3   r4   r5   r   r(   r(   r(   r)   r      s    r   c                   @      e Zd ZdS )Phi3ForSequenceClassificationNr3   r4   r5   r(   r(   r(   r)   r         r   c                   @   r   )Phi3ForTokenClassificationNr   r(   r(   r(   r)   r     r   r   )r   	Phi3Modelr   r   r   )Nr   )/rv   typingr   r   r6   r   activationsr   cache_utilsr   
generationr   modeling_flash_attention_utilsr	   modeling_utilsr
   processing_utilsr   utilsr   utils.deprecationr   mistral.modeling_mistralr   r   r   r   r   r   r   configuration_phi3r   
get_loggerr3   logger_CHECKPOINT_FOR_DOC_CONFIG_FOR_DOCModuler   rI   rJ   r{   r   r   r   r   __all__r(   r(   r(   r)   <module>   s4   $	

 F+*