o
    ۷i+                     @   s   d dl mZ d dlZddlmZmZmZ ddlmZ e	e
ZedddZed	ddZe Ze Zd
ejdedejfddZdeej dejdefddZ			ddejjdejdejdejdeej dedee dee deejdf fddZdS )    )OptionalN   )is_torch_npu_availableis_torch_xpu_availablelogging)is_torch_greater_or_equalz2.5T)
accept_devz2.8hidden_statesn_repreturnc                 C   s^   | j \}}}}|dkr| S | dddddddddf |||||} | ||| ||S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandreshape)r	   r
   batchnum_key_value_headsslenhead_dim r   ^/home/ubuntu/vllm_env/lib/python3.10/site-packages/transformers/integrations/sdpa_attention.py	repeat_kv   s
   0r   attention_maskkeyc                 C   s<   t rtot|tjj S trdS to| d u ot|tjj S )NF)_is_torch_xpu_available#_is_torch_greater_or_equal_than_2_8
isinstancetorchfxProxy_is_torch_npu_available#_is_torch_greater_or_equal_than_2_5)r   r   r   r   r   use_gqa_in_sdpa   s
   r!           modulequeryvaluedropoutscaling	is_causalc                 K   sH  | dds| dd urtd i }	t| dr/t||s+t|| j}t|| j}nddi}	|d urK|jdkrK|d d d d d d d |jd	 f }|d u r`|jd
 dko_|d u o_t	| dd}t
j rot|t
jro| }tr|d ur|jt
jkrt
| |j}t
jjj|||f||||d|	}
|
dd
 }
|
d fS )Noutput_attentionsF	head_maskz`sdpa` attention does not support `output_attentions=True` or `head_mask`. Please set your attention to `eager` if you want any of these features.num_key_value_groups
enable_gqaT   r   r   r(   )	attn_mask	dropout_pscaler(   )getloggerwarning_oncehasattrr!   r   r+   ndimr   getattrr   jit
is_tracingr   Tensoritemr   dtypeboollogical_nottodevicenn
functionalscaled_dot_product_attention	transpose
contiguous)r#   r$   r   r%   r   r&   r'   r(   kwargssdpa_kwargsattn_outputr   r   r   sdpa_attention_forward0   sB   

&"
rI   )r"   NN)typingr   r   utilsr   r   r   utils.import_utilsr   
get_logger__name__r3   r    r   r   r   r:   intr   r=   r!   rA   ModulefloattuplerI   r   r   r   r   <module>   sB    

