o
    i                     @   sn   d dl mZ d dlZd dlmZ dejdedejfddZd	ejd
ejdejdejdeej defddZ	dS )    )OptionalN)nnhidden_statesn_repreturnc                 C   s^   | j \}}}}|dkr| S | dddddddddf |||||} | ||| ||S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
       N)shapeexpandreshape)r   r   batchnum_key_value_headsslenhead_dim r   ^/home/ubuntu/LTX-2/.venv/lib/python3.10/site-packages/transformers/integrations/eager_paged.py	repeat_kv   s
   0r   modulequerykeyvalueattention_maskscalingc                 K   s  | dd }|d ur*|j||| jfi |\}}|ddd}|ddd}t| dr;t|| j}t|| j}t|t	rWt
| dd}|dksN|d u rPdnd}	||	 }
n|}
t||dd	| }|
d urm||
 }t| d
r| jdddd|jd d|jd d}tj||gdd}||jdddj }tjj|dtjd|j}|dd df }ntjj|dtjd|j}t||}|dd }||fS )Ncacher   r   num_key_value_groupssliding_windowfull_attentionsliding_attention      sinks)dimT)r"   keepdim)r"   dtype.)popupdate	layer_idx	transpose	unsqueezehasattrr   r   
isinstancedictgetattrtorchmatmulr   r
   r	   r   catmaxvaluesr   
functionalsoftmaxfloat32tor$   
contiguous)r   r   r   r   r   r   kwargsr   r   
layer_typecausal_maskattn_weightsr   attn_outputr   r   r   eager_paged_attention_forward   s4   




*r=   )
typingr   r.   r   Tensorintr   Modulefloatr=   r   r   r   r   <module>   s"    