o
    i                     @   s   d dl mZ d dlZddlmZmZ ddlmZ ee	Z
e Z				ddejjdejd	ejd
ejdeej dedee dee dee deejdf fddZdS )    )OptionalN   )_flash_attention_forward!flash_attn_supports_top_left_mask)logging        modulequerykeyvalueattention_maskdropoutscalingsliding_windowsoftcapreturnc	                 K   s  |	 dds|	 dd d urtd |jd }
tdd |jD r&td|d	d}|d	d}|d	d}d }|jtj	krat
 rIt }nt| jd
rT| jj}ntdd |  D jj}|	dd  t||||f|
| j||||t|| jjd	|	}|d fS )Noutput_attentionsF	head_maskz`flash_attention_2` does not support `output_attentions=True` or `head_mask`. Please set your attention to `eager` if you want any of these features.r   c                 s   s    | ]}|d kV  qdS )r   N ).0dimr   r   l/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/transformers/integrations/flash_attention.py	<genexpr>#   s    z*flash_attention_forward.<locals>.<genexpr>zTensor query has shape  with a zero dimension.
FlashAttention does not support inputs with dim=0.
Please check your input shapes or use SDPA instead.   _pre_quantization_dtypec                 s   s"    | ]}t |tjjr|V  qd S )N)
isinstancetorchnnLinear)r   layerr   r   r   r   <   s     	is_causal)	query_lengthr    r   softmax_scaler   r   use_top_left_masktarget_dtypeattn_implementation)getloggerwarning_onceshapeany
ValueError	transposedtyper   float32is_autocast_enabledget_autocast_gpu_dtypehasattrconfigr   nextmodulesweightpopr   r    _use_top_left_mask_attn_implementation)r   r	   r
   r   r   r   r   r   r   kwargsseq_lenr$   attn_outputr   r   r   flash_attention_forward   sN   


r<   )r   NNN)typingr   r   modeling_flash_attention_utilsr   r   utilsr   
get_logger__name__r'   r7   r   ModuleTensorfloatinttupler<   r   r   r   r   <module>   s>    
		