o
    i@                  #   @   s:  U d dl Z d dlmZmZmZ d dlmZ d dlZd dlm	Z	 edZ
edZi Zeejjef ed< eh dZd	ed
edeee
ef gee
ef f fddZedd	d<dd d ddejdejdejdeej dedededejfddZdee dedefddZdejd ededejfd!d"Zd#ejd$ejd%ed&edee d'edejfd(d)Zd%ed&eddfd*d+Zd#ejd$ejd%ed&edee dejfd,d-Zed.d			d=dd d d dd/dd0d#ejd$ejd1ejd2eej d3eej d4eej d5ed6ed7ed'edee d8ed9ee deejejejejf fd:d;Z dS )>    N)CallableOptionalTypeVar)	ParamSpec)_dtype_mappings_P_RONNX_ATEN_DECOMP_TABLE>      
         op_typeopset_versionreturnc                    s,   dt ttf dt ttf f fdd}|S )zDDecorator to register an ONNX operator with a custom implementation.funcr   c                    sP   d }t jjd  d| dd| }| tttt jj |< ||  |S )Nopsetzonnx::. )mutates_args)torchlibrary	custom_opr	   getattropsonnxregister_fake)r   overloadtorch_opr   r   r   Q/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/torch/onnx/ops/_impl.py	decorator   s   

z_onnx_op.<locals>.decorator)r   r   r   )r   r   r!   r   r   r    _onnx_op   s   (r"   RotaryEmbedding   F)interleaved	num_headsrotary_embedding_dimx	cos_cache	sin_cacheposition_idsr%   r&   r'   c                   s  | j t}d  d durWt dkfdd tj d  k fdd tj d kfd	d t dkoN dkfd
d nt dkod dkfdd |dkrwt| d} n$|dkrt|dkfdd d }|| }	 ||	g}
t| |
} tt| j dkdd  | j d }	|dkr|	}| ddddddd|f }| dddddd|df }|d dur  ntj d  koj d k fdd tj d  koj d k fdd tj d kfdd tj d kfdd tdtd|rk|dddddddddf }|dddddddddf }n
tj|ddd\}}| |  }| |  }|rt|d}t|d}tj	||fdd}t||j }n	tj	||fdd}tj	||fdd}|dkrt|S t|dS )z_RotaryEmbedding-23 https://onnx.ai/onnx/operators/onnx__RotaryEmbedding.html#rotaryembedding-23r   N   c                      s   d j  S )Nz6position_ids must be 2D when provided. Received shape shaper   )r+   r   r    <lambda>F   s    z%rotary_embedding_23.<locals>.<lambda>c                      s   d  dj d  S )Nz6position_ids first dim (batch) must match x.shape[0] (). Received r   r.   r   )
batch_sizer+   r   r    r0   J       r
   c                      s   d d j d  S )Nz;position_ids second dim (sequence) must match x.shape[-2] (r1   r
   r.   r   )r+   sequence_lengthr   r    r0   N   r3   c                         d j  dj  S )NzWcos_cache/sin_cache must be 2D when position_ids is provided. Received cos_cache shape , sin_cache shape r.   r   r)   r*   r   r    r0   R   
       c                      r5   )Nz[cos_cache/sin_cache must be 3D when position_ids is not provided. Received cos_cache shape r6   r.   r   r7   r   r    r0   X   r8      )r   r-   r
   r9   c                      s
   d  S )NzKnum_heads must be provided for 3D inputs. Received input tensor with shape r   r   )input_shaper   r    r0   e   s   
 c                   S      dS )Nzx should be a 4D tensor by nowr   r   r   r   r    r0   l       c                      s   dj  d  d dS )Nzcos has shape  but expected (batch=, seq=, ...)r.   r   )r2   cosr4   r   r    r0          c                      s   dj  d  d dS )Nzsin has shape r>   r?   r@   r.   r   )r2   r4   sinr   r    r0      rB   c                      s   d j d  d dS )NzLast dimension of cos cache (rD   ') should match rotary_embedding_dim/2 ().r.   r   )rA   rotary_embedding_dim_halfr   r    r0          c                      s   dj d  d  dS )NzLast dimension of sin cache (rD   rE   rF   r.   r   )rG   rC   r   r    r0      rH   dim)
r/   lenr   _checkrJ   permutereshape	unsqueezechunkcat)r(   r)   r*   r+   r%   r&   r'   
input_rankhidden_size	head_size	new_shapex_rotatex_not_rotatex1x2realimagx_rotate_concatoutputr   )	r2   rA   r)   r;   r+   rG   r4   rC   r*   r    rotary_embedding_23/   s   



  "$
r^   scalerT   c                 C   s   | dur| S dt | S )z/Get the scale factor for attention computation.Ng      ?)mathsqrt)r_   rT   r   r   r    _get_scale_factor   s   rb   tensorr2   c                 C   s:   | j d | j d }}|| }| ||||dd S )z1Reshape 3D tensor to 4D for multi-head attention.r
   r-   )r/   view	transpose
contiguous)rc   r2   r&   r4   rS   rT   r   r   r    _reshape_3d_to_4d   s   rg   QKcurrent_q_num_headscurrent_kv_num_headsqk_matmul_output_modec              	   C   s2   |dkrt | ||||S tt| |ddS )z1Get QK output tensor based on the specified mode.r   r,   rD   )_compute_qk_output_for_mode_0r   
zeros_likematmulre   )rh   ri   rj   rk   r_   rl   r   r   r    _get_qk_output_for_aten_spda   s
   	
rp   c                    s"   t   dk fdd dS )z-Validate Group Query Attention configuration.r   c                      s   d d  dS )Nzq_num_heads (z%) must be divisible by kv_num_heads (z	) for GQAr   r   rk   rj   r   r    r0      s    z-_validate_gqa_configuration.<locals>.<lambda>N)r   rL   )rj   rk   r   rq   r    _validate_gqa_configuration   s   
rr   c                 C   s`   |}||kr|| }|j |dd}t|| jd }t|}| | }	|| }
t|	|
ddS )zDHelper function to compute QK output for qk_matmul_output_mode == 0.r
   rI   r9   r,   rD   )repeat_interleaverb   r/   r`   ra   r   ro   re   )rh   ri   rj   rk   r_   K_for_qkrepeat_factorscale_factor
sqrt_scaleQ_scaledK_scaledr   r   r    rm      s   	
rm   	Attention        )	is_causalkv_num_headsq_num_headsrl   r_   softcapsoftmax_precisionV	attn_maskpast_key
past_valuer|   r}   r~   r   r   c          (      C   s  d\}}}t | j}| jd }t | jdkr;t|dko|dkdd  | jd }t| ||} t|||}t|||}tt | jdkoQt |jdkoQt |jdkdd  | j| }t|
|}
|d	urmtj||g|d
n| }|d	ur~tj||g|d
n| }||}}| j| }|j| }| j| }|j| }|dko|	dko|d	u o|d	u p|jtj	k}t
|| |rd	}|d	ur|jtj	kr| n|}tjjj| |||d||
t	||kd}t| ||||
|	}n||kr|| }|j||d
}|j||d
}tj||| j| jd}|r+t|d	u dd  ttj||tj	| jd}|| td}|d	urE|jtj	krA|| td}n|| }t|
| jd } t| }!| |! }"||! }#t|"|#dd}$|$}|$| }%|	dkrq|%}|dkr|t|%|  }%|	dkr|%}|d	ur|tv r|%j}&|%tj| }%tj|%dd
}'|'|&}'ntj|%dd
}'ntj|%dd
}'|	dkr|'}t|'|}|dkr|dd  !||d}||||fS )zMAttention-23 https://onnx.ai/onnx/operators/onnx__Attention.html#attention-23)r
   r-   r9   r   r9   c                   S   r<   )Nz;q_num_heads and kv_num_heads must be provided for 3D inputsr   r   r   r   r    r0     r=   zattention_23.<locals>.<lambda>r
   r:   c                   S   r<   )Nz'Q, K, and V should be 4D tensors by nowr   r   r   r   r    r0   !  r=   NrI   r{   )r   	dropout_pr|   r_   
enable_gqa)dtypedevicec                   S   r<   )Nz'Cannot use both is_causal and attn_maskr   r   r   r   r    r0   v  r=   z-infr,   rD   r-   )"rK   r/   r   rL   rg   rb   rQ   cloner   boolrr   nn
functionalscaled_dot_product_attentionrp   rs   zerosr   trilonesmasked_fillfloatr`   ra   ro   re   tanh-_ATTENTION_23_ALLOWED_INTERMEDIATE_PRECISIONStor   ONNX_DTYPE_TO_TORCH_DTYPEsoftmaxrf   rd   )(rh   ri   r   r   r   r   r|   r}   r~   rl   r_   r   r   num_head_dimsequence_dimhead_diminput_shape_lenr2   q_sequence_lengthq_head_sizepresent_keypresent_valuerj   rk   kv_sequence_lengthcan_use_sdpasdpa_attn_maskr]   	qk_outputru   	attn_biascausal_maskrv   rw   rx   ry   qk_matmul_outputqk_with_biasoriginal_dtype
qk_softmaxr   r   r    attention_23   s   



(
















r   )N)NNN)!r`   typingr   r   r   typing_extensionsr   r   torch.onnx.opsr   r   r   r	   dict_ops
OpOverload__annotations__	frozensetr   strintr"   Tensorr   r^   r   rb   rg   rp   rr   rm   tupler   r   r   r   r    <module>   s   


	 




	
