o
    Xi6                     @  s   d dl mZ d dlmZmZ d dlZd dlmZm	Z	m
Z
 	 eeejf ZG dd de
jZdd	d
ZeddZeddZeeZeeZdS )    )annotations)SequenceUnionN)_fusion_utils	_ir_utilspatternc                      s@   e Zd Zd fddZdd Z						ddddZdd Z  ZS )MultiHeadAttention	is_rotaryboolhas_past_presentis_cross_attentionc                  s"   t  | || _|| _|| _d S N)super__init__
_is_rotary_has_past_present_is_cross_attention)selfnamer	   r   r   	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/onnxscript/rewriter/ort_fusions/mha.pyr   $   s   
zMultiHeadAttention.__init__c
                 C  sh  |j |tjdgd}
|j|
g dd}| jsG|j |tjdgd}tj|j|g dd|gdddgd	}|j |tjd
gd}|j|g dd}n|}| jrg|j||||	dd}| jsd|j||||	dd}n|}n|}|}| jrw|j	||dd}n|}| jr|j	||dd}n|}|}|}|j
|||ddgdd}|j|g dd}|j |tjdgd}| jr|||fS |S )Nquery_BSHDh)_outputsr            perm	key_BSHDhkey_transposedTF)tag_var
tag_valuesvalue_BSHDhcom.microsoft_domainaxissdpa_outputzai.onnxruntime._fusion)_allow_other_inputsr   r(   attention_reshaped)Reshaper   	ANY_VALUE	Transposer   OrValuer   RotaryEmbeddingr   ConcatSDPA)r   op	query_BSDkeyvaluepast_key
past_valueposition_idscossinr   query_BHSDhr%   value_BHSDhquery_BHSDh_embkey_BHSDh_embkey_seq	value_seqkey_seq_to_sdpavalue_seq_to_sdpasdpaattention_transposed	attentionr   r   r   r   1   s\   




zMultiHeadAttention.patternNreturnpattern.MatchResultc                   s  t  }| }i  d0 fdd}||g d	r"|d
| d|S ||g dr3|d
| d|S | jrm||g drG|d
| d|S ||g drX|d
| d|S |d urb|d|S |d url|d|S nc||g dr~|d
| d|S |jd}|	rdnd}||kr|d| d| |S ||g dr|d
| d|S | jr||g dr|d
| d|S ||g dr|d
| d|S d }t|j	dkr|j	d }|| _
|d urR|j }d u r|d |S | d!kr+||g d"r|d
| d#|S  d$}| d%krd&| _n8|d'kr&d(| _n/|d)S | d*krH||d$d+grD|d
| d,|S d(| _n|d-| d.|S d&| _|jd/d | _|S )1Nvalir.ValuedimsSequence[str]rJ   r
   c                   s   t  | | S r   )r   check_shape_bool)rL   rN   bindingsr   r   no_match   s   z*MultiHeadAttention.check.<locals>.no_match)BSDzShape mismatch: z3 does not match expected dimensions ['B', 'S', 'D'])rT   rU   HDhz9 does not match expected dimensions ['B', 'S', 'H', 'Dh'])rT   rW   SkvrX   z; does not match expected dimensions ['B', 'H', 'Skv', 'Dh'])rT   rW   rY   Dvz; does not match expected dimensions ['B', 'H', 'Skv', 'Dv']z+past_key should be None in cross-attention.z-past_value should be None in cross-attention.)rT   rY   rV   z5 does not match expected dimensions ['B', 'Skv', 'D']
key_formatBHSdBSHdzUnexpected key format: z. Expected: )rT   rW   SpastrX   z= does not match expected dimensions ['B', 'H', 'Spast', 'Dh'])rT   rW   r^   rZ   z= does not match expected dimensions ['B', 'H', 'Spast', 'Dv']r   z Mask shape cannot be determined.   )B_or_1H_or_1S_or_1StzH does not match expected dimensions ['1 or B', '1 or H', '1 or S', 'St']rb   rU   Fr   Tz0Mask dimension 2 cannot be verified to be 1 or Sr   rc   z4 does not match expected dimensions ['1 or S', 'St']zMask shape z% is not supported. Expected 2D or 4D.scale)rL   rM   rN   rO   rJ   r
   )r   MatchResultproducerfailr   
attributes
get_stringr   leninputsmaskshaperankget_use_mask_broadcast	get_float_scale)r   r6   r7   r8   r9   r,   r:   r;   r   r"   r!   r%   _check_result	sdpa_noderS   sdpa_key_formatexpected_key_formatrl   
mask_shape
mask_dim_2r   rQ   r   check   s   















zMultiHeadAttention.checkc                 K  s:  t |d}t|tsd S | jr+|j|||	|
dd}| js(|j|||	|
dd}n7|}n4| jr[|}|j|g dd}|||j	g dd}|j|g dd}|||j	g dd}n|}|}| j
}| jr|j	dgd}|j|ddd	}|j||||d
d}|||}dd| j  }|j|||d d ||||d|| jdS )Nr   r&   r'   r   r   )r   r   )
value_intsr   )startendr   r*   )	num_headsr(   r   rd   )r   get_dim
isinstanceintr   r3   r   r1   r/   Constantrl   rp   Shaper4   Expandr   r   rr   )r   r6   r7   r8   r9   r:   r;   r   r<   r=   r>   rs   r   query_BSD_embkey_BSD_embrl   onerU   
shape_11S1num_outputsr   r   r   rewrite  sR   


zMultiHeadAttention.rewrite)r	   r
   r   r
   r   r
   )NNN)rJ   rK   )__name__
__module____qualname__r   r   rz   r   __classcell__r   r   r   r   r   #   s    c r   r   r
   c                   s*    fdddD }t dd |D }|S )Nc                   s0   g | ]} r	d gnd dgD ]}| |dqqS )FT)r	   r   r   r   ).0r	   r   r   r   r   
<listcomp>X  s    z"_make_rule_set.<locals>.<listcomp>)FTc                 S  sP   g | ]$}t jd |d rdnd |d rdnd |d rdnd fi |qS )MHAr	   _Rotary r   _Pastr   _CrossAttention)r   rule)r   paramsr   r   r   r   d  s    )r   RewriteRuleSet)r   parameter_combinations	mha_rulesr   r   r   _make_rule_setW  s   
r   Fr   T)r   r
   )
__future__r   typingr   r   onnx_irironnxscript.rewriterr   r   r   r   SymbolicDimDimRewriteRuleClassBaser   r   mha_rules_no_pastmha_rules_with_pastapply_fusion_rules	fuse_mha1	fuse_mha2r   r   r   r   <module>   s     
6


