o
    Xi~A                     @  s   d dl mZ d dlmZmZ d dlZd dlZd dl	m
  mZ d dlmZmZmZ 	 eeejf ZdddZdd ZG dd dejZe ZG dd dejZe ZeegZeeZdS )    )annotations)SequenceUnionN)_basics	_ir_utilspatternvalueir.Valuenamestrmodelir.Modelreturnboolc                 C  s   | |j jv o
| j|kS N)graphinputsr
   )r   r
   r    r   W/home/ubuntu/.local/lib/python3.10/site-packages/onnxscript/rewriter/ort_fusions/gqa.py_is_model_input#   s   r   c                 C  s,  | j |ddd}| |}| j |ddd}	| |	}
| |
|}| |dg}| |
|d}| j||dd}| ||}| j|ddd}t	||g}| d|d}| |ddg}| 
||}| ||}| ||}| ||}t	||g}| j||d	}| ||}| |ddg}| ||}|S )
zODefines a pattern for a pure causal mask, with optional sliding window support.      )endstart   r   axis)upperto)ShapeSqueezeAddReshapeRangeConcatExpandTrilur   OrValueGreaterSubLessOrEqualOrCastMul	Unsqueeze)op	input_idspast_kv_cache
shape_B111min_valwindow_sizedtypeseq_len
seq_len_0Dpast_seq_lenpast_seq_len_0Dtotal_seq_len_0Dtotal_seq_lencurrent_range
mask_shapemask_all_min_expandmask_all_min_trilumask_all_mintotal_range_as_rowcurrent_range_as_column
non_causalcurrent_range_minus_windowout_of_sliding_windownon_causal_sliding_windowboolean_maskfloat_0_1_maskfloat_0_min_maskmask_4d_11STmask_4d_B1STr   r   r   _causal_mask'   s.   


rN   c                   @  s   e Zd Zdd ZdddZdS )_CausalMaskPatternc
                 C  s|   t |||||||}
||ddg}|j||	d}||
|}|j|tjjd}t||g}|	|d}|
|||
}|S )Nr   r   r   g        )rN   r0   r.   r#   irDataTypeFLOATr   r)   EqualWhere)selfr1   r2   r3   r4   r5   r6   dtype1attn_mask_2ddtype2causal_maskattn_mask_4dattn_mask_4d_castsumsum_fp32is_zeroresultr   r   r   r   U   s"   
z_CausalMaskPattern.patternNc           
      K  s   t |d|jst d|S | | kr!t d||gS t|}|d u r2t d|S t	|j
 j}	||	krMt d|	 d| |S |rWt d|S dS )	Nattention_maskzInvalid attention_mask inputzDtype mismatchzMinval is not a constant.zExpected min value z, got z Sliding window not yet supportedT)r   r   r   MatchResultfailas_intr   get_singleton_valuenpfinfor7   numpymin)
rU   contextrV   rX   r5   rW   sliding_window_	min_valueexpected_min_valuer   r   r   checkv   s"   
z_CausalMaskPattern.checkr   )__name__
__module____qualname__r   rn   r   r   r   r   rO   T   s    !rO   c                      sJ   e Zd Z fddZdd Z				dddd	Z				dd
dZ  ZS )GroupQueryAttentionc                   s   t  jddd d S )NGQAF)remove_nodes)super__init__)rU   	__class__r   r   rv      s   zGroupQueryAttention.__init__c           "      C  s  |j |tjdgd}|j|tjddgd}t||g}|j|g dd}|j|tjddgd}t||g}|j |tjd	gd}|j|tjdd
gd}t||g}|j|g dd}|j|tjddgd}t||g}|j |tjdgd}|j|g dd}|j||||	ddgd}|j||||	ddgd}|j||dd}t||g}||dg}|	|tj}|j |tjdgd}|j||dd}t||g}||dg}|	|tj}|j |tjdgd}|j
||||
ddd}|j|g dd} |j | tjdgd}!|!||fS )Nquery_BSHDh)_outputsr   query_BSHDh_normalized)r   rz   )r   r   r   r   )permquery_BHSDh_normalizedkey_BSHkvDhkey_BSHkvDh_normalizedkey_BHkvSDh_normalizedvalue_BSHkvDhcom.microsoftquery_BHSDh_rope)_domainrz   key_BHkvSDh_roper   r   key_seq_BHTDhvalue_seq_BHTDhBHSdzai.onnxruntime._fusion)
key_formatr   attention_BSD)r$   r   	ANY_VALUESimplifiedLayerNormalizationr)   	TransposeRotaryEmbeddingr&   r0   r'   SDPA)"rU   r1   	query_BSD	key_BSDkvvalue_BSDkvpast_key
past_valueposition_idscossinmaskry   r{   query_BHSDhr}   r~   r   key_BHkvSDhr   r   value_BHkvSDhr   r   key_seq_BHkvTDhkey_seq_BHkv1TDhkey_seq_BHkvGTDhr   value_seq_BHkvTDhvalue_seq_BHkv1TDhvalue_seq_BHkvGTDhr   attention_BHSDhattention_BSHDhr   r   r   r   r      s   




zGroupQueryAttention.patternNri   _basics.MatchContextc                   s  t  }|d ur|d ur|d||gS |d ur$|d ur$|d||gS i  d fd	d
}||g dr6dS ||g dr?dS ||g drHdS |d urU||g drUdS |d urb||g drbdS t|	d}t|
d}t|tsy|d|	S t|ts|d|
S || _|| _|	 j
}|	 j
}|dd}|dd}||krt  d|	 |	 gS || _|	 }|d u rt  d|S tj|j|j|dd}|d u rt  d|S dS )NzQuery normalized twicezKey normalized twicevalr	   dimsSequence[str]r   r   c                   s   t  | | S r   )_fusion_utilscheck_shape_bool)r   r   bindingsr   r   no_match$  s   z+GroupQueryAttention.check.<locals>.no_match)BSDF)r   r   Dkv)r   HkvPDh)r   r   r   Dvr   z#Unable to determine num_heads valuez&Unable to determine kv_num_heads valueinterleavedr   z/Rotary embedding interleaved attribute mismatchzUnhandled mask pattern)check_nodes_are_removablez'Mask does not match causal mask patternT)r   r	   r   r   r   r   )r   ra   rb   r   get_dim
isinstanceint	num_headskv_num_headsproducer
attributesget_int_interleaved_causal_mask_patternmatchr   graph_or_function)rU   ri   r   r   r   r   r   r   r   ry   r~   r   r{   r}   r   r   rk   r_   r   r   r   query_rotary_attributeskey_rotary_attributesquery_interleavedkey_interleaved	mask_nodemask_match_resultr   r   r   rn     sj   



zGroupQueryAttention.checkc                  K  sn  |j tjdtjjdd}|j tjdgtjjdd}|j tjdgtjjdd}|j||dd}|j|tjjd}|j||dd}|||}|pK|}|d urt|	 }|j
}|jd }|j||fi |}|j g dd}|||}|pw|}|d ur|	 }|j
}|jd }|j||fi |}|j g dd}|||}|j|||||||||	| j| jd| jd	d
dS )Nr   )r7   )r   r   )keepdimsr   )r   r   r   )
value_intsr   r   )r   r   	do_rotaryrotary_interleavedr   rz   )ConstantrP   tensorrQ   INT32INT64	ReduceMaxr.   r#   r   r   r   r   r$   rr   r   r   r   ) rU   r1   r   r   r   r   r   r   r   r   r   ry   r~   r{   r}   r   r   rk   one_int32_0done_int64_1dzero_int64_1dseqlens_k_int64	seqlens_kmax_seq_lengthtotal_seq_length_int32normalized_query	norm_node
norm_attrs
norm_scalereshape_BSHDh_to_BSDnormalized_keyreshape_BSHkvDh_to_BSDkvr   r   r   rewrite]  s`   

zGroupQueryAttention.rewrite)NNNN)ri   r   )ro   rp   rq   rv   r   rn   r   __classcell__r   r   rw   r   rr      s    zgrr   )r   r	   r
   r   r   r   r   r   ) 
__future__r   typingr   r   rg   re   onnx_irrP   !onnxscript.rewriter._fusion_utilsrewriterr   onnxscript.rewriterr   r   r   r   SymbolicDimDimr   rN   PatternBaserO   r   RewriteRuleClassBaserr   rule_basic_gqa_ruleRewriteRuleSet	gqa_rulesapply_fusion_rulesfuse_gqar   r   r   r   <module>   s$   
-<  