o
    ôÚ·iG ã                   @   sv   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlmZ e  e¡ZG dd„ deƒZG dd	„ d	eƒZdS )
é    N)ÚFusionAttention)ÚFusion)ÚFunctionProtoÚ	NodeProtoÚTensorProtoÚhelperÚnumpy_helper)Ú	OnnxModelc                       sœ   e Zd ZdZdededef‡ fdd„Z							dd	ed
edededededededededede	dB dedB fdd„Z
dd„ Zdd„ Zdd„ Z‡  ZS )ÚFusionRotaryAttentionze
    Fuse Attention subgraph with rotary positional embeddings into one MultiHeadAttention node.
    ÚmodelÚhidden_sizeÚ	num_headsc                    s   t ƒ j|||dg d¢d d S )NT)ÚSimplifiedLayerNormalizationÚ SkipSimplifiedLayerNormalizationÚLayerNormalizationÚSkipLayerNormalizationÚAdd)Úuse_multi_head_attentionÚsearch_op_types)ÚsuperÚ__init__)Úselfr   r   r   ©Ú	__class__© úf/home/ubuntu/vllm_env/lib/python3.10/site-packages/onnxruntime/transformers/fusion_rotary_attention.pyr      s   
ûzFusionRotaryAttention.__init__Ú NÚinputÚoutputÚq_rotaryÚk_rotaryÚv_matmulÚ	attn_maskÚadd_qkÚpast_kÚpast_vÚ	present_kÚ	present_vÚscaleÚreturnc                 C   s  | j dksJ ‚| jdkr#| j| j  dkr#t d| j› d| j › ¡ d S | j d¡}|jd |jd |jd d||||	g}|g}|
rJ|rJ| |
|g¡ tj	d|||d}d|_
|j t d| j ¡g¡ |d urq|j t d	|¡g¡ | jd ur„|j t d
t| jƒ¡g¡ |  d¡ |S )Nr   z)fuse_rotary_attention: input hidden size z# is not a multiple of num of heads ÚMultiHeadAttentionr   ©ÚinputsÚoutputsÚnameúcom.microsoftr   r(   Úmask_filter_value)r   r   ÚloggerÚdebugr   Úcreate_node_namer   Úextendr   Ú	make_nodeÚdomainÚ	attributeÚmake_attributer0   ÚfloatÚincrease_counter)r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   Úmha_node_nameÚ
mha_inputsÚmha_outputsÚmha_noder   r   r   Úcreate_mha_node(   sB   ÿøü

z%FusionRotaryAttention.create_mha_nodec	           1      C   s  | j  |dgdg¡}	| j  |dgdg¡}
|	d u s|
d u rdS |	d |
d }}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u sg|d u sg|d u sg|d u ridS |\}}}|\}}}|jd |ks|jd |krƒdS |d j|jks“|d j|jkr•dS | j  |dgdg¡}| j  |dgdg¡}|d u s±|d u r³dS |d |d }}| j  |g d	¢g d
¢¡}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u sü|d u sü|d u sü|d u rþdS |d j|jks"|d j|jks"|d j|jks"|d j|jkr$dS | j  |dgdg¡}|d u r5dS |d }| j  |g d	¢g d
¢¡} | j  |g d¢g d¢¡}!| d u s[|!d u r]dS | d j|jkso|!d j|jkrqdS | j  |dgdg¡}"|"d u r‚dS |"d }#| j  |#g d	¢g d
¢¡}$| j  |#g d¢g d¢¡}%|$d u s¨|%d u rªdS |$d j|jks¼|%d j|jkr¾dS |$d }&| d }'|d }(|jd })|&jd |)ksç|'jd |)ksç|(jd |)krédS | j  |g d¢g d¢¡}*| j  |g d¢g d¢¡}+|*d ur|*\}},}-n|+d ur|+\}}},}-ndS |-jd dvr$dS | j  |,g d¢g d¢¡}.| j  |-g d¢g d¢¡}/| j  |-dgdg¡}0|.d u sU|/d u sU|0d u rWdS |.d j|/d jksm|.d j|/d jkrodS |/d jd |0d jd kr€dS dS )NÚConcaté   Fr   ©Ú	UnsqueezeÚGatherÚShape©r   r   r   ©rA   r   r   )é   r   r   )rC   ÚMulrD   rE   ©r   r   r   r   )rC   r   rD   rE   ©rA   r   r   r   rH   )rH   r   r   r   ©r@   ÚSlicerM   ©ÚCastr@   rM   rM   >   r"   Úattention_mask)rH   r   rA   r   rC   T)r   Úmatch_parent_pathr   r.   r   )1r   Úreshape_qkv_2Úreshape_qkv_1Úreshape_q_2Úreshape_k_2Úreshape_v_2Úreshape_v_1r#   Ú
root_inputÚconcat_qkv_2_pathÚconcat_qkv_1_pathÚconcat_qkv_2Úconcat_qkv_1Úreshape_qkv_2_path_1Úreshape_qkv_2_path_2Úreshape_qkv_1_path_1Úreshape_qkv_1_path_2Ú_Úgather_1Úshape_1Úgather_2Úshape_2Úconcat_v_2_pathÚconcat_v_1_pathÚ
concat_v_2Ú
concat_v_1Úreshape_v_2_path_1Úreshape_v_2_path_2Úreshape_v_1_path_1Úreshape_v_1_path_2Úconcat_k_2_pathÚ
concat_k_2Úreshape_k_2_path_1Úreshape_k_2_path_2Úconcat_q_2_pathÚ
concat_q_2Úreshape_q_2_path_1Úreshape_q_2_path_2Úmul_qÚmul_kÚmul_vÚgather_1_outÚattn_mask_path_1Úattn_mask_path_2Ú
slice_qk_2Ú
slice_qk_1Úslice_qk_2_pathÚslice_qk_1_path_1Úslice_qk_1_path_2r   r   r   Ú&check_runtime_shape_paths_for_function`   sÄ   

 ÿÿ
ÿÿ$
ÿ$
0

ÿÿ,z<FusionRotaryAttention.check_runtime_shape_paths_for_functionc                 C   s\  | j  |dgdg¡}|d u rdS |d }| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}	|d u s4|	d u r6dS |\}
}}|	\}
}}|jd |ksN|jd |krPdS | j  |dgdg¡}|d u r`dS |d }| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u s„|d u r†dS |d j|jks–|d j|jkr˜dS | j  |dgdg¡}|d u r¨dS |d }| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u sÌ|d u rÎdS |d j|jksÞ|d j|jkràdS | j  |dgdg¡}|d u rðdS |d }| j  |g d¢g d¢¡}| j  |g d¢g d¢¡}|d u s|d u rdS |d j|jks*|d j|jkr,dS dS )	Nr@   rA   Fr   rB   rF   rG   T)r   rQ   r   r.   )r   Úreshape_qkvÚ	reshape_qÚ	reshape_kÚ	reshape_vrX   Úconcat_qkv_pathÚ
concat_qkvÚreshape_qkv_path_1Úreshape_qkv_path_2ra   rb   rc   rd   re   Úconcat_v_pathÚconcat_vÚreshape_v_path_1Úreshape_v_path_2Úconcat_k_pathÚconcat_kÚreshape_k_path_1Úreshape_k_path_2Úconcat_q_pathÚconcat_qÚreshape_q_path_1Úreshape_q_path_2r   r   r   Ú#check_runtime_shape_paths_for_nodesü   sV   	

  $z9FusionRotaryAttention.check_runtime_shape_paths_for_nodesc           W         sh  |j dvrd S d }ˆ j |g d¢g d¢¡}ˆ j |g d¢g d¢¡}ˆ j |g d¢g d¢¡}|d ur;|\}}	}}
}|}n"|d urH|\}}}}|}n|d urV|\}}}}}|}nt d¡ d S d\}}}d }d }ˆ j |g d	¢g d
¢¡}ˆ j |g d¢g d¢¡}ˆ j |g d¢g d¢¡}ˆ jj|g d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg d¢g d¢fg	d d\}}}ˆ j |g d ¢g d!¢¡}|d ur|\}}}}}}|}ˆ j |d"d#gd$d%g¡}|d u rt d&¡ d S |d$ jd$ }|d' jd$ }|jd$ }nq|d ur4|\}}}}|}|jd$ }|jd$ }nY|d urF|\}}}|}|jd$ }nG|d urkt|ƒd(krk|d$ d)d … \}}}}|}|jd$ }|jd$ }n"|d ur†|\}}}}}|}|}|jd$ }|jd$ }nt d*¡ d S ˆ j |g d+¢g d,¢¡}d-\}} |d ur©|\}}}} nt d.¡ d S d/\}!}"ˆ j |g d0¢g d¢¡}#ˆ j |g d1¢g d¢¡}$ˆ j |g d2¢g d3¢¡}%ˆ j |g d4¢g d5¢¡}&ˆ j |g d6¢g d7¢¡}'ˆ j |g d8¢g d3¢¡}(ˆ j |g d9¢g d:¢¡})|#d ur|#\}}*}+|*jd$ }!nb|$d ur)|$\}}}*}+|*jd$ }!nQ|%d ur9ˆ  	|%d$ jd$ ¡}"nA|&d urIˆ  	|&d$ jd$ ¡}"n1|'d urV|'d$ jd$ }"n$|(d urc|(d$ jd$ }"n|)d ursˆ  	|)d$ jd$ ¡}"nt d;¡ d S d/\},}-d }.d }/d }0ˆ j | g d<¢g d
¢¡}1ˆ j | g d=¢g d¢¡}2ˆ j | g d>¢g d?¢¡}3ˆ jj| g d@¢g d:¢fg dA¢g dB¢fg dC¢g dD¢fg dE¢g dF¢fg dG¢g dH¢fg dI¢g dJ¢fg dK¢g dL¢fg dI¢g dM¢fg dI¢g dN¢fg	d d\}}4}ˆ j | g dO¢g dP¢¡}5|1d urA|1\}6}}7}}8}9|1}.ˆ j |7d"d#gd$d%g¡}:|:d u r&t dQ¡ d S |:d$ jd$ },|:d' jd$ };|7jd$ }-||;ks@J ‚n|2d urU|2\}}8}}<}9|2}.|8jd$ }-nk|3d uro|3\}}7}8}}<}9|3}.|7jd$ },|7jd$ }-nQ|4d urœt|4ƒd(krœ|4d$ dRd … \}<}9|4d$ dSdT… \}7}8|4}.|7jd$ },|7jd$ }-n$|5d ur¹|5\	}}7}0}8}/}}<}}9|5}.|7jd$ },|7jd$ }-nt dU¡ d S d }=d }>d }?ˆ j | g dV¢g d,¢¡}@ˆ j | g dW¢g d,¢¡}Aˆ j | g dX¢g dY¢¡}B|@d urø|@\}C}}D}E|@}=n&|Ad ur|A\}D}}F}E|A}=n|Bd ur|B\}?}D}>}}F}}E|B}=nt dZ¡ d S |Ejd$ |9jd$ kr;|9jd$ |jd$ kr;t d[¡ d S d\}G||kr_ˆ  
|	|
|C|6||||Ejd$ ¡sYt d]¡ d S |	jd$ }GnX|||fv r·ˆ  ||F|<||Ejd$ ¡szt d]¡ d S |jd$ }G|>r‡|>jd$ n|Ejd$ |Djd$< |/r—|/jd$ n|9jd$ |8jd$< |?d u r¬|8jd^ |8jd$< ||kr·|d_d … }‡ fd`da„}H|?rš|0ršˆ j db¡}I|Id^ }Jtjdb|0jd$ g|Jg|Idc}K|Kj t ddg de¢¡g¡ ˆ j db¡}L|Ld^ }Mtjdb|?jd$ g|Mg|Ldc}N|Nj t ddg de¢¡g¡ |H|<ƒ}O|Od u rt df¡ d S ˆ jjdgdhdi}Ptjdg|Kjd$ |Ojd$ g|Pd^ g|Pdc}Qˆ jjdgdjdi}Rtjdg|Njd$ |Ojd$ g|Rd^ g|Rdc}S|Q}8|S}Dˆ j |O¡ ˆ j |K¡ ˆ j |N¡ ˆ j |Q¡ ˆ j |S¡ ˆ jˆ j|Oj< ˆ jˆ j|Kj< ˆ jˆ j|Nj< ˆ jˆ j|Qj< ˆ jˆ j|Sj< ˆ  |Ejd$ |G|D|8||!|"|,||-|¡}T|Td u r¸t dk¡ d S ˆ j |T¡ ˆ jˆ j|Tj< ˆ j |d_d … ¡ ||krêˆ j |d u râ|d d'… n|d dR… ¡ n|d$ d' g}U|D ]	}Vˆ  |V|U¡ qóˆ j |¡ |.|1krˆ j |.d dR… ¡ nw|.|2kr1ˆ j |.d$ ¡ ˆ j |.d% ¡ ˆ j |.dl ¡ nY|.|3krWˆ j |.d$ ¡ ˆ j |.d_ ¡ ˆ j |.dl ¡ ˆ j |.dm ¡ n3|.|5krmˆ j |.d$ ¡ ˆ j |.d_ ¡ n|.|4krŠ|.d$ d' |.d$ d) g}U|.D ]	}Vˆ  |V|U¡ q€|=|@kršˆ j |=d dR… ¡ n|=|Akr¯ˆ j |=d_ ¡ ˆ j |=d% ¡ dnˆ _d S )oN>   r   r   r   )ÚMatMulÚReshapeÚ	Transposer˜   r—   ©rA   r   r   r   r   )r—   r˜   r™   r—   rK   )Ú	AllReducer—   r˜   r™   r—   z0fuse_rotary_attention: failed to match qkv nodes)r   r   r   )r˜   r™   r@   r™   r˜   r—   )rA   r   r   rA   r   r   )r@   r™   r˜   r—   )rA   rA   r   r   )r™   r˜   r—   rG   )r˜   ÚExpandrC   r@   r™   r˜   r—   )rA   r   r   r   rA   r   r   )r˜   rœ   ÚWhereÚEqualr˜   r@   rC   rD   rE   r@   r™   r˜   r—   )rA   r   rA   r   r   r   r   r   r   r   rA   r   r   )r˜   rœ   r   rž   rI   ÚConstantOfShaperE   r˜   r@   rC   rD   rE   r@   r™   r˜   r—   )rA   r   rA   r   rA   r   r   r   r   rA   r   r   r   rA   r   r   )r˜   rœ   r   rŸ   rE   r˜   r@   rC   rD   rE   r@   r™   r˜   r—   )rA   r   rA   rA   r   r   r   é   r   r   r   rA   r   r   )r˜   rœ   r   r˜   r@   rC   rD   rE   r@   r™   r˜   r—   )rA   r   rA   rH   r   é   r   r   r   rA   r   r   )	r˜   r@   rC   rD   rE   r@   r™   r˜   r—   )	rA   rA   r   r   r   r   rA   r   r   )
r˜   r@   rC   rI   rD   rE   r@   r™   r˜   r—   )
rA   rA   rA   r   r   r   r   rA   r   r   )	rA   rA   rH   r   r   r   rA   r   r   )	rA   rA   r    r   r   r   rA   r   r   )Úoutput_name_to_node)r@   r™   r˜   r   r—   )rA   rA   r   r   rA   rM   rC   r   rH   zDfuse_rotary_attention: failed to match past/present concat in v pathéÿÿÿÿé	   éüÿÿÿz-fuse_rotary_attention: failed to match v path)ÚSoftmaxr   ÚDivr—   rJ   ©NNz/fuse_rotary_attention: failed to match qk nodes)r   r   rL   rN   )r   r   ÚSubrO   rœ   rC   rC   )rA   r   rH   rA   r   r   r   )r   r©   rO   rœ   rC   rC   )rA   rH   rA   r   r   r   )rœ   r   r   r©   rO   rœ   rC   rC   )rA   r   r   rH   rA   r   r   r   )rœ   r   r©   rO   rœ   rC   rC   )	r   rO   r   rO   r©   rO   rœ   rC   rC   )	rA   r   r   r   r   rA   r   r   r   z;fuse_rotary_attention: failed to match attention mask nodes)r˜   r™   r@   r™   ÚRotaryEmbeddingr—   )r™   rª   r™   r˜   r—   )r™   r@   rª   r™   r˜   r—   )rA   r   rA   r   r   r   )	r™   r˜   rœ   rC   r@   rª   r™   r˜   r—   )r™   r˜   rœ   r   rž   r˜   r@   rC   rD   rE   r@   rª   r™   r˜   r—   )rA   r   r   rA   r   r   r   r   r   r   r   rA   r   r   r   )r™   r˜   rœ   r   rž   rI   rŸ   rE   r˜   r@   rC   rD   rE   r@   rª   r™   r˜   r—   )rA   r   r   rA   r   rA   r   r   r   r   rA   r   r   r   rA   r   r   r   )r™   r˜   rœ   r   rŸ   rE   r˜   r@   rC   rD   rE   r@   rª   r™   r˜   r—   )rA   r   r   rA   rA   r   r   r   r    r   r   r   rA   r   r   r   )r™   r˜   rœ   r   r˜   r@   rC   rD   rE   r@   rª   r™   r˜   r—   )rA   r   r   rA   rH   r   r¡   r   r   r   rA   r   r   r   )r™   r˜   r@   rC   rD   rE   r@   rª   r™   r˜   r—   )rA   r   rA   r   r   r   r   rA   r   r   r   )r™   r˜   r@   rC   rI   rD   rE   r@   rª   r™   r˜   r—   )rA   r   rA   rA   r   r   r   r   rA   r   r   r   )rA   r   rA   rH   r   r   r   rA   r   r   r   )rA   r   rA   r    r   r   r   rA   r   r   r   )	r™   r@   r@   rª   rM   r™   r˜   r   r—   )	rA   r   rA   r   r   r   r   r   rA   zDfuse_rotary_attention: failed to match past/present concat in k pathéþÿÿÿéûÿÿÿéýÿÿÿz.fuse_rotary_attention: failed to match k nodes)r˜   r™   rª   r—   )rª   r™   r˜   r—   )r@   rª   rM   r™   r˜   r   r—   )r   r   r   r   r   r   rA   z.fuse_rotary_attention: failed to match q nodeszKfuse_rotary_attention: failed to find the same root_input for q, k, v pathsr   z;fuse_rotary_attention: failed to verify runtime shape pathsÚ	_output_0rA   c           
         s  ˆ j  | dd¡}|du rt d¡ dS ˆ j  |jd ¡}ˆ j  |jd ¡}|du s-|du r4t d¡ dS |d }|d }|| }ˆ j jd	d
d}ˆ j  |¡du r]ˆ j|t	j
dg|gdd ˆ j jddd}tjd|jd |jd |g|d g|d}	|	j t dd¡g¡ |	S )zþDetect num_heads and hidden_size for ONNX model from phi-2
            Args:
                reshape_q (NodeProto): reshape node for q
            Returns:
                hidden_size_concat_node(NodeProto): Concat node to be used by reshape
            r@   rA   NzEfuse_rotary_attention: failed to trace the concat node from reshape_qrH   r    zMfuse_rotary_attention: failed to get constant nodes of num_heads or head_sizer   ÚInitializerr   ©Úname_prefixF)r.   Ú	data_typeÚdimsÚvalsÚrawÚhidden_size_concatÚoutput_0r+   Úaxis)r   Úmatch_parentr1   r2   Úget_constant_valuer   r3   Úget_initializerÚadd_initializerr   ÚINT64r   r5   r7   r4   r8   )
rƒ   ÚconcatÚnum_head_constant_nodeÚhead_size_constant_nodeÚnum_head_valueÚhead_size_valuer   Úhidden_size_initilizerÚhidden_size_reshape_node_nameÚhidden_size_concat_node©r   r   r   Úcreate_hidden_size_concat_node¥  sB   

ûýø
zBFusionRotaryAttention.fuse.<locals>.create_hidden_size_concat_noder™   r+   Úperm)r   rH   rA   r    z?fuse_rotary_attention: failed to create hidden_size_concat_noder˜   Úconcat_k_halfr°   Úconcat_q_halfzSfuse_rotary_attention: failed to create multi-head attention with rotary embeddingsr    r¡   T)Úop_typer   rQ   r1   r2   Úmatch_parent_paths_allr   r   ÚlenÚreshape_add_qkr   r–   r.   r3   r   r5   r7   r4   r8   Únodes_to_addÚappendÚthis_graph_nameÚnode_name_to_graph_namer?   Únodes_to_removeÚ&add_nodes_to_remove_with_nodes_to_keepÚprune_graph)Wr   Únormalize_nodeÚinput_name_to_nodesr¢   Ú	qkv_nodesÚqkv_nodes_1Úqkv_nodes_2Úqkv_nodes_3ra   rR   rS   Ú
matmul_qkvr‚   r%   r'   Úpast_seq_lenÚv_nodesÚadd_vÚ	v_nodes_1Ú	v_nodes_2Ú	v_nodes_3Ú	v_nodes_4Ú	v_nodes_5rV   r‹   rW   Úmatmul_vrŠ   Útranspose_vr…   Úqk_nodesr#   Ú	matmul_qkr"   Ú
add_qk_strÚattn_mask_nodes_1Úattn_mask_nodes_2Úattn_mask_nodes_3Úattn_mask_nodes_4Úattn_mask_nodes_5Úattn_mask_nodes_6Úattn_mask_nodes_7Úslice_mask_1Úslice_mask_2r$   r&   Úk_nodesÚslice_krÉ   Ú	k_nodes_1Ú	k_nodes_2Ú	k_nodes_3Ú	k_nodes_4Ú	k_nodes_5rU   r   Úrotary_kÚmatmul_krŽ   Úshared_past_seq_lenr„   Úq_nodesÚslice_qrÊ   Ú	q_nodes_1Ú	q_nodes_2Ú	q_nodes_3rT   Úrotary_qÚmatmul_qrƒ   Úroot_outputrÇ   Úk_transpose_node_nameÚk_tranpose_output_nameÚk_transpose_nodeÚq_transpose_node_nameÚq_tranpose_output_nameÚq_transpose_noderÅ   Úconcat_k_reshape_node_nameÚconcat_k_reshape_nodeÚconcat_q_reshape_node_nameÚconcat_q_reshape_nodeÚnew_nodeÚnodes_to_keepÚ	temp_pathr   rÆ   r   ÚfuseE  sx  
ýýý

ýýýþðíïñþóþþ™l’pý
ý









ý

ýýýýýýý








ýýýôîëíïòñòò ì % Ù )ý
ý









ýýý



,

ø

û

  

5
ü
ü

üüõ


,







zFusionRotaryAttention.fuse)r   r   r   r   r   r   N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r	   Úintr   Ústrr   r9   r?   r   r–   r  Ú__classcell__r   r   r   r   r
      s^    þýüóþýüûúùø	÷
öõôó
ò8 Ir
   c                
       sh   e Zd Zdef‡ fdd„Zdedefdd„Zdefd	d
„Zde	de	de	de	de	f
dd„Z
dd„ Z‡  ZS )ÚFusionRotaryEmbeddingsr   c                    s*   d| _ tƒ  || j | j | j d dg¡ d S )Nrª   z.1r   )Ú	base_namer   r   )r   r   r   r   r   r   T  s   $zFusionRotaryEmbeddings.__init__Úrot_emb_nodeÚfunctionc                    sü   g g }}|j D ],}|jdkr4|jg kr4|jd |jv r4| |¡ t|jƒ |jd ¡}| |j| ¡ qg }|D ]}|jd j}	| j	 
d¡|	_| j	 |	¡ | |	j¡ q9t||ddD ]\‰ }
tt‡ fdd„| j	j	jj ƒƒ}|D ]	}t |ˆ |
¡ qqq\|S )NÚConstantr   F)Ústrictc                    s
   ˆ | j v S ©N)r   )Úentry©Úextra_outputr   r   Ú<lambda>n  s   
 z?FusionRotaryEmbeddings.reassign_extra_outputs.<locals>.<lambda>)ÚnoderË   r   r   rÐ   ÚlistÚindexr7   Útr   r3   r.   r¼   ÚzipÚfilterÚgraphr	   Úreplace_node_input)r   r  r  Úextra_constantsÚextra_outputsÚfn_nodeÚoutput_indexÚextra_initializersÚextra_constantÚconstant_tensorprotoÚextra_initializerÚnodes_to_updateÚnode_to_updater   r"  r   Úreassign_extra_outputs[  s&   

$
€ÿz-FusionRotaryEmbeddings.reassign_extra_outputsr%  c                    s8  | j  | j¡}| j  ˆddgddg¡}|d ur|\}}nt d¡ d S |jd ˆjd g}tt	‡fdd„| j j j
jƒƒ}tt	‡fdd„| j j j
jƒƒ}d	\}	}
t|ƒdkrÃt|ƒdkrÃ| j  |	¡d u rÃ| j  |
¡d u rÃt |d jd j¡ ¡ }t |d jd j¡ ¡ }tj|	tjt|jƒ| ¡  ¡ d
}| j  || j¡ tj|
tjt|jƒ| ¡  ¡ d
}| j  || j¡ | j |d |d g¡ | |	|
g¡ ˆj}t|ƒdkrtt	‡fdd„| j j jƒƒ}t|ƒdksêJ ‚|  ˆ|d ¡‰ tt	‡ fdd„|ƒƒ}t|ƒdksJ ‚tj | j|||dd}d|_!| j "|¡ |S )Nr˜   r—   r   z.fuse_rotary_embeddings: failed to match MatMulrA   c                    ó   | j d ˆ jd kS )Nr   rH   ©r   r   ©Úconstant©r%  r   r   r$  ˆ  ó    zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>c                    r8  )Nr   r    r9  r:  r<  r   r   r$  ‰  r=  ©Ú	cos_cacheÚ	sin_cache©r.   r²   r³   r´   c                    s   | j ˆ jkS r   )r.   rË   )Úfnr<  r   r   r$  «  s    c                    s   | ˆ vS r   r   )Úoutput_name)r.  r   r   r$  ®  s    ©r,   r-   r.   Úinterleavedr/   )#r   r3   r  rQ   r1   r2   r   r   r&  r*  r+  r%  rÍ   r»   r   Úto_arrayr7   r(  Úsqueezer   Úmake_tensorr   ÚFLOATÚshapeÚflattenÚtolistr¼   rÑ   rÓ   r4   Ú	functionsr7  r5   r6   rÐ   )r   r%  Úrotary_emb_node_nameÚmatmul_pathÚreshape_nodeÚmatmul_nodeÚrotary_emb_inputsÚcos_cache_nodeÚsin_cache_nodeÚcos_cache_nameÚsin_cache_namer?  r@  Úcos_cache_tensorÚsin_cache_tensorÚrotary_emb_outputsÚfuncÚrotary_emb_noder   )r.  r%  r   Ú&create_rotary_embeddings_from_functiont  sn   ý

þ
ü
üûz=FusionRotaryEmbeddings.create_rotary_embeddings_from_functionrX   Úposition_idsÚ	cos_sliceÚ	sin_slicer   c                    sž  | j  | j¡}tt‡ fdd„| j j jjƒƒ}tt‡fdd„| j j jjƒƒ}d\}	}
t|ƒdkrºt|ƒdkrº| j  |	¡d u rº| j  |
¡d u rºt	 
|d jd j¡ ¡ }t	 
|d jd j¡ ¡ }|jd }|d d …d |d …f }|d d …d |d …f }tj|	tjt|jƒ| ¡  ¡ d}| j  || j¡ tj|
tjt|jƒ| ¡  ¡ d}| j  || j¡ | j |d |d g¡ tj| j|||	|
g|g|dd	}d
|_|S )Nc                    ó   | j d ˆ kS ©Nr   ©r   r:  )r^  r   r   r$  É  ó    zLFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>c                    r`  ra  rb  r:  )r_  r   r   r$  Ê  rc  r>  rA   r   rH   rA  rD  r/   )r   r3   r  r&  r*  r+  r%  rÍ   r»   r   rF  r7   r(  rG  rJ  r   rH  r   rI  rK  rL  r¼   rÑ   rÓ   r4   r5   r6   )r   rX   r]  r^  r_  r   rN  rS  rT  rU  rV  r?  r@  Ú	head_sizerW  rX  r[  r   )r^  r_  r   Ú#create_rotary_embeddings_from_nodes¾  sJ   

ü
ü
ûz:FusionRotaryEmbeddings.create_rotary_embeddings_from_nodesc           %         sÖ  | j |jvr|jdkrd S d ‰ |jdkrct|jƒdvs"|jd dvr)t d¡ d S |  |¡‰ ˆ d u r9t d¡ d S | j |¡ t	t
‡ fdd„| jjjjƒƒ}t|ƒdksVJ ‚| jjjj |d	 ¡ np| j |g d
¢g d¢¡}| j |g d¢g d¢¡}|p~|}| j |g d¢g d¢¡}| j |g d¢g d¢¡}	|pš|	}
|d u s£|
d u rªt d¡ d S | j |g d¢g d¢¡}| j |g d¢g d¢¡}|pÅ|}| j |g d¢g d¢¡}| j |g d¢g d¢¡}|pá|}|d u sê|d u rñt d¡ d S |d j|d jks|d j|
d jks|d j|d jks|d j|
d jkr$t d¡ d S | j |ddgd	d	g¡}| j |ddgd	d	g¡}|p@|}|d u rMt d¡ d S d\}}}| j |g d¢g d ¢¡}| j |g d!¢g d"¢¡}| j |g d#¢g d$¢¡}| j |g d%¢g d&¢¡}|d ur‘|}|d' jd	 }nB|d ur |}|d( jd	 }n3|d ur¶|}|d' jd	 }|d) jd }n|d urÌ|}|d( jd	 }|d) jd }nt d*¡ d S d+\}}| j |g d¢g d,¢¡}| j |g d!¢g d-¢¡}| j |g d#¢g d.¢¡}| j |g d%¢g d/¢¡} |d ur|}|d' jd	 }nB|d ur%|}|d( jd	 }n3|d ur;|}|d' jd	 }|d) jd }n| d urQ| }|d( jd	 }|d) jd }nt d*¡ d S |d0kr™| j |d) d1gdg¡}!| j |d) d1gdg¡}"|!d u sŠ|"d u sŠ|!d	 j|"d	 jkr‘t d2¡ d S |"d	 jd	 }ng }!g }"d3\}#}$||kr«||ksµ||krÓ||krÓ|d4 j|d4 jksË|d j|d jkrÒt d5¡ d S no||krÝ||ksç||kr=|| kr=|d j|d jkrùt d6¡ d S | j |d d7d8gdd	g¡}#| j |d g d9¢g d:¢¡}$|#d u s5|$d u s5| j |#d jd	 ¡d u s5|$d jdkr<t d;¡ d S nt d<¡ |  |d jd	 ||||jd	 ¡‰ ˆ d u r_t d¡ d S |  |g¡ |  |d d… ¡ |  |d d… ¡ |  |d d… ¡ |  |
d d… ¡ |  |d d… ¡ |  |¡ |  |¡ |  |!d d… ¡ |  |"d d… ¡ |#d urÅt| j |#d	 ¡ƒdkrÅ|  |#¡ |$d urÓ|  |$d d… ¡ |  | j ¡ | j| jˆ j< | j ˆ ¡ d=| _d S )>Nr   >   r¡   é   rA   >   ÚposÚpos_idÚpos_idsÚposition_idr]  zLfuse_rotary_embeddings: failed to verify inputs for RotaryEmbedding functionz=fuse_rotary_embeddings: failed to create RotaryEmbedding nodec                    s   | j ˆ jd kS ra  )r.   r   r<  ©r[  r   r   r$    s    z-FusionRotaryEmbeddings.fuse.<locals>.<lambda>r   )rI   r@   ÚNegrM   r™   rš   )rI   r@   rl  rM   rM   )	rI   r@   rl  rM   rC   r§   rD   rE   r™   )	rA   r   r   r   rA   r   r   r   r   )	rI   r@   rl  rM   rC   r§   rD   rE   rM   z9fuse_rotary_embeddings: failed to match x2 in rotate_half)rI   r@   rM   r™   )rA   r   rA   r   )rI   r@   rM   rM   )rI   r@   rM   rC   r§   rD   rE   r™   )rA   r   rA   rH   r   r   r   r   )rI   r@   rM   rC   r§   rD   rE   rM   z9fuse_rotary_embeddings: failed to match x1 in rotate_halfr£   zCfuse_rotary_embeddings: failed to match common input in rotate_halfrI   r™   rM   z8fuse_rotary_embeddings: failed to match x in rotate_half)Nr   r   )	rI   rC   rD   ÚSqueezerm  rM   rC   rD   rE   )	rA   rA   r   r   r   r   rH   r   r   )rI   rC   rD   rm  rm  rM   rC   r   )rA   rA   r   r   r   r   rH   r   )rI   rC   rD   rM   rC   rD   rE   )rA   rA   r   r   rH   r   r   )rI   rC   rD   rM   rC   r   )rA   rA   r   r   rH   r   r¥   r­   rH   z>fuse_rotary_embeddings: failed to match sin path in apply_rope)Nr   )	r   rA   r   r   r   r   rH   r   r   )r   rA   r   r   r   r   rH   r   )r   rA   r   r   rH   r   r   )r   rA   r   r   rH   r   r   r˜   zGfuse_rotary_embeddings: failed to match position ids path in apply_roper¨   r«   zdfuse_rotary_embeddings: failed to match common Gather node and Shape node in sin cache and cos cachezRfuse_rotary_embeddings: failed to match common Add node in sin cache and cos cacherD   rE   )rD   rE   r™   rF   zKfuse_rotary_embeddings: failed to match past_seq_len and curr_seq_len pathsz:fuse_rotary_embeddings: failed to match common cache pathsT)r  rË   rÍ   r   r1   r2   r\  rÓ   rÐ   r&  r*  r   r+  Ú
value_infoÚremoverQ   r.   Úfind_graph_inputre  r   Úadd_nodes_to_removeÚget_childrenr:   rÑ   rÒ   rÏ   rÕ   )%r   r%  r×   r¢   Úold_shape_inferÚrotate_half_x2_path_1_1Úrotate_half_x2_path_1_2Úrotate_half_x2_path_1Úrotate_half_x2_path_2_1Úrotate_half_x2_path_2_2Úrotate_half_x2_path_2Úrotate_half_x1_path_1_1Úrotate_half_x1_path_1_2Úrotate_half_x1_path_1Úrotate_half_x1_path_2_1Úrotate_half_x1_path_2_2Úrotate_half_x1_path_2Úx_path_1Úx_path_2Úx_pathÚsin_pathr@  r]  Ú
sin_path_1Ú
sin_path_2Ú
sin_path_3Ú
sin_path_4Úcos_pathr?  Ú
cos_path_1Ú
cos_path_2Ú
cos_path_3Ú
cos_path_4Úposition_ids_from_sin_pathÚposition_ids_from_cos_pathÚpast_seq_len_pathÚcurr_seq_len_pathr   rk  r   r  ö  sà  



ÿýýýý
ýýýý

ýý



ýýýý




ýýýý





ýý


,ÿü
ýý


ý
û



$


zFusionRotaryEmbeddings.fuse)r  r  r  r	   r   r   r   r7  r\  r  re  r  r  r   r   r   r   r  S  s     Jþýüû
ú8r  )ÚloggingÚfusion_attentionr   Úfusion_baser   Úonnxr   r   r   r   r   Ú
onnx_modelr	   Ú	getLoggerr  r1   r
   r  r   r   r   r   Ú<module>   s    
        L