o
    )wir                     @   sZ   d dl Z d dlZd dlmZmZ d dlmZmZ d dl	m
Z
 e eZG dd deZdS )    N)AttentionMaskFusionAttention)TensorProtohelper)	OnnxModelc                       sJ   e Zd ZdZdedededef fddZdd	 Zd
d Z	dd Z
  ZS )FusionBartAttentionz?
    Fuse Bart Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                    s   t  |||| d S )N)super__init__)selfr   r	   r
   r   	__class__ k/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/onnxruntime/transformers/fusion_bart_attention.pyr      s   zFusionBartAttention.__init__c                 C   s  | j |dgdg}|d u rdS |d }| j |g dg d}	| j |g dg d}
|	d u s4|
d u r6dS |	\}}}|
\}}}|jd |ksN|jd |krPdS | j |g dg d}| j |g dg d	}|d u sp|d u rrdS |d
 j|jks|d
 j|jkrdS | j |g dg d}| j |g dg d}| j |g dg d}|d u s|d u s|d u rdS |d
 }|d
 }|d
 }|jd }|jd |ks|jd |ks|jd |krdS dS )NConcat   Fr   	UnsqueezeGatherShaper   r   r   )r   r   r   )r   r   r   )r      r   )r   r   MulT)r   match_parent_pathinputnameoutput)r   reshape_qkv_2reshape_qkv_1reshape_q_2reshape_k_2reshape_v_2
root_inputconcat_qkv_2_pathconcat_qkv_2reshape_qkv_2_path_1reshape_qkv_2_path_2_gather_1shape_1gather_2shape_2reshape_qkv_1_path_1reshape_qkv_1_path_2reshape_q_2_pathreshape_k_2_pathreshape_v_2_pathmul_qmul_kmul_vgather_1_outr   r   r   check_runtime_shape_path   s<   	

 
*z,FusionBartAttention.check_runtime_shape_pathc                 C   s  | j |g dg d}|d u s|d jd |jd krdS | j |g dg d}| j |g dg d}|d u s>|d u r@dS |d }	|d }
|	jd	 |
jd	 krTdS |d jd |jd krn|d jd |jd krndS |d ur| j |d
gd	g}|d u rdS | j |d g dg d}| j |d g dg d}|d u r|d u rdS |\}}}}|\}}}|jd |jd krdS |d jd |jd kr|d jd |jd krdS dS )N)r   Slicer   	Transposer   r   r   r   r   r   F)r   PowCastDivr   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r:   )r:   r   r   r   )r   r   r   r   r   )r   r   r   T)r   r   r   r    )r   r!   
matmul_qkvadd_qk	matmul_qkadd_qreshape_qkv_pathmatmul_qk_path_1matmul_qk_path_2mul_1mul_2add_qk_pathslice_q_path_1slice_q_path_2r+   unsqueeze_1unsqueeze_2r   r   r   check_runtime_shape_path_openaiM   sH    0
0z3FusionBartAttention.check_runtime_shape_path_openaic           c      C   s  d}| j |g dg d}| j |g dg d}|d ur'|\}}}	}
}}n|d ur7|}|\}}}	}
}d}nd S g }|jD ]}||vrEq>||d jd krOq>|| q>t|dkr]d S |d }	 || }|jd	krs| j |d }|jD ]}|s{qv|| }d
d |D }|ddkr|} nqvdd | j 	 jD }dd | j 	 jD }| j |g dg d}| j |g dg d}| j |g dg d}| j |dgdg}| j |g dg d}d\}}d\}}|d ur|\}}} }}!|jd }n|d ura|}|\}} }}!| j j
|ddg| gd}"| j 
|g d}#|"d ur4|"\}$}%|%jd |v r4|%jd }|#d ur`|#\}&}$}'|'jd |v rK|'jd }| j |&ddgddg}(|(\}$})|)jd }n|d ur{|\}}&}} }}!|}|&jd }|&jd }n|d ur|d jd |v r|}|d jd }|d jd }||vrttdd  | j  | }*t|*dkr|*d jd nd!}nJ|d ur|d jd |v r|}|d jd }|d jd }||vr ttd"d  | j  | }*t|*dkr|*d jd nd!}ntd# d S ||v r|nd!}||v r|nd!}| j |d$dgddg}+| j |g d%g d&},| j |g d'g d(}-d }.|+d urL|+\}$}/|+}0n|,d ur[|,\}$}$}.}$}/|,}0n|-d urh|-\}$}.}/|-}0nd S | j |/g d)g d*}1| j |/g d+g d,}2d }3|1d ur|1\}3}4}5}6}7}8n|2d ur|2}1|1\}6}4}5}7}8nd S | j |/g d-g d.}9| j |/g d/g d}:| j |/g d0g d1};| j |/g d2g d3}<| j |/ddgddg}=| j |/g d4g d1}>d\}?}@d5\}A}B}C|9d ur|9\}$}A}D}B}E}C|9}Fn |:d uro|:\}G}D}B}C|:}F|Cjd }@| j j
|Cddg|Bgd}"| j 
|Cg d}#|"d urB|"\}$}H|Hjd |v rB|Hjd }@|#d urn|#\}I}$}J|Jjd |v rY|Jjd }@| j |Iddgddg}(|(\}$}K|Kjd }?n|;d ur|;\}$}A}D}B}C|;}F|Djd }@n|<d ur|<\}$}A}I}$}B}C|<}F|Ijd }?|Ijd }@n|=d ur|=d jd |v r|=}F|Fd jd }?|Fd jd }@|@|vrttd6d  | j  |? }Lt|Ldkr|Ld jd nd!}@nE|>d ur#|>d jd |v r#|>}F|Fd jd }?|Fd jd }@|@|vr"ttd7d  | j  |? }Lt|Ldkr |Ld jd nd!}@nd S |?|v r,|?nd!}?|@|v r5|@nd!}@|F|:|;|<fv r| j |jd jd }Md8}N| j |N}O|Od u rl| j|Ntj|Mgtjd9g|M tjd:d; | j d	}Ptd	|N|Cjd g|Bjg|P}E|rt|?s| |	||.|/|7sd S |st|?s| |	||3|A||sd S t|?ot|o|Cd u od<t v}Q|Q o|Cjd |ko|8jd |ko|!jd |k}R|Q o|8jd |ko|Cjd |!jd ko|Cjd |8jd k}S|Ro |0|+k}T|Ro	|0|,|-fv }U|s|Tn|Uot|?ot|}V|So!|0|+k}W|Qo(|0|+k}Xd g }Y}Z|Urd| j |.d=gdg}[| j |.g d>g d}\|\d urV|\d jd }Y|\}Zn|[d urd|[d jd }Y|[}Z|Tss|Uss|Vss|Wss|Xr|	}]|  |5\}^}_|^dks|_dks|_|^ dkrtd? d S d }`|Vs|Ws|Xr| j!r| j"|8|Ws|Vr|Cn|?|Ws|Vr|!n||7|Ws|Vr|End |Ws|Vr|nd |^|_|]jd |V|Vr|?nd!|Vr|nd!|@||Vd@nd }`nX| j!}ad| _!|Ur|Yr|Ynd!}b| j#dRi dAd dB|8dC|CdD|!dE|7dF|EdG|dH|^dI|_dJ|dK|]jd dLt|Zdkr)d n|bdM|?dN|dO|@dP|dQ|U}`|a| _!|`d u rFd S | j$|` | j%| j&|`j< | j'(|]|
|g | j'(|0 |Vsk|Wsk|Xr|1d jdkrw|1)  |Fd jdkr|F)  |d jdkr|)  | j*r|Ws|Xr|1d jd	kr|1)  |Fd jd	kr|F)  |d jd	kr|)  | j'(|1 | j'(|F | j'(| d| _+d S d S )SNF)AddMatMulReshaper;   rR   rQ   r@   )rP   rQ   rR   r;   rQ   )r   r   r   r   r   Tr   r   rP   c                 S   s   g | ]}|j qS r   op_type).0childr   r   r   
<listcomp>       z,FusionBartAttention.fuse.<locals>.<listcomp>rQ   c                 S      h | ]}|j qS r   r   rU   noder   r   r   	<setcomp>   rX   z+FusionBartAttention.fuse.<locals>.<setcomp>c                 S   rY   r   rZ   r[   r   r   r   r]      rX   )rR   r;   rR   rP   rQ   )r   r   r   r   N)r;   rR   rP   rQ   )r   r   r   N)rR   r   r;   rR   rP   rQ   )r   r   r   r   r   NrR   )r;   rR   rR   r;   r<   ) r^   )NNr;   )exclude)r   rR   r;   r   c                 S   
   | j dkS NIdentityrS   r\   r   r   r   <lambda>     
 z*FusionBartAttention.fuse.<locals>.<lambda>r^   c                 S   r`   ra   rS   rc   r   r   r   rd   )  re   z&fuse_attention: failed to match v pathSoftmax)rf   rR   rP   rR   rQ   )r   r   r   r   r   )rf   rP   rQ   r   )rR   r;   rR   r   rP   rQ   )r   r   r   r   r   r   )r   r;   rR   rP   rQ   )r   r   r   r   r   )r;   rR   r;   rR   rP   rQ   )r   r   r   r   r   r   )r   r;   rR   rQ   )r;   rR   r;   rR   rQ   )r   r   r   r   r   )r;   rR   r   r;   rR   rQ   )r   r   r   r   r   r   )r   r;   rR   rR   r;   )NNNc                 S   r`   ra   rS   rc   r   r   r   rd     re   c                 S   r`   ra   rS   rc   r   r   r   rd     re   
empty_biasg        )dtype)dimsvalsmatmul_vWhere)Expandr   r   rl   z9fuse_attention: failed to detect num_heads or hidden_size)q_matmulk_matmulv_matmulq_addk_addv_addr
   r	   r    unidirectionalpast_kpast_v	present_k	present_v
packed_qkv
mask_indexrn   ro   rp   rq   rr   rs   r
   r	   first_inputr    
add_qk_strru   rv   rw   rx   causalr   ),r   r   r   r    appendlenrT   get_childrencountgraphmatch_child_pathlistfilterinput_name_to_nodesloggerdebugget_initializerri   add_initializerr   FLOATnparrayfloat32create_node_namer   	make_noder   boolrO   r9   localsget_num_heads_and_hidden_sizeuse_multi_head_attentioncreate_multihead_attention_nodecreate_attention_nodenodes_to_addthis_graph_namenode_name_to_graph_namenodes_to_removeextendpop!disable_multi_head_attention_biasprune_graph)cr   normalize_noder   output_name_to_nodemodel_impl_openai	qkv_nodesqkv_nodes_openaiadd_out
matmul_outr!   transpose_qkvr"   rA   other_inputsr   r&   skip_layernormr    childrenchildren_typesgraph_input_namesgraph_output_namesv_nodesv_nodes_openaiv_nodes_with_past_self_attnv_nodes_with_past_cross_attn#v_nodes_with_past_cross_attn_openairv   rx   r%   add_vtranspose_vreshape_v_1rk   reshape_pathconcat_pathr+   transpose_add_vconcat_vtranspose_concat_vconcat_nodestranspose_concat_v_inidentity_node_v
qk_nodes_1
qk_nodes_2qk_nodes_2_openairB   rC   qk_nodesq_nodesq_nodes_openair#   transpose_qreshape_q_1r5   rD   matmul_qk_nodes_with_biask_nodes_no_bias_openaik_nodes_no_bias#k_nodes_no_bias_with_past_self_attn$k_nodes_no_bias_with_past_cross_attn+k_nodes_no_bias_with_past_cross_attn_openairu   rw   r$   reshape_k_1matmul_ktranspose_k_1add_kk_nodesr6   transpose_matmul_kconcat_ktranspose_concat_ktranspose_concat_k_inidentity_node_kbias_dimempty_bias_nameempty_tensoradd_namethree_root_inputsone_root_inputtwo_root_inputsencoder_attentiondecoder_attentiondecoder_attention_with_pastdecoder_cross_attention!decoder_cross_attention_with_pastrz   
mask_nodesmask_nodes_bartmask_nodes_whisperattention_last_noder
   r	   new_node%use_multi_head_attention_ground_truthr|   r   r   r   fuse   s|  



	








 

 





















 

 
( 


"

	



zFusionBartAttention.fuse)__name__
__module____qualname____doc__r   intr   r   r9   rO   r   __classcell__r   r   r   r   r      s    	02r   )loggingnumpyr   fusion_attentionr   r   onnxr   r   
onnx_modelr   	getLoggerr   r   r   r   r   r   r   <module>   s   
