o
    ڷiM                     @   sV   d dl Z d dlZd dlmZmZ d dlmZ d dlm	Z	 e 
eZG dd deZdS )    N)AttentionMaskFusionAttention)helper)	OnnxModelc                       s:   e Zd ZdZdedededef fddZdd	 Z  Z	S )
FusionBartAttentionz?
    Fuse Bart Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                    s   t  |||| d S )N)super__init__)selfr   r   r	   r
   	__class__ d/home/ubuntu/vllm_env/lib/python3.10/site-packages/onnxruntime/transformers/fusion_bart_attention.pyr      s   zFusionBartAttention.__init__c           Y      C   s  | j |g dg d}|d ur|\}}}}}	ntd d S g }
|jD ]}||vr+q$||d jd kr5q$|
| q$t|
dkrCd S |
d }	 || }|jdv rY| j 	|d }|jD ]}|saq\|| }dd |D }|
d	dkrw|} nq\d
d | j  jD }dd | j  jD }| j |	g dg d}| j |	g dg d}| j |	g dg d}d\}}g d d }}}|d ur%|}|\}}}}||jd  }|D ]'}|jdkr| j |ddgddg}|d ur|d jd }||jd  } nq|D ]%}||jd  } | D ]}!|!jd |v r|!jd } nq	|dkr# nqn=|d ur>|}|\}"}}}}|"jd }|"jd }n$|	jd |v rL|	jd }n|d ur[|}|d jd }ntd d S ||v ri|nd}||v rr|nd}| j |	dd	gddg}#| j |	g dg d}$g d }%}&|#d ur|#\}'}(|#}%n|$d ur|$\}'}&}(|$}%ntd d S | j |(g dg d})| j |(g d g d}*g }+|)d ur|)}+|+\},}-}.}/}0n|*d ur|*}+|+\}.},}-}/}0ntd! d S | j |(g d"g d#}1| j |(g d$g d%}2| j |(g d&g d}3| j |(g d'g d(}4d\}5}6g d d }7}8}9|1d urU|1}7|7\}:};}9||;jd  }<|<D ]}=|=jd |v rR|=jd }6 nqAn|2d urn|2}7|7\}'}>}:};}9|>jd }5|>jd }6n||(jd  jd |v r||(jd  g}7|7d jd }5n|3d ur|3}7|7\}'}:};}9||9jd  }|D ]*}|jdkr| j |ddgddg}?|?d ur|?d jd }5||jd  } nq|D ]&}||jd  } | D ]}!|!jd |v r|!jd }6 nq|6dkr nqn|4d ur|4}7|7d jd }5ntd) d S |5|v r|5nd}5|6|v r|6nd}6|9d urp|8d u rp| j |jd }@|@jd }A|@j}Bd*}C| j |C}D|Dd u r[| j|C|B|Agtjd+g|A t|Bd,d- | j d.}Etd.|C|9jd g|;jg|E}8t|5ot|o|9d u o|d u }F|F o|0jd |ko|9jd |ko|jd |k}G|F o|0jd |ko|9jd |jd ko|9jd |0jd k}H|Go|%|#k}I|Go|%|$k}J|Ho|%|#k}K|Jot|5ot|}L|Fo|%|#k}M|%|$k}Ng }O|NrN| j |&d/gdg}P| j |&g d0g d1}Q| j |&g d2g d3}R| j |&d4d4gddg}S|Qd ur&|Q}On|Rd ur.|R}On|Sd ur6|S}On|Pd ur>|P}Ontd5 d S t|OdksNJ |Is]|Js]|Ks]|Ls]|Mr|}T| |-\}U}V|Udksw|Vdksw|V|U dkr~td6 d S d }W|Ls|Ks|Mr| jr| j|0|Ks|Lr|9n|5|Ks|Lr|n||/|Ks|Lr|8nd |Ks|Lr|nd |U|V|Tjd |N|Lr|5nd|Lr|nd|6|d7nd }WnB| j}Xd8| _| jdKi d9d d:|0d;|9d<|d=|/d>|8d?|d@|UdA|VdB|dC|Tjd dD|NdE|5dF|dG|6dH|}W|X| _|Wd u rtdI d S | j|W | j| j|Wj< | j |T||	g | j |% |LsC|KsC|Mrt|+dkrV|+d jd	krV|+!  t|7dkri|7d jd	kri|7!  t|dkr||d jd	kr||!  | j"rt|+dkr|+d jd.kr|+!  t|7dkr|7d jd.kr|7!  t|dkr|d jd.kr|!  | j |+ | j |7 | j | dJ| _#d S d S )LN)AddMatMulReshape	Transposer   )   r   r   r   r   z(fuse_attention: failed to match qkv pathr   r   >   r   Clipc                 S   s   g | ]}|j qS r   )op_type).0childr   r   r   
<listcomp>O       z,FusionBartAttention.fuse.<locals>.<listcomp>r   c                 S      h | ]}|j qS r   namer   noder   r   r   	<setcomp>T   r   z+FusionBartAttention.fuse.<locals>.<setcomp>c                 S   r   r   r   r    r   r   r   r"   U   r   )r   r   r   r   )r   r   r   N)Concatr   r   r   r   )r   r   r   r   N)r   r   r   r   )r   r   r   r   ) r$   r#   r   r   r$   z&fuse_attention: failed to match v pathSoftmax)r&   r   r   )r   r   r   z'fuse_attention: failed to match qk path)r   r   Mulr   r   )r   r   r   r   r   )r'   r   r   r   r   z&fuse_attention: failed to match q path)r   r   r   )r   r   r   )r   r#   r   r   r   )r   r   r   r   r   )r'   r   r   r   )r'   r   r   r   r   )r   r   r   r   r   z&fuse_attention: failed to match k path
empty_biasg        )dtype)dimsvalsr   Where)SliceExpandr,   )r   r   r   )r-   	UnsqueezeGatherShaper   )r      r   r   r   r-   z*fuse_attention: failed to match mask nodesz9fuse_attention: failed to detect num_heads or hidden_size)q_matmulk_matmulv_matmulq_addk_addv_addr	   r   outputunidirectionalpast_kpast_v	present_k	present_vF
mask_indexr3   r4   r5   r6   r7   r8   r	   r   first_inputr9   causalr;   r<   r=   r>   z+fuse_attention: failed to create fused nodeTr   )$r   match_parent_pathloggerdebuginputr9   appendlenr   get_childrencountgraphget_initializerr*   	data_typeadd_initializernparrayr   tensor_dtype_to_np_dtypecreate_node_name	make_noder   boolget_num_heads_and_hidden_sizeuse_multi_head_attentioncreate_multihead_attention_nodecreate_attention_nodenodes_to_addthis_graph_namenode_name_to_graph_namenodes_to_removeextendpop!disable_multi_head_attention_biasprune_graph)Yr   normalize_nodeinput_name_to_nodesoutput_name_to_node	qkv_nodesadd_out
matmul_outreshape_qkvtranspose_qkv
matmul_qkvother_inputsinput_
root_inputskip_layernormr9   childrenchildren_typesgraph_input_namesgraph_output_namesv_nodes_past_or_presentv_nodes_with_pastv_nodes_past_only_oair<   r>   v_nodesadd_vmatmul_vtranspose_v	reshape_vstart_child_nodesstart_child_nodeconcat_v_nodesstart_grandchild_nodesstart_grandchild_nodeconcat_vqk_nodes_no_maskqk_nodes_with_maskqk_nodesadd_qk_	matmul_qk
q_nodes_hfq_nodes_oaiq_nodestranspose_q	reshape_qmul_qadd_qmatmul_qk_nodes_no_past_hfk_nodes_with_past_hfk_nodes_past_or_present_oaik_nodes_past_only_oair;   r=   k_nodesadd_kmatmul_ktranspose_k	reshape_ktranspose_k_nodestranspose_k_nodeconcat_kconcat_k_nodesadd_v_tensorbias_dimr)   empty_bias_nameempty_tensoradd_namethree_root_inputsone_root_inputtwo_root_inputsencoder_attentiondecoder_self_attentiondecoder_cross_attention decoder_self_attention_with_past!decoder_cross_attention_with_pastcausal_mask
mask_nodesmask_nodes_bartmask_nodes_whisper_hfmask_nodes_whisper_oai mask_nodes_whisper_oai_unit_testattention_last_noder	   r   new_node%use_multi_head_attention_ground_truthr   r   r   fuse   s  
































&




"

	




zFusionBartAttention.fuse)
__name__
__module____qualname____doc__r   intr   r   r   __classcell__r   r   r   r   r      s    	r   )loggingnumpyrN   fusion_attentionr   r   onnxr   
onnx_modelr   	getLoggerr   rC   r   r   r   r   r   <module>   s   
