o
    )wi                     @   s   d dl Z d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZ d dlmZ d dlmZ e eZG d	d
 d
eZG dd deZG dd deZdS )    N)AttentionMaskFusionAttention)Fusion)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModel)BertOnnxModelc                       s   e Zd ZdZdedededef fddZded	B d
e	de	de	dededededed	B de
de	d	B fddZdedededed	B ded	B ded	B ded	B deded	B ded	B dedede	d	B fddZdd Zdd  Zd!d" Z  ZS )#FusionT5Attentionz=
    Fuse T5 Attention subgraph into one Attention node.
    modelhidden_size	num_headsattention_maskc                    s$   t  j||||ddgd d| _d S )NFSoftmax)use_multi_head_attentionsearch_op_types   )super__init__	static_kv)selfr   r   r   r   	__class__ c/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/onnxruntime/transformers/onnx_model_t5.pyr      s   
zFusionT5Attention.__init__
mask_indexNq_matmulk_matmulv_matmulinputoutput	attn_biasscalereturnc                 C   s  |dksJ |dkr|| dkrt d| d|  dS | j|jd }| j|jd }| j|jd }|du sD|du sD|du r^|du rJ|n|du rP|n|}t|jd  d dS t|}t|}t|}|j|jksuJ |jd }|jd }|jd }||  kr|ksJ  J |dkr||krt 	d| d| d	 t
|jdd }t
j|||fdd
}d| }| jd}tj|d tj||g| dd}| j|| j ||d dg}|r|| n|d |	r|d ||	 |r|d dkr|  |r|d dkstjd||g|d}d|_|jtd|g |
dur=|jtd|
g | jdurQ|jtdt| jg |S )a  Create an Attention node.
        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nr   zl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0)axis   	Attention_qkv_weightTname	data_typedimsvalsraw inputsoutputsr.   com.microsoftr   r%   mask_filter_value)loggerdebugr   get_initializerr"   printr   to_arrayshapewarningnpprodstackcreate_node_namer
   make_tensorr	   FLOATtobytesadd_initializerthis_graph_nameappendpop	make_nodedomain	attributeextendmake_attributer9   float)r   r   r   r    r!   r   r   r"   r#   r$   r%   q_weightk_weightv_weightmatmulqwkwvw
qw_in_size
kw_in_size
vw_in_sizeqw_out_size
qkv_weightqkv_weight_dimattention_node_nameweightattention_inputsattention_noder   r   r   make_attention_node)   s~   









z%FusionT5Attention.make_attention_nodequerykeyvaluepast_key
past_valuepresent_keypresent_valuec                 C   s  |dkr|dkr|r|r|sJ || dkr#t d| d|  d S | jd}|||dg}|r7|| n|d |rD|| n|d |rY|sOJ || || |rm|d dkrm|  |rm|d dksa|g}|	r|
svJ ||	 ||
 td|d|d	| tjd|||d
}d|_	|j
td|g |j
tddg | jd ur|j
tdt| jg | d |S )Nr   r'   r(   MultiHeadAttentionr3   r4   zattention_inputs=z, attention_outputs=z, attention_node_name=r5   r8   r   r%         ?r9   )r:   r;   r   rD   rJ   rK   r=   r
   rL   rM   rN   rO   rP   r9   rQ   increase_counter)r   rd   re   rf   r   r$   rg   rh   r#   ri   rj   r   r   r_   ra   attention_outputsrb   r   r   r   create_mha_node   sT    







z!FusionT5Attention.create_mha_nodec                 C   s$   |  |||r	d S | ||| d S N)fuse_t5_encoderfuse_t5_decoder)r   nodeinput_name_to_nodesoutput_name_to_noder   r   r   fuse   s   zFusionT5Attention.fusec                     s  |j dksJ | jj|g dg d|d}|d u rdS |\}}}| j|g dg d|}|d u r3dS |d }	| j|g d	g d
|}
|
d u rJdS |
\}}}| j|g dg d|}|d u rbdS |\}}}| j|g dg d|}|d u }|d ur|d }n(| j|g dg d|}|d u r| j|g dg d|}|d u rdS |d }| j|\}}|d u rdS |dkrt|| _| j|d g dg d| | j|d g dg d|} d urt fdd| j j	D r|d ur d j	d |d j	d krt
 d j	dkrd}n| j|d j	d }d }| j|ddgddg}|d u rA|rA| j|g dg d
}|d u rHdS |d jd }| j|g d	g d
}|d u rbdS |\}}}| j|g d	g d}|d u rzdS |\}}}|j	d |	j	d krdS | |\}}| j|||||||	j	d |jd |d d!
}|d u rdS | j| | j| j|j< | j| d"| _d"S )#Nr   MatMul	TransposeReshaper   r   r|   r|   edgesrt   FConcat	UnsqueezeGatherShaper   r   r   r   r4   ry   rz   rx   r   r   r   r   Addrx   r   r   r   r   MulSubCastr   r   r   r   r   r   r   r   r   )r   Slicer   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      )ConstantOfShaper   r   r   r   r   r   r   r   r   )r   r   r   r   r   c                 3   s$    | ]}|j  d  jd kV  qdS )r4   r   N)r.   r"   .0r"   mask_nodes_2r   r   	<genexpr>F  s   " z4FusionT5Attention.fuse_t5_encoder.<locals>.<genexpr>r   r3   r   RelativePositionBias)r   r   r   rl   )r   r   r"   r#   r$   r%   T)op_typer   match_child_pathmatch_parent_pathget_constant_inputrQ   r9   anygraphr"   lenr   process_maskr#   get_num_heads_and_hidden_sizerc   nodes_to_addrJ   rI   node_name_to_graph_namer.   nodes_to_removeprune_graph) r   softmax_nodert   ru   	qkv_nodes
matmul_qkv_reshape_qkvqkv_shape_nodesinput_shape_nodev_nodes	reshape_vmatmul_vqk_nodesadd_qk	matmul_qk
mask_nodesis_pattern_for_one_graph_inputmul_nodemul_valmask_nodes_3r   res_pos_bias	rpb_nodesk_nodesmatmul_kq_nodes	reshape_qmatmul_qq_num_headsq_hidden_sizenew_noder   r   r   rq      s  












z!FusionT5Attention.fuse_t5_encoderc           0      C   s  |j dksJ | jj|g dg d|d}|d u rd S |\}}}| j|g dg d}|d u r2d S |d }	d }
d }d }| j|g dg d	}|d u r| j|g d
g d}|d ur|\}}}|jd }
|jd }d|vrqd S |jd |	jd krd| _nCd| _n?|jd }||v rd S d|vrd S d| _n*|\}}}}|jd }||v rd S d|vrd S |jd }d|vrd S |jd }
d| _| j|g dg d}|d u rd S |\}}}d }d }| jdkr+| j|g dg d}|d ur|d }n| j|g dg d}|d u rd S |d }| j|\}}|dkr|| _| j	
|d jd }n*| j|ddgddgfddgddgfg|\}}}|dk rPtd d S |jd }d }d }d }| jdkr| j|g d
g d}|d ur|\} }!}|!jd }||!jd  }"|"D ]}#| j|#jd }$|$d ur|$j} nq|d u rd S d|vrd S n| j|dgdg}|d u rd S |d } | jd }||v rd S d |vrd S n| j|g d!g d"fg d#g d$fg|\}%}}d }&d }"|d urd|d |d% }'}!|!jd }|%dkr||'jd  }&|&jd }n|'jd }||v r#d S d&|vr*d S |%dkrP||'jd  }"|"D ]}#| j|#jd }$|$d urM|$j} nq8n|'jd }|d u r\d S d'|vrcd S nK| j|g d
g d}|d u rwd S |\}}!}|!jd }||!jd  }"|"D ]}#| j|#jd }$|$d ur|$j} nq|d u rd S d'|vrd S | j|g d
g d}(|(d u rd S |(\})}*}+|+jd |	jd krd S | |*\},}-| jdkr|d ur|}|}
d }d }|r|
r|,dkr|-dks d S | j|+jd ||
|||||jd |||,|-d(}.|.rn| j|. | j| j|.j< |s-|rc||fD ]1}/|/r=| j|/sHtd)|/d*  d S |/|v sOJ |/d+ ||/ jd< | j|/|/d+  q1| j| d,| _d S d S )-Nr   rw   r{   r}   r   r   r4   )r   ry   rz   rx   )r   r   r   r   r   r   r   rj   r   past_value_crosspast_value_selfpresent_value_selfr   r   r   r   r   r   r   r   r   r   r   zGSkip MultiHeadAttention fusion since attention bias pattern not matchedpresent_key_crossry   past_key_cross)ry   r   rz   rx   )r   r   r   r   )ry   r   ry   rz   rx   )r   r   r   r   r   past_key_selfpresent_key_self)rd   re   rf   r   r$   rg   rh   r#   ri   rj   r   r   zgraph_output=z does not exist in graph output_copyF)r   r   r   r   r"   r#   r   r   r9   r   r   match_parent_pathsr:   r;   find_graph_outputr.   r   ro   r   rJ   rI   r   r=   replace_input_of_all_nodesr   r   )0r   r   rt   ru   r   r   _transpose_qkvr   r   r   rf   rh   rj   r   transpose_vr   r   concat_vr   r   r   r   r   r   r   r   r   matched_path_indexre   rg   ri   r   transpose_k	reshape_kpresent_key_transpose_nodespresent_key_transpose_nodepresent_key_candidateidxpast_key_transpose_nodeconcat_kr   transpose_qr   r   r   r   r   graph_outputr   r   r   rr     s  













































 
z!FusionT5Attention.fuse_t5_decoder)__name__
__module____qualname____doc__r   intr   r   strr   rQ   rc   ro   rv   rq   rr   __classcell__r   r   r   r   r      s    	

q	

F ,r   c                       s*   e Zd Zdef fddZdd Z  ZS )FusionRelativePositionBiasBlockr   c                    s   t  |ddg d S )Nr   r   )r   r   )r   r   r   r   r   r     s   z(FusionRelativePositionBiasBlock.__init__c                 C   sx  | j |g dg d|}|d u r$| j |g dg d|}|d u r$d S |d }|d }|d }|d }|| jv r;d S | j |g d	g d
|}	|	d u rNd S | j |	d jd }
|	d }| j |g dg d|}d}|d u r| j |g dg d|}d}|d u rd S |d }ttt|
d|rdnd  }|dkrt	
d| d | j jdd|rdnd d}| j |jd }|d u rd S t|}t|}tj|d tjt|d t|d g| dd}| j || j |j|jd |jd g}|d  }||jd< tjd||g|d!}d"|_|jtd#|g |jtd$|g | j| j|j< | j | d| _!d S )%N)r   r   r   r   ry   r   Where)r   r   r   r   r   r   r   )r   r   r   r   ry   r   r   r   )r   r   r   r   r   r   r   r      r4   r   r*   )	Minr   r   r   r   r   DivLogr   )	r   r   r   r   r   r   r   r   r   r   )r   Negr   r   r   r   r   Range)r   r   r   r   r   r   r   r   F)r   Absr   r   r   r   T          zmax_distance is z], which is different from the default value 128. Please double check the model configuration.r   RelPosBias_encoderdecodername_prefixr   _bias_table_weightr-   _rel_pos_biasr5   r8   max_distanceis_bidirectional)"r   r   r   get_constant_valuer"   r   rA   roundexpr:   r@   rD   r<   r   r>   	transposer
   rE   r	   rF   r?   rG   rH   rI   r.   rL   rM   rN   rO   rP   r   r   rJ   r   )r   rs   rt   ru   compute_bias_nodesgatherwhereslice	unsqueezecompute_buckets_nodeslog_maxdivrange_nodesr   
range_noder   	node_nametable_weight_itable_weighttable_weight_t
bias_tabler6   bias_outputrpb_noder   r   r   rv     s   
$	


(

z$FusionRelativePositionBiasBlock.fuse)r   r   r   r   r   rv   r   r   r   r   r   r     s    r   c                       sj   e Zd Zddedef fddZdd Zdd	 ZdddZdd Zdd Z	dd Z
dd Zdd Z  ZS )T5OnnxModelr   r   r   c                    sz   t  ||| t| | _t| jjjdkr!ddlm	} |j
| j_t| | j| j| j| _t| | _t| | _t| | _d S )Nr   r   )AttentionMaskFormat)r   r   r   r   r   r   r   r"   fusion_optionsr  NoMaskmask_formatr   r   r   attention_fusionr   layer_norm_fusionr   skip_layer_norm_fusionr   
rpb_fusion)r   r   r   r   r  r   r   r   r   7  s   



zT5OnnxModel.__init__c                 C      | j   d S rp   )r  applyr   r   r   r   fuse_attentionF     zT5OnnxModel.fuse_attentionc                 C   r  rp   )r  r  r  r   r   r   fuse_layer_normI  r  zT5OnnxModel.fuse_layer_normTc                 C   r  rp   )r  r  )r   shape_inferr   r   r   fuse_skip_layer_normL  r  z T5OnnxModel.fuse_skip_layer_normc              	   C   s  |   D ]}|jdkr| |g dg d}|d urdd | jjjD }|d jd |v r| jdd	d
}tjd|d jd g|d g|d}tj	dt
jdgdgd}| | tjd|d dg|d g| jddd
dd}| | | | |d |jd< |d |jd<  d S qd S )Nr   )
r   r   ry   rz   r   r   r   r   SimplifiedLayerNormalizationr   )
r   r   r   r   r   r   r   r   r   r   c                 S   s   g | ]}|j qS r   )r.   r   r   r   r   
<listcomp>f  s    z?T5OnnxModel.adjust_rel_pos_bis_length_input.<locals>.<listcomp>r4   r   r   Added_Shape_r   _Outputr5   Constant_Index_1)r.   r/   r0   r1   r   _Output_Gather_1Added_Gather_r   )r6   r7   r.   r)   r   )nodesr   r   r   r   r"   rD   r
   rL   rE   r	   INT64rH   add_node)r   rs   r)  graph_input_namesr
  
shape_node	indices_1r  r   r   r   adjust_rel_pos_bis_length_inputO  sL   




z+T5OnnxModel.adjust_rel_pos_bis_length_inputc                 C   s   g }|   D ]@}|jdkrF| |g dg d}|d u rq| |dgdg}|d u r+q|d }|jd |jd< || || | | qd S )Nr   )r   r   r   r   r   LessOrEqualTiler   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r)  r   r   r#   rO   rJ   remove_nodesr   r   rs   extended_mask_nodesr   r  r   r   r   !remove_extended_mask_decoder_init  s(   



z-T5OnnxModel.remove_extended_mask_decoder_initc                 C   s   g }|   D ]B}|jdkrH| |g dg d}|d u rq| |ddgddg}|d u r-q|d }|jd |jd< || || | | qd S )Nr   )r   r   r   r   r   r   r0  r1  r   r   r   r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r2  r4  r   r   r   remove_extended_mask_decoder  s(   



z(T5OnnxModel.remove_extended_mask_decoderc                 C   s   |    | j  d S rp   )adjust_reshape_and_expandr  r  r  r   r   r   
preprocess  s   zT5OnnxModel.preprocessc                 C   s$   |    |   |   |   d S rp   )r6  r7  r/  r   r  r   r   r   postprocess  s   zT5OnnxModel.postprocessr|   )T)r   r   r   r   r   r  r  r!  r/  r6  r7  r9  r:  r   r   r   r   r   r  6  s    
9#$r  )loggingnumpyrA   fusion_attentionr   r   fusion_baser   fusion_simplified_layernormr   r   fusion_utilsr   onnxr   r	   r
   
onnx_modelr   onnx_model_bertr   	getLoggerr   r:   r   r   r  r   r   r   r   <module>   s&   
      