o
    ÁÙ·iMA  ã                   @   st   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ d dlmZ eeƒZG dd	„ d	eƒZdS )
é    )Ú	getLoggerN)ÚAttentionMask)ÚFusion)ÚFusionUtilsÚNumpyHelper)Ú	NodeProtoÚhelper)Ú	OnnxModelc                       sP   e Zd Zdedededef‡ fdd„Zdedeeef fd	d
„Z	dd„ Z
‡  ZS )ÚFusionQOrderedAttentionÚmodelÚhidden_sizeÚ	num_headsÚattention_maskc                    s&   || _ || _|| _tƒ  |dd¡ d S )NÚQOrderedAttentionÚQOrderedLayerNormalization)r   r   r   ÚsuperÚ__init__)Úselfr   r   r   r   ©Ú	__class__© úf/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/fusion_qordered_attention.pyr      s   z FusionQOrderedAttention.__init__Ú	reshape_qÚreturnc                 C   sf  | j  |jd ¡}|du rCt |jd › d¡ | j  |dgdg¡}|du r,| j| jfS |d }t|j	ƒdkr=| j| jfS |j	d j
}t |¡}t|ƒdksZ|d dksZ|d dkrit d	|› d
¡ | j| jfS |d }|d }|| }| jdkr’|| jkr’| jr’t d| j› d|› d¡ d| _| jdkr¯|| jkr¯| jr¯t d| j› d|› d¡ d| _||fS )zÊDetect num_heads and hidden_size from a reshape node.
        Args:
            reshape_q (NodeProto): reshape node for Q
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        é   Nz is not initializer.ÚConstantr   é   é   é   zq_shape_value=z7. Expected value are like [0, 0, num_heads, head_size].z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r   Úget_initializerÚinputÚloggerÚdebugÚmatch_parent_pathr   r   ÚlenÚ	attributeÚtr   Úto_arrayÚnum_heads_warningÚwarningÚhidden_size_warning)r   r   Úq_shapeÚconstant_nodeÚq_shape_valuer   Ú	head_sizer   r   r   r   Úget_num_heads_and_hidden_size    s8   	
$ÿz5FusionQOrderedAttention.get_num_heads_and_hidden_sizec           B      C   sr  | j  |ddgddg¡}|d ur|d }nd S | j  |dgd g¡}|d u r,t d¡ d S |d }| j  |g d¢g d¢¡}|d u rGt d	¡ d S |\}}	}
}}}}t || j ¡sYd S t || j ¡sbd S g }t|jƒD ]\}}||vrrqi||d jd kr|qi| 	|¡ qit
|ƒd
krŠd S |d }| j  |g d¢g d¢¡}|d u r¥t d¡ d S |\}}}}}}t || j ¡s¶d S t || j ¡s¿d S | j  |dgd
g¡}|d u rÔt d¡ d S |d }| j  |jd ¡d u råd S t || j d¡sïd S | j  |g d¢g d¢¡}|d u rt d¡ d S |\}}}}}}} }!t || j ¡sd S t || j ¡s%d S t | | j ¡s/d S t || j ¡s9d S | j  |!g d¢g d¢¡}"|"d u rQt d¡ d S |"\}}#}$}%}&}'t |%| j ¡scd S t |$| j ¡smd S | j  |'dgd
g¡}(|(d u rƒt d¡ d S |(d }(| j  |(jd ¡d u r•d S t |(| j d¡s d S | j  |!g d¢g d¢¡})|)d u r¸t d¡ d S |)\}}}*}+},}-t |+| j ¡sÊd S t |*| j ¡sÔd S | j  |-dgd
g¡}.|.d u rêt d¡ d S |.d }.| j  |.jd ¡d u rüd S t |.| j d¡sd S | j  |g d¢g d¢¡}/|/d u rt d¡ d S | j  |(jd ¡}0| j  |.jd ¡}1| j  |jd ¡}2t |0¡}3t |1¡}4t |2¡}5t |3jd
d … ¡}6t |4jd
d … ¡}7t |5jd
d … ¡}8|jd |kr3|'jd |kr5|-jd |kr7| j |/d jd ¡}9|  |#¡\}:};|jd g}<|< 	|jd
 ¡ |< 	|$jd
 ¡ |< 	|*jd
 ¡ |< 	|jd
 ¡ |< 	|(jd ¡ |< 	|.jd ¡ |< 	|jd ¡ |< 	|(jd
 ¡ |< 	|.jd
 ¡ |< 	|jd
 ¡ | j  |&jd ¡rú|< 	|&jd ¡ n|< 	|&jd
 ¡ | j  |,jd ¡r|< 	|,jd ¡ n|< 	|,jd
 ¡ | j  |jd ¡r0|< 	|jd ¡ n|< 	|jd
 ¡ |< 	| jd
 ¡ |< 	|jd
 ¡ |< 	|jd
 ¡ |9d ur[|< 	|9¡ n|< 	d¡ | j  |(jd ¡}=t |=¡ | j  |.jd ¡}>t |>¡ | j  |jd ¡}?t |?¡ | j  d¡}@tjd|<|
jd g|@d}A| j  ||jd |Ajd ¡ | j  |	|	jd |jd ¡ |Aj t d|:¡g¡ |Aj t dd
¡g¡ |Aj t dd¡g¡ |Aj t dd
¡g¡ |Aj t d|6|7|8g¡g¡ d |A_| j 	|A¡ | j| j |Aj!< | j" |
|||g¡ | j" |¡ | j" |"¡ | j" |)¡ | j" |¡ | j" |(|.|g¡ d!| _#d S d S d S d S )"NÚQuantizeLinearÚAddr   éÿÿÿÿÚDequantizeLinearz=fuse_qordered_attention: failed to match input qdq nodes path)r1   ÚMatMulÚReshapeÚ	Transposer3   r0   r4   )NNr   r   r   r   r   z1fuse_qordered_attention: failed to match qkv pathr   )r6   r5   r3   r0   r1   r4   )r   r   r   r   r   Nz/fuse_qordered_attention: failed to match v pathF)r3   r0   ÚSoftmaxr1   ÚDivr3   r0   r4   )r   r   r   r   Nr   r   r   z0fuse_qordered_attention: failed to match qk path)r   r   r   r   r   Nz/fuse_qordered_attention: failed to match q pathz/fuse_qordered_attention: failed to match k path)ÚMulÚSubÚCastÚ	Unsqueezer<   )Nr   r   r   r   z8fuse_qordered_attention: failed to match mask_nodes pathÚ r   )ÚinputsÚoutputsÚnamer   Úorder_inputÚorder_weightÚorder_outputÚqkv_hidden_sizeszcom.microsoftT)$r   r#   r!   r"   r   Úcheck_qdq_node_for_fusionÚ	enumerater    ÚoutputÚappendr$   Úget_constant_valuer   r   r'   ÚnpÚprodÚshaper   Úprocess_maskr/   Útranspose_2d_int8_tensorÚcreate_node_namer   Ú	make_nodeÚreplace_node_inputr%   ÚextendÚmake_attributeÚdomainÚnodes_to_addÚthis_graph_nameÚnode_name_to_graph_namer@   Únodes_to_removeÚprune_graph)Br   Únormalize_nodeÚinput_name_to_nodesÚoutput_name_to_nodeÚadd_before_layernormÚ
start_nodeÚdequantize_inputÚ	qkv_nodesÚ_Úprojection_matmulÚreshape_qkvÚtranspose_qkvÚdequantize_qkvÚquantize_qkvÚ
matmul_qkvÚother_inputsÚ_ir    Ú
root_inputÚv_nodesÚdequantize_vÚ
quantize_vÚadd_vÚmatmul_vÚdequantize_v_matmul_weightÚqk_nodesÚdequantize_qk_softmaxÚquantize_qk_softmaxÚ
softmax_qkÚadd_qkÚdiv_qkÚdequantize_qkÚquantize_qkÚ	matmul_qkÚq_nodesr   Údequantize_qÚ
quantize_qÚadd_qÚmatmul_qÚdequantize_q_matmul_weightÚk_nodesÚdequantize_kÚ
quantize_kÚadd_kÚmatmul_kÚdequantize_k_matmul_weightÚ
mask_nodesÚq_weightÚk_weightÚv_weightÚqwÚkwÚvwÚqw_out_sizeÚkw_out_sizeÚvw_out_sizeÚ
mask_indexr   r   Úattention_inputsÚq_weight_tensorÚk_weight_tensorÚv_weight_tensorÚattention_node_nameÚattention_noder   r   r   ÚfuseS   s¦  ý
ý
ý
ý


ô

÷ý



ý



ÿ




0





üÿÿ
žzFusionQOrderedAttention.fuse)Ú__name__Ú
__module__Ú__qualname__r	   Úintr   r   r   Útupler/   r—   Ú__classcell__r   r   r   r   r
      s    þýüû3r
   )Úloggingr   ÚnumpyrJ   Úfusion_attentionr   Úfusion_baser   Úfusion_utilsr   r   Úonnxr   r   Ú
onnx_modelr	   r˜   r!   r
   r   r   r   r   Ú<module>   s   