o
    ٷi3                     @   s\   d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 ee
Zdd ZG dd	 d	eZdS )
    )	getLoggerN)FusionGptAttentionPastBase)helper)	OnnxModelc                 C   s   t | | dkS )Ngư>)abs)valueexpected_value r	   j/home/ubuntu/.local/lib/python3.10/site-packages/onnxruntime/transformers/fusion_gpt_attention_megatron.pyis_close   s   r   c                       sB   e Zd ZdZdedef fddZdd Zdd	 Zd
d Z	  Z
S )FusionGptAttentionMegatronz^
    Fuse GPT-2 Attention with past state subgraph from Megatron into one Attention node.
    model	num_headsc                    s   t  || d S )N)super__init__)selfr   r   	__class__r	   r
   r      s   z#FusionGptAttentionMegatron.__init__c                 C   s   | j d}| |}	|jd }
|jd |jd krdnd}tjd||jd |j| |	|g|
|g|d}d|_|j	t
d| jt
ddg | jd ur[|j	t
d	t| jg |g}| j	| |D ]	}| j| j|j< qf| j| d
| _d S )NGptAttentionr      	Attention)inputsoutputsnamezcom.microsoftr   unidirectionalmask_filter_valueT)r   create_node_namecast_attention_maskoutputinputr   	make_nodedomain	attributeextendmake_attributer   r   floatnodes_to_addthis_graph_namenode_name_to_graph_namer   nodes_to_removeappendprune_graph)r   matmul_before_splitadd_before_splitpastpresentr   reshape_qkvmaskattention_node_name
int32_maskr   iattention_noder&   noder	   r	   r
   fuse_attention_node   s:   





z.FusionGptAttentionMegatron.fuse_attention_nodec                 C   s  | j |g dg d}|d u rtd d S |\}}}}	t|dkr<|d jdkr<| j |d \}
}|dkr<| | _|jd |j	d krMtd d S | j
|dd	s\td
 d S | j
|ddsktd d S | j |	jd s{td d S | j
|ddgstd d S | j
|ddgstd dS | j
|ddgstd dS | j
|	ddgstd d S | j
|	ddgstd d S | j |g dg d}|d u s|d |krtd d S | j |	g dg d}|d u s|d |krtd d S | j |	g dg d}|d u s|d |kr%td d S | j |	g dg d }|d u rB| j |	g d!g d }|d u sN|d |krUtd" d S |	jd S )#N)MulSubSlicer:   )r   r   r   r   z8fuse_attention: failed to match unidirectional mask pathr   r   r8   i'  zCfuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]g     @z?fuse_attention failed: mul_mask input 1 is not constant 10000.0g      ?z;fuse_attention failed: sub_mask input 0 is not constant 1.0z+expect slick_mask input 0 to be graph inputzKfuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]   zIfuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]F   zJfuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]   zDfuse_attention failed: slice_mask input 3 (axes) is not constant [2]zEfuse_attention failed: slice_mask input 4 (steps) is not constant [1])	UnsqueezeGatherShapeMatMul)r=   r   r   r   z/fuse_attention: failed to match last slice pathz0fuse_attention: failed to match first slice path)r>   r9   r?   r@   rA   )r   r   r   r   r   z3fuse_attention: failed to match last slice sub path)r>   r9   r?   r@   LayerNormalization)r   r   r   r   r   )r>   r9   r?   r@   SkipLayerNormalizationz5fuse_attention: failed to match last slice sub path 1)r   match_parent_pathloggerdebuglenop_typeget_constant_inputr   r   r   utilscheck_node_input_valuefind_graph_inputinfo)r   sub_qkmul_qk	matmul_qklayernorm_before_attention
mask_nodesmul_masksub_masklast_slice_mask
slice_mask_mul_vallast_slice_pathfirst_slice_pathfirst_slice_subfirst_slice_sub_1r	   r	   r
   
match_maskJ   s   















z%FusionGptAttentionMegatron.match_maskc           .   	   C   s*  d }d }|j dk}d }|s| jj|g dg d|d}n| jj|g dg d|d}|d u r0d S d }|sB|\}	}
}}}}|	jd }n|\}
}}}}|jd }| j|g dg d	}|d u rj| j|g d
g d	}|d u rutd d S |\}}}}}}}|j dkr||jd krtd d S |j dkr||jd krtd d S | j|g dg d}|d u rtd d S |\}}}}| j|ddkrtd d S | ||||}| j|g dg d}|d u rtd d S |\}}} }!||!krtd d S | j|g dg d}"|"d u rtd d S |"\}#}$}%}&}'}(||(kr.td d S | j	|'\})}*t
|*tjrbt|*jdgkrb|*d dkrb|*d dkrb|*d dkrb|*d dksitd d S |*d }+|+| jkrtd|+ d | j  |+| _|*d },| j	|#\})}*ttt|,}-t|*|-std!|* d"|-  d S | j	|\})}*t|*|-std#|* d"|-  d S | |%||}|d u rtd$ d S | j|std% | ||}|d u rtd& d S | j|std' d S | |||||jd || d S )(NrD   )Addr_   rA   Reshape	TransposerA   )r   r   Nr   r   r   )output_name_to_node)r_   rA   r`   ra   rA   )r   Nr   r   r   r   )Concatra   r`   Splitr_   rA   rC   )r   r   r   r   r   Nr   )rc   ra   r`   rd   r_   rA   rD   z&fuse_attention: failed to match v pathrC   zAfuse_attention: skip_input != layernorm_before_attention.input[0]r;   )Softmaxr9   r8   rA   )r   r   r   r   z'fuse_attention: failed to match qk pathaxisz+fuse_attention failed: softmax_qk axis != 3)Divra   r`   rd   z&fuse_attention: failed to match q pathz-fuse_attention: skip since split_v != split_q)rg   ra   rc   ra   r`   rd   )r   r   r   r   r   r   z&fuse_attention: failed to match k pathz-fuse_attention: skip since split_v != split_kr<   r   r=   z:fuse_attention: reshape constant input is not [0, 0, N, H]zDetected num_heads=z. Ignore user specified value zfuse_attention: div_k value=z
 expected=zfuse_attention: div_q value=z!fuse_attention: match past failedz(fuse_attention: past is not graph input.z$fuse_attention: match present failedz1fuse_attention: expect present to be graph output)rI   r   rE   r   rF   rG   r   get_node_attributer^   rJ   
isinstancenpndarraylistshaper   rN   r%   sqrtr   match_past_pattern_2rM   match_presentfind_graph_outputr7   ).r   normalize_nodeinput_name_to_nodesrb   r.   r/   is_normalize_node_skiplayernorm	qkv_nodes
skip_inputadd_skipadd_after_attentionmatmul_after_attentionr0   transpose_qkv
matmul_qkvv_nodesconcat_vtranspose_v	reshape_vsplit_vr-   r,   rR   qk_nodes
softmax_qkrO   rP   rQ   attention_maskq_nodesdiv_qtranspose_q	reshape_qsplit_qk_nodesdiv_krX   concat_ktranspose_k	reshape_ksplit_kr4   r   r   hidden_size_per_headr   r	   r	   r
   fuse   s   

		
	




















zFusionGptAttentionMegatron.fuse)__name__
__module____qualname____doc__r   intr   r7   r^   r   __classcell__r	   r	   r   r
   r      s    /Zr   )loggingr   numpyrj   fusion_gpt_attentionr   onnxr   
onnx_modelr   r   rF   r   r   r	   r	   r	   r
   <module>   s   