o
    ôÚ·i„O  ã                   @   sp   d dl mZ d dlmZ d dlmZmZ d dlmZ ee	ƒZ
G dd„ deƒZG dd„ deƒZG d	d
„ d
eƒZdS )é    )Ú	getLogger)ÚFusion)ÚTensorProtoÚhelper)Ú	OnnxModelc                       s>   e Zd Zddededef‡ fdd„Zded	efd
d„Z‡  ZS )ÚFusionLayerNormalizationTFÚmodelÚcheck_constant_and_dimensionÚforcec                    s    t ƒ  |dd¡ || _|| _d S ©NÚLayerNormalizationÚ
ReduceMean)ÚsuperÚ__init__r	   r
   )Úselfr   r	   r
   ©Ú	__class__© ú_/home/ubuntu/vllm_env/lib/python3.10/site-packages/onnxruntime/transformers/fusion_layernorm.pyr      s   
z!FusionLayerNormalization.__init__Úinput_name_to_nodesÚoutput_name_to_nodec              	   C   s¶  g }| j  ||¡}t|ƒdkst|ƒdkrdS |jd }|d jdks,|d jd |kr.dS t|ƒdkrF|d jdksD|d jd |krFdS d}|D ]'}| j j|d|dd}	|	dur^|	} n| j  |d	dg¡}
|
durq|
d
 } nqJ|du rxdS | j  |g d¢g d¢fg d¢g d¢fg|¡\}}}|du r—dS |d
 }||vr¡dS |d }| j  |¡\}}|du s¹|dks¹|dkrÃt	 
d|› ¡ dS |d }| j  |d¡dkrÒdS |jd |vrÛdS ||jd  }|D ]ô}|jd	kr| |¡ |jd |vrùqä||jd  d }n|}|jdkrqä|jd |vrqä||jd  d }|jdkr%qä| |¡ | |¡ | |dd
… ¡ | |||g¡ |jd	krH|n|}|jd| j  |jd |¡  }| jrg| j  |dd¡sgqä|jd| j  |jd |¡  }| jr„| j  |dd¡s„qä|jd }| j  ||j||¡s¢| jrœd| _nt	 
d¡ qä| j |¡ tjd|jd ||g|g| j jdddd}|j t dt|ƒ¡g¡ | j |¡ | j| j|j< qädS )aÕ  
        Fuse Layer Normalization subgraph into one node LayerNormalization:
              +----------------------+
              |                      |
              |                      v
          [Root] --> ReduceMean -->  Sub  --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Add
                     (axis=2 or -1)  |      (Y=2)   (axis=2 or -1)  (B=E-6 or E-12)    ^
                                     |                                                 |
                                     +-------------------------------------------------+

         It also handles cases of duplicated sub nodes exported from older version of PyTorch:
              +----------------------+
              |                      v
              |           +-------> Sub-----------------------------------------------+
              |           |                                                           |
              |           |                                                           v
          [Root] --> ReduceMean -->  Sub  --> Pow --> ReduceMean --> Add --> Sqrt --> Div  --> Mul --> Add
              |                      ^
              |                      |
              +----------------------+
        r   é   NÚSubé   ÚDivF©Ú	recursiveÚCastéÿÿÿÿ©ÚSqrtÚAddr   ÚPowr   ©r   r   r   r   r   )r    r!   r   r"   r   r   )r   r   r   r   r   r   ç-Cëâ6?úHskip SkipLayerNormalization fusion since epsilon value is not expected: é   ç       @ÚMulr!   úlayernorm weightúlayernorm biasTú4It is not safe to fuse LayerNormalization node. Skipr   Ú	LayerNorm©Úname_prefix©ÚinputsÚoutputsÚnameÚepsilon) r   Úget_childrenÚlenÚinputÚop_typeÚfind_first_child_by_typeÚmatch_child_pathÚmatch_parent_pathsÚget_constant_inputÚloggerÚdebugÚfind_constant_inputÚoutputÚappendÚextendÚinput_indexr	   Ú$is_constant_with_specified_dimensionÚis_safe_to_fuse_nodesr
   Úprune_graphÚnodes_to_remover   Ú	make_nodeÚcreate_node_nameÚ	attributeÚmake_attributeÚfloatÚnodes_to_addÚthis_graph_nameÚnode_name_to_graph_namer2   )r   Únoder   r   Úsubgraph_nodesÚchildrenÚ
root_inputÚdiv_nodeÚchildÚ
div_node_1Ú
div_node_2Ú_path_idÚparent_nodesÚ_Úsub_nodeÚadd_eps_nodeÚir3   Úpow_nodeÚdiv_childrenÚ	temp_nodeÚmul_nodeÚlast_add_nodeÚnode_before_weightÚweight_inputÚ
bias_inputÚlayer_norm_outputÚnormalize_noder   r   r   Úfuse   sÂ   
  þþ
ú


ÿÿ
ü	
üÀzFusionLayerNormalization.fuse)TF)	Ú__name__Ú
__module__Ú__qualname__r   Úboolr   Údictrg   Ú__classcell__r   r   r   r   r      s    r   c                       sT   e Zd Zdef‡ fdd„Zdd„ Zddedee fd	d
„Z	de
de
fdd„Z‡  ZS )ÚFusionLayerNormalizationNCHWr   c                    s   t ƒ  |dd¡ d S r   ©r   r   ©r   r   r   r   r   r   «   s   z%FusionLayerNormalizationNCHW.__init__c                 C   sŠ   | j  |¡}|d u rt |› d|› d¡ d S t|jƒdks,|jd dks,|jd dkr<t |› d|› d|j› ¡ d S | |jd g¡S )Nú z is not initializer.r&   r   r   z* shall have 3 dimensions Cx1x1. Got shape r   )r   Úget_constant_valuer<   r=   r5   ÚshapeÚreshape)r   Úoutput_nameÚdescriptionÚvaluer   r   r   Úget_weight_or_bias®   s   *z/FusionLayerNormalizationNCHW.get_weight_or_biasNÚ
input_nameÚpermc                 C   sT   | j  d¡}|du r|d d | }tjd|g|g|d}|j t d|¡g¡ |S )z&Append a Transpose node after an inputÚ	TransposeNÚ_outú-r/   rz   )r   rH   r   rG   rI   rA   rJ   )r   ry   rz   ru   Ú	node_nameÚtranspose_noder   r   r   Úcreate_transpose_nodeº   s   z2FusionLayerNormalizationNCHW.create_transpose_noder   r   c           !      C   s¦  t  |d¡}t|tƒr|dgkrdS g }| j ||¡}t|ƒdkr#dS |jd }|d jdks8|d jd |kr:dS |d }| jj	|d|dd}	|	du rNdS | j 
|	g d	¢g d
¢|¡}
|
du radS |
\}}}}}||krndS | j |¡\}}|du s‚|dks‚|dkrŒt d|› ¡ dS t  |d¡}t|tƒs™J ‚|dgkr dS | j |d¡dkr«dS ||	jd  d }|}|jdkr½dS ||jd  d }|jdkrÍdS | |¡ | |
¡ | |||	g¡ | j ||j||¡sðt d¡ dS |jdkr÷|	n|}|jd| j |jd |¡  }|  |d¡}|du rdS |jd| j |jd |¡  }|  |d¡}|du r1dS t |d tj|j|¡}t |d tj|j|¡}| j || j¡ | j || j¡ | j |¡ |  |jd g d¢¡}| jjddd}|  |d g d¢|jd ¡}tjd|jd |d |d g|d g|d}|j  t !dt"|ƒ¡g¡ | j# |¡ | j# |¡ | j# |¡ | j| j$|j%< | j| j$|j%< | j| j$|j%< d} |  &| ¡ dS )a*  
        Fuse Layer Normalization subgraph into one node LayerNormalization:
              +----------------------+
              | NxCxHxW              |
              |                      v                                                     (Cx1x1)  (Cx1x1)
          [Root] --> ReduceMean -->  Sub --> Pow --> ReduceMean --> Add --> Sqrt --> Div --> Mul --> Add -->
                     (axes=1)        |      (Y=2)     (axes=1)     (E-6)             ^
                                     |                                               |
                                     +-----------------------------------------------+

        Fused subgraph:
                       (0,2,3,1)                            (0,3,1,2)
            [Root] --> Transpose --> LayerNormalization --> Transpose -->
        Úaxesr   Nr   r   r   Fr   r   r#   r$   r%   r'   r(   r!   r+   r   r)   r*   Ú_NHWC)r   r   r&   r   r   r,   r-   Ú	_out_nhwc)r   r&   r   r   r/   r3   zLayerNormalization(NHWC))'r   Úget_node_attributeÚ
isinstanceÚlistr   r4   r5   r6   r7   r8   Úmatch_parent_pathr;   r<   r=   r>   r?   r@   rA   rD   rB   rx   r   Úmake_tensorr   ÚFLOATrs   Úadd_initializerrM   rF   r€   rH   rG   rI   rJ   rK   rL   rN   r2   Úincrease_counter)!r   rO   r   r   r   rP   rQ   rR   ÚsubrS   rX   Ú
_sqrt_nodeÚsecond_add_nodeÚreduce_mean_noder]   rZ   r\   r3   r_   r`   ra   rb   rc   Úweightrd   ÚbiasÚweight_nhwcÚ	bias_nhwcÚtranspose_inputÚlayernorm_node_nameÚtranspose_outputrf   Úcounter_namer   r   r   rg   Æ   s®   
 ü




ü


ÿüz!FusionLayerNormalizationNCHW.fuse)N)rh   ri   rj   r   r   rx   Ústrr†   Úintr€   rl   rg   rm   r   r   r   r   rn   ª   s
    rn   c                       s4   e Zd Zdef‡ fdd„Zdedefdd„Z‡  ZS )ÚFusionLayerNormalizationTFr   c                    s   t ƒ  |ddd¡ d S )Nr   r!   ÚTFro   rp   r   r   r   r   G  s   z#FusionLayerNormalizationTF.__init__r   r   c                 C   s(  g }| j  |g d¢g d¢fg d¢g d¢fg|¡\}}}|du r!dS t|ƒdks)J ‚|d dv r;|d	 dv r;|d
 dv sBt d¡ dS |dd… \}}}	}
}}|dd… \}}}}d}t|ƒdkrk|d }|jdkskJ ‚| j  |dd|¡}|du rt d¡ dS | j  |d|¡}|du r|n| j  |d|¡}|du r t d¡ dS | j  |¡\}}|du s¸|dks¸|dkr¿|du r¿t d¡ dS |du rÚ|j	d |j	vsÓ|j	d |j	vrÚt d¡ dS |durõ|j	d |j	vsî|j	d |j	vrõt d¡ dS |j	d |j	d	 krt d¡ dS ||||	|
|||||||g}|dur7| j  |dd|¡}|du r/t d¡ dS | 
|||g¡ | j  ||j| j  ¡ | j  ¡ ¡sOt d¡ dS | j 
|¡ |	j	d	 }|j	d }tjd|j	d ||g|jd g| j jdddd}|j 
t dt|ƒ¡g¡ | j |¡ | j| j|j< dS )aU  
         Layer Norm from Tensorflow model(using keras2onnx or tf2onnx):
          +------------------------------------+
          |                                    |
          |                                    |
        (Cast_1)                               |
          |                                    |
          |                                    v                                           (B)                             (B)             (A)
         Add --> (Cast_1) --> ReduceMean -->  Sub  --> Mul --> ReduceMean --> (Cast_3) --> Add --> Sqrt --> Reciprocol --> Mul --> Mul --> Sub --> Add
          |                       |                                                                                         |       ^              ^
          |                       |                                                                                         |       |              |
          |                       +--------------------------------------------------(Cast_2)-------------------------------|-------+              |
          |                                                                                                                 v                      |
          +---------------------------------------------------------------------------------------------------------------> Mul--------------------+
        )
r   r(   r(   Ú
Reciprocalr    r!   r   r(   r   r   )
r   r   Nr   r   r   Nr   r   N)r   r(   r(   rœ   r    r!   r   r   r(   r   r   )r   r   Nr   r   r   r   Nr   r   NNr&   r   )r   r   r   r   z=return indice is exepected in [0, 1], but got {return_indice}é   éüÿÿÿé   r   r(   zmul_node_3 not foundzroot node is nonegñhãˆµøä>zepsilon is not matchedz;reduce_mean_node_1 and mul_node_3 shall link from root nodez%mul_node_2 shall have two same inputszcast_node_2 not foundz$not safe to fuse layer normalizationr   r,   r-   r/   r3   )r   r:   r5   r<   r=   r7   Úmatch_parentÚ
get_parentr;   r6   rA   rD   r?   r   r   rF   r   rG   rH   rI   rJ   rK   rL   r@   rM   rN   r2   )r   rO   r   r   Úreturn_indicerY   rX   Ú
sub_node_0Ú
mul_node_0Ú
mul_node_1Úreciprocol_nodeÚ	sqrt_nodeÚ
add_node_0Úreduce_mean_node_0Ú
mul_node_2Ú
sub_node_1Úreduce_mean_node_1Úcast_node_3Ú
mul_node_3Únode_before_reduceÚ	root_noder\   r3   rP   Úcast_node_2rc   rd   Ú
fused_noder   r   r   rg   J  sÂ   óòð!
Ý&$

	ù
ÿý
 
 
 

ô


ü



üzFusionLayerNormalizationTF.fuse)rh   ri   rj   r   r   rl   rg   rm   r   r   r   r   rš   F  s    rš   N)Úloggingr   Úfusion_baser   Úonnxr   r   Ú
onnx_modelr   rh   r<   r   rn   rš   r   r   r   r   Ú<module>   s     