o
    ڷi'                     @   sL   d dl mZ d dlmZ d dlmZ d dlmZ eeZ	G dd deZ
dS )    )	getLogger)Fusion)helper)	OnnxModelc                       s   e Zd Zdef fddZdedefddZdededed	B fd
dZdededed	B fddZ	dededed	B fddZ
  ZS )
FusionGelumodelc                    s   t  |dd d S )NGeluErf)super__init__)selfr   	__class__ Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/onnxruntime/transformers/fusion_gelu.pyr      s   zFusionGelu.__init__input_name_to_nodesoutput_name_to_nodec                 C   s6   |  |||r	d S | |||rd S | ||| d S )N)fuse_1fuse_2fuse_3)r   erf_noder   r   r   r   r   fuse   s
   zFusionGelu.fusereturnNc                 C   s"  |j d |vr	dS ||j d  }t|dks|d jdkrdS |d }| j|ds,dS |j d |vr5dS ||j d  }t|dksI|d jdkrKdS |d }| j|dd|}|du r^dS | jj|ddd	dkrkdS |jd }|jd |j d kr|dnd}	||j|	 kr||j d  }t|dks|d jdkrdS |d }
| j|
d
sdS |
j d }n$| j|d|	|}
|
du rdS | j|
d
sdS ||
jvrdS |j d }|||||
g}| j||g||sdS | j	
| tjd|g|g| jdd}d|_| j| | j| j|j< | d dS )ay  
        This pattern is from PyTorch model
        Fuse Gelu with Erf into one node:
        Pattern 1:
                       +-------Mul(0.5)---------------------+
                       |                                    |
                       |                                    v
                    [root] --> Div -----> Erf  --> Add --> Mul -->
                              (B=1.4142...)       (1)

        Pattern 2:
                       +------------------------------------+
                       |                                    |
                       |                                    v
                    [root] --> Div -----> Erf  --> Add --> Mul -->Mul -->
                              (B=1.4142...)       (1)            (0.5)

        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   N   AddMulDiv-?MbP?delta      ?r   inputsoutputsnamecom.microsoftT)outputlenop_typer   has_constant_inputmatch_parentfind_constant_inputinputis_safe_to_fuse_nodesnodes_to_removeextendr   	make_nodecreate_node_namedomainnodes_to_addappendthis_graph_namenode_name_to_graph_namer%   increase_counter)r   r   r   r   childrenadd_after_erfmul_after_erfdivsubgraph_inputanothermul_halfsubgraph_outputsubgraph_nodes
fused_noder   r   r   r      sf   




zFusionGelu.fuse_1c                 C   s,  |j d |vr	dS ||j d  }t|dks|d jdkrdS |d }| j|ds,dS |j d |vr5dS ||j d  }t|dksI|d jdkrKdS |d }| j|dsXdS |j d |vradS ||j d  }t|dksu|d jdkrwdS |d }| j|dd|}|du rdS d}	| jj|dd	d
dkr| j|dd|}	|	du rdS | j|	dsdS | j|d|}
|
du rdS |
j d |jvrdS |||||g}|	r|	|	 | j
||j d g||sdS | j| tjd|
j d g|j d g| jdd}d|_| j	| | j| j|j< | d dS )a&  
        This pattern is from Keras model
        Fuse Gelu with Erf into one node:
                       +------------------------------------------+
                       |                                          |
                       |                                          v
                    [root] --> Div -----> Erf  --> Add --> Mul -->Mul
                              (B=1.4142...)       (A=1)   (A=0.5)

        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   Nr   r   r   r!   r   r   r   r   Sqrtg       @r   r"   r&   T)r'   r(   r)   r   r*   r+   r,   
get_parentr-   r5   r.   r/   r0   r   r1   r2   r3   r4   r6   r7   r%   r8   )r   r   r   r   r9   r:   r;   mulr<   	sqrt_node	root_noderA   rB   r   r   r   r   m   sj   
 
zFusionGelu.fuse_2c                 C   s  |j d |vr	dS ||j d  }t|dks|d jdkrdS |d }| j|ds,dS |j d |vr5dS ||j d  }t|dksI|d jdkrKdS |d }| j|dsXdS | j|dd|}|du rgdS | jj|ddd	}|dk rvdS | j||dkrdnd|}	|	du rdS |j d |vrdS ||j d  }t|dks|d jdkrdS |d }
|
jd |	j d ks|
jd |	j d ksdS |||||
g}| j	||
j d g||sdS | j
| tjd
|	j d g|
j d g| jd
d}d|_| j| | j| j|j< | d
 dS )a?  
        This pattern is from TensorFlow model
        Fuse Gelu with Erf into one node:
                       +----------------------------------------------+
                       |                                              |
                       |                                              v
                    [root] --> Mul -----> Erf    -->   Add --> Mul -->Mul
                               (A=0.7071067690849304)  (B=1)  (B=0.5)

        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   Nr   r   r   r!   g   `?r   r   r   r"   r&   T)r'   r(   r)   r   r*   r+   r,   rD   r-   r.   r/   r0   r   r1   r2   r3   r4   r5   r6   r7   r%   r8   )r   r   r   r   r9   r:   r?   	first_mulirG   last_mulrA   rB   r   r   r   r      sd   (
 
zFusionGelu.fuse_3)__name__
__module____qualname__r   r   dictr   boolr   r   r   __classcell__r   r   r   r   r      s    T"Mr   N)loggingr   fusion_baser   onnxr   
onnx_modelr   rK   loggerr   r   r   r   r   <module>   s   