o
    ٷi                     @   sH   d dl Z d dlmZ d dlmZ d dlmZ eeZG dd deZ	dS )    N)init_logger)CustomOpc                	       s  e Zd ZdZddedededdf fd	d
Z	ddej	dej	dej	fddZ
	ddej	dej	dej	dej	fddZ	ddej	dej	dej	dej	fddZ	ddej	dej	dej	dej	fddZ	ddej	dej	dej	dej	fddZ	ddej	dej	dej	dej	fddZ  ZS )AdaLayerNormzH
    AdaLayerNorm:
        out = layernorm(x) * (1 + scale) + shift
    Fư>hidden_sizeelementwise_affineepsreturnNc                    s8   t    || _|| _|| _tj| j| j| jd| _d S )N)r   r   )super__init__r   r   r   nn	LayerNorm	layernorm)selfr   r   r   	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/diffusion/layers/adalayernorm.pyr      s
   
zAdaLayerNorm.__init__
mod_paramsindexc                 C   s  |j ddd\}}}|d ur{|dd }|d | ||d  }}|d | ||d  }	}
|d | ||d  }}|d}|d}|d}|	d}|
d}|d}|d}t|dk||}t|dk||}t|dk||}n|d}|d}|d}|||fS )N   )dimr         )chunksize	unsqueezetorchwhere)r   r   r   shiftscalegateactual_batchshift_0shift_1scale_0scale_1gate_0gate_1index_expandedshift_0_expshift_1_expscale_0_expscale_1_exp
gate_0_exp
gate_1_expshift_resultscale_resultgate_resultr   r   r   
preprocess   s(   










zAdaLayerNorm.preprocessxc                 C      |  |||S Nforward_nativer   r5   r   r   r   r   r   forward_cuda>      zAdaLayerNorm.forward_cudac                 C   r6   r7   r8   r:   r   r   r   forward_hipF   r<   zAdaLayerNorm.forward_hipc           	      C   s@   |  ||\}}}dd l}|j|| jgd| || jd}||fS )Nr   r   )normalized_shapeweightbiasr   )r4   	torch_npunpu_layer_norm_evalr   r   )	r   r5   r   r   r1   r2   r3   rA   outputr   r   r   forward_npuN   s   zAdaLayerNorm.forward_npuc                 C   r6   r7   r8   r:   r   r   r   forward_xpu^   r<   zAdaLayerNorm.forward_xpuc                 C   s,   |  ||\}}}| |d|  | |fS )Nr   )r4   r   )r   r5   r   r   r1   r2   r3   r   r   r   r9   f   s   zAdaLayerNorm.forward_native)Fr   r7   )__name__
__module____qualname____doc__intboolfloatr   r   Tensorr4   r;   r=   rD   rE   r9   __classcell__r   r   r   r   r   
   sz     

+



r   )
r   torch.nnr   vllm.loggerr   $vllm_omni.diffusion.layers.custom_opr   rF   loggerr   r   r   r   r   <module>   s    