o
    ۷i_                     @   s  d dl Z d dlZd dlmZ d dlm  mZ ddlmZm	Z	 ddl
mZ ddlmZmZ G dd dejZG d	d
 d
ejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZe	dd rejZn	G d!d" d"ejZG d#d$ d$ejZG d%d& d&ejZG d'd( d(ejZG d)d* d*ejZ 	+		,	-	-d6d.e!d/e"dB d0e#d1e$d2e$d3ejfd4d5Z%dS )7    N   )is_torch_npu_availableis_torch_version   )get_activation)CombinedTimestepLabelEmbeddings)PixArtAlphaCombinedTimestepSizeEmbeddingsc                       s|   e Zd ZdZ					ddededB dedB d	ed
edef fddZ	ddej	dej	dB dej	dB dej	fddZ
  ZS )AdaLayerNorma  
    Norm layer modified to incorporate timestep embeddings.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`, *optional*): The size of the embeddings dictionary.
        output_dim (`int`, *optional*):
        norm_elementwise_affine (`bool`, defaults to `False):
        norm_eps (`bool`, defaults to `False`):
        chunk_dim (`int`, defaults to `0`):
    NFh㈵>r   embedding_dimnum_embeddings
output_dimnorm_elementwise_affinenorm_eps	chunk_dimc                    sj   t    || _|p|d }|d urt||| _nd | _t | _t||| _	t
|d ||| _d S Nr   )super__init__r   nn	EmbeddingembSiLUsiluLinearlinear	LayerNormnorm)selfr   r   r   r   r   r   	__class__ T/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/models/normalization.pyr   (   s   
	
zAdaLayerNorm.__init__xtimesteptembreturnc                 C   s   | j d ur
|  |}| | |}| jdkr7|jddd\}}|d d d d d f }|d d d d d f }n	|jddd\}}| |d|  | }|S )Nr   r   dimr   )r   r   r   r   chunkr   )r   r"   r#   r$   shiftscaler    r    r!   forward?   s   


zAdaLayerNorm.forward)NNFr
   r   )NN)__name__
__module____qualname____doc__intboolfloatr   torchTensorr+   __classcell__r    r    r   r!   r	      s<    r	   c                   @   s"   e Zd ZdejdejfddZdS )FP32LayerNorminputsr%   c                 C   sN   |j }t| | j| jd ur| j nd | jd ur| j nd | j|S N)	dtypeF
layer_normr2   normalized_shapeweightbiasepsto)r   r7   origin_dtyper    r    r!   r+   U   s   zFP32LayerNorm.forwardN)r,   r-   r.   r3   r4   r+   r    r    r    r!   r6   T   s    r6   c                	       s`   e Zd ZdZddedededdf fd	d
Z	ddej	dej	dB de
ej	df fddZ  ZS )SD35AdaLayerNormZeroXz
    Norm layer adaptive layer norm zero (AdaLN-Zero).

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`): The size of the embeddings dictionary.
    r;   Tr   	norm_typer>   r%   Nc                    X   t    t | _tj|d| |d| _|dkr$tj|ddd| _d S t	d| d)	N	   r>   r;   Fư>elementwise_affiner?   Unsupported `norm_type` (z-) provided. Supported ones are: 'layer_norm'.
r   r   r   r   r   r   r   r   r   
ValueErrorr   r   rC   r>   r   r    r!   r   i   s   

zSD35AdaLayerNormZeroX.__init__hidden_statesr   .c              	   C   s   |  | |}|jddd\	}}}}}}}	}
}| |}|d|d d d f   |d d d f  }|d|
d d d f   |	d d d f  }|||||||fS )NrE   r   r&   r   r   r(   r   )r   rN   r   	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlp
shift_msa2
scale_msa2	gate_msa2norm_hidden_statesnorm_hidden_states2r    r    r!   r+   s   s   
((zSD35AdaLayerNormZeroX.forwardr;   Tr8   )r,   r-   r.   r/   r0   strr1   r   r3   r4   tupler+   r5   r    r    r   r!   rB   `   s     rB   c                       s   e Zd ZdZddededB f fddZ				dd	ejd
ejdB dejdB dej	dB dejdB de
ejejejejejf fddZ  ZS )AdaLayerNormZero
    Norm layer adaptive layer norm zero (adaLN-Zero).

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`): The size of the embeddings dictionary.
    Nr;   Tr   r   c                    s   t    |d urt||| _nd | _t | _tj|d| |d| _|dkr2tj	|ddd| _
d S |dkr@t|ddd| _
d S td	| d
)N   rF   r;   FrG   rH   fp32_layer_norm)rI   r>   rJ   @) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'.)r   r   r   r   r   r   r   r   r   r   r   r6   rL   )r   r   r   rC   r>   r   r    r!   r      s   


zAdaLayerNormZero.__init__r"   r#   class_labelshidden_dtyper   r%   c                 C   s   | j d ur| j |||d}| | |}|jddd\}}}}	}
}| |d|d d d f   |d d d f  }|||	|
|fS )N)rd   r`   r   r&   )r   r   r   r(   r   )r   r"   r#   rc   rd   r   rP   rQ   rR   rS   rT   rU   r    r    r!   r+      s   
.zAdaLayerNormZero.forward)Nr;   T)NNNN)r,   r-   r.   r/   r0   r   r3   r4   
LongTensorr9   r]   r+   r5   r    r    r   r!   r^      s(    r^   c                       sb   e Zd ZdZddef fddZ	ddejd	ejdB d
eejejejejejf fddZ	  Z
S )AdaLayerNormZeroSingler_   r;   Tr   c                    rD   )	N   rF   r;   FrG   rH   rJ   rb   rK   rM   r   r    r!   r      s   


zAdaLayerNormZeroSingle.__init__Nr"   r   r%   c                 C   sZ   |  | |}|jddd\}}}| |d|d d d f   |d d d f  }||fS )Nrg   r   r&   rO   )r   r"   r   rP   rQ   rR   r    r    r!   r+      s   .zAdaLayerNormZeroSingle.forwardr[   r8   r,   r-   r.   r/   r0   r   r3   r4   r]   r+   r5   r    r    r   r!   rf      s    rf   c                       sd   e Zd ZdZdededef fddZ	ddej	d	ej	dB d
e
ej	ej	ej	ej	f fddZ  ZS )LuminaRMSNormZeroz
    Norm layer adaptive RMS normalization zero.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
    r   r   r   c                    sB   t    t | _tjt|dd| dd| _t||d| _	d S )Ni      TrF   r?   )
r   r   r   r   r   r   minr   RMSNormr   )r   r   r   r   r   r    r!   r      s   

zLuminaRMSNormZero.__init__Nr"   r   r%   c                 C   sP   |  | |}|jddd\}}}}| |d|d d d f   }||||fS )Nrj   r   r&   rO   )r   r"   r   rQ   rR   rT   rU   r    r    r!   r+      s   zLuminaRMSNormZero.forwardr8   )r,   r-   r.   r/   r0   r2   r1   r   r3   r4   r]   r+   r5   r    r    r   r!   ri      s    ri   c                       s   e Zd ZdZddedef fddZ			ddejd	e	e
ejf dB d
edB dejdB deejejejejejf f
ddZ  ZS )AdaLayerNormSingleaT  
    Norm layer adaptive layer norm single (adaLN-single).

    As proposed in PixArt-Alpha (see: https://huggingface.co/papers/2310.00426; Section 2.3).

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        use_additional_conditions (`bool`): To use additional conditions for normalization or not.
    Fr   use_additional_conditionsc                    sB   t    t||d |d| _t | _tj|d| dd| _d S )Nrg   )size_emb_dimro   r`   TrF   )	r   r   r   r   r   r   r   r   r   )r   r   ro   r   r    r!   r      s   


zAdaLayerNormSingle.__init__Nr#   added_cond_kwargs
batch_sizerd   r%   c                 C   s>   |pd d d}| j |fi |||d}| | ||fS )N)
resolutionaspect_ratio)rr   rd   )r   r   r   )r   r#   rq   rr   rd   embedded_timestepr    r    r!   r+      s   zAdaLayerNormSingle.forward)F)NNN)r,   r-   r.   r/   r0   r1   r   r3   r4   dictr\   r9   r]   r+   r5   r    r    r   r!   rn      s"    
rn   c                       sZ   e Zd ZdZ	ddededededB def
 fd	d
Zdej	dej	dej	fddZ
  ZS )AdaGroupNorma  
    GroupNorm layer modified to incorporate timestep embeddings.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`): The size of the embeddings dictionary.
        num_groups (`int`): The number of groups to separate the channels into.
        act_fn (`str`, *optional*, defaults to `None`): The activation function to use.
        eps (`float`, *optional*, defaults to `1e-5`): The epsilon value to use for numerical stability.
    Nr
   r   out_dim
num_groupsact_fnr?   c                    sF   t    || _|| _|d u rd | _nt|| _t||d | _d S r   )	r   r   ry   r?   actr   r   r   r   )r   r   rx   ry   rz   r?   r   r    r!   r     s   

zAdaGroupNorm.__init__r"   r   r%   c                 C   sl   | j r|  |}| |}|d d d d d d f }|jddd\}}tj|| j| jd}|d|  | }|S )Nr   r   r&   rk   )r{   r   r(   r:   
group_normry   r?   )r   r"   r   r*   r)   r    r    r!   r+   '  s   

zAdaGroupNorm.forward)Nr
   )r,   r-   r.   r/   r0   r\   r2   r   r3   r4   r+   r5   r    r    r   r!   rw     s    $rw   c                       sP   e Zd ZdZ				ddedef fddZd	ejd
ejdejfddZ  Z	S )AdaLayerNormContinuousa  
    Adaptive normalization layer with a norm layer (layer_norm or rms_norm).

    Args:
        embedding_dim (`int`): Embedding dimension to use during projection.
        conditioning_embedding_dim (`int`): Dimension of the input condition.
        elementwise_affine (`bool`, defaults to `True`):
            Boolean flag to denote if affine transformation should be applied.
        eps (`float`, defaults to 1e-5): Epsilon factor.
        bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use.
        norm_type (`str`, defaults to `"layer_norm"`):
            Normalization layer to use. Values supported: "layer_norm", "rms_norm".
    Tr
   r;   r   conditioning_embedding_dimc                    sn   t    t | _tj||d |d| _|dkr#t||||| _d S |dkr0t	|||| _d S t
d| )Nr   rF   r;   rms_normunknown norm_type )r   r   r   r   r   r   r   r   r   rm   rL   )r   r   r~   rI   r?   r>   rC   r   r    r!   r   B  s   

zAdaLayerNormContinuous.__init__r"   conditioning_embeddingr%   c                 C   sj   |  | ||j}tj|ddd\}}| |d| d d d d d f  |d d d d d f  }|S )Nr   r   r&   )r   r   r@   r9   r3   r(   r   )r   r"   r   r   r*   r)   r    r    r!   r+   Z  s   :zAdaLayerNormContinuous.forward)Tr
   Tr;   )
r,   r-   r.   r/   r0   r   r3   r4   r+   r5   r    r    r   r!   r}   3  s    $r}   c                       sV   e Zd Z					ddedededB f fdd	Zd
ejdejdejfddZ  ZS )LuminaLayerNormContinuousTr
   r;   Nr   r~   rx   c                    s   t    t | _tj|||d| _|dkr t||||| _n|dkr-t	|||d| _nt
d| d | _|d urFtj|||d| _d S d S )NrF   r;   r   r?   rI   r   )r   r   r   r   r   r   linear_1r   r   rm   rL   linear_2)r   r   r~   rI   r?   r>   rC   rx   r   r    r!   r   c  s   

z"LuminaLayerNormContinuous.__init__r"   r   r%   c                 C   sX   |  | ||j}|}| |d| d d d d d f  }| jd ur*| |}|S Nr   )r   r   r@   r9   r   r   )r   r"   r   r   r*   r    r    r!   r+     s   $

z!LuminaLayerNormContinuous.forward)Tr
   Tr;   N)	r,   r-   r.   r0   r   r3   r4   r+   r5   r    r    r   r!   r   b  s(    
 r   c                       sj   e Zd ZdZdedef fddZ	ddejdejd	ejdB d
eejejejejejf fddZ	  Z
S )%CogView3PlusAdaLayerNormZeroTextImager_   r   r'   c                    sR   t    t | _tj|d| dd| _tj|ddd| _tj|ddd| _	d S )N   TrF   Fr
   rH   )
r   r   r   r   r   r   r   r   norm_xnorm_c)r   r   r'   r   r    r!   r     s
   

z.CogView3PlusAdaLayerNormZeroTextImage.__init__Nr"   contextr   r%   c                 C   s   |  | |}|jddd\}}}}}}	}
}}}}}| |}| |}|d|d d d f   |d d d f  }|d|d d d f   |
d d d f  }|||||	|||||f
S )Nr   r   r&   )r   r   r(   r   r   )r   r"   r   r   rP   rQ   rR   rS   rT   rU   c_shift_msac_scale_msa
c_gate_msac_shift_mlpc_scale_mlp
c_gate_mlpnormed_xnormed_contextr    r    r!   r+     s(   

((z-CogView3PlusAdaLayerNormZeroTextImage.forwardr8   rh   r    r    r   r!   r     s    r   c                       sj   e Zd Z			ddedededededd	f fd
dZdejdejdejde	ejejf fddZ
  ZS )CogVideoXLayerNormZeroTr
   conditioning_dimr   rI   r?   r>   r%   Nc                    s@   t    t | _tj|d| |d| _tj|||d| _d S )Nr`   rF   r   )	r   r   r   r   r   r   r   r   r   )r   r   r   rI   r?   r>   r   r    r!   r     s   

zCogVideoXLayerNormZero.__init__rN   encoder_hidden_statesr$   c           
      C   s   |  | |jddd\}}}}}}	| |d| d d d d d f  |d d d d d f  }| |d| d d d d d f  |d d d d d f  }|||d d d d d f |	d d d d d f fS )Nr`   r   r&   rO   )
r   rN   r   r$   r)   r*   gate	enc_shift	enc_scaleenc_gater    r    r!   r+     s   &::0zCogVideoXLayerNormZero.forward)Tr
   T)r,   r-   r.   r0   r1   r2   r   r3   r4   r]   r+   r5   r    r    r   r!   r     s4    r   z>=z2.1.0c                       8   e Zd ZdZddededef fddZd	d
 Z  ZS )r   a  
        LayerNorm with the bias parameter.

        Args:
            dim (`int`): Dimensionality to use for the parameters.
            eps (`float`, defaults to 1e-5): Epsilon factor.
            elementwise_affine (`bool`, defaults to `True`):
                Boolean flag to denote if affine transformation should be applied.
            bias (`bias`, defaults to `True`): Boolean flag to denote if bias should be use.
        r
   Tr?   rI   r>   c                    sx   t    || _t|tjr|f}t|| _|r4t	
t|| _|r/t	
t|| _d S d | _d S d | _d | _d S r8   )r   r   r?   
isinstancenumbersIntegralr3   Sizer'   r   	Parameteronesr=   zerosr>   r   r'   r?   rI   r>   r   r    r!   r     s   
$
zLayerNorm.__init__c                 C   s   t || j| j| j| jS r8   )r:   r;   r'   r=   r>   r?   )r   inputr    r    r!   r+     s   zLayerNorm.forward)r
   TT	r,   r-   r.   r/   r2   r1   r   r+   r5   r    r    r   r!   r     s    r   c                       r   )rm   a  
    RMS Norm as introduced in https://huggingface.co/papers/1910.07467 by Zhang et al.

    Args:
        dim (`int`): Number of dimensions to use for `weights`. Only effective when `elementwise_affine` is True.
        eps (`float`): Small value to use when calculating the reciprocal of the square-root.
        elementwise_affine (`bool`, defaults to `True`):
            Boolean flag to denote if affine transformation should be applied.
        bias (`bool`, defaults to False): If also training the `bias` param.
    TFr?   rI   r>   c                    sx   t    || _|| _t|tjr|f}t|| _	d | _
d | _|r8tt|| _
|r:tt|| _d S d S d S r8   )r   r   r?   rI   r   r   r   r3   r   r'   r=   r>   r   r   r   r   r   r   r    r!   r   
  s   
zRMSNorm.__init__c                 C   s   t  r5dd l}| jd ur| jjtjtjfv r|| jj}|j|| j| j	dd }| j
d ur3|| j
 }|S |j}|tjdjddd}|t|| j	  }| jd urw| jjtjtjfv rf|| jj}|| j }| j
d uru|| j
 }|S ||}|S )Nr   )epsilonr   Tkeepdim)r   	torch_npur=   r9   r3   float16bfloat16r@   npu_rms_normr?   r>   float32powmeanrsqrt)r   rN   r   input_dtypevariancer    r    r!   r+     s*   







zRMSNorm.forward)TFr   r    r    r   r!   rm     s    rm   c                       s0   e Zd Zddedef fddZdd Z  ZS )	MochiRMSNormTr?   rI   c                    sR   t    || _t|tjr|f}t|| _|r$t	
t|| _d S d | _d S r8   )r   r   r?   r   r   r   r3   r   r'   r   r   r   r=   )r   r'   r?   rI   r   r    r!   r   =  s   

zMochiRMSNorm.__init__c                 C   sX   |j }|tjdjddd}|t|| j  }| jd ur%|| j }||}|S )Nr   r   Tr   )	r9   r@   r3   r   r   r   r   r?   r=   )r   rN   r   r   r    r    r!   r+   L  s   


zMochiRMSNorm.forward)T)r,   r-   r.   r2   r1   r   r+   r5   r    r    r   r!   r   <  s    r   c                       s(   e Zd ZdZ fddZdd Z  ZS )GlobalResponseNormz
    Global response normalization as introduced in ConvNeXt-v2 (https://huggingface.co/papers/2301.00808).

    Args:
        dim (`int`): Number of dimensions to use for the `gamma` and `beta`.
    c                    s>   t    ttddd|| _ttddd|| _d S r   )r   r   r   r   r3   r   gammabeta)r   r'   r   r    r!   r   a  s   
zGlobalResponseNorm.__init__c                 C   s@   t j|dddd}||jdddd  }| j||  | j | S )Nr   )r   r   T)pr'   r   r   )r'   r   rG   )r3   r   r   r   r   )r   r"   gxnxr    r    r!   r+   f  s   zGlobalResponseNorm.forward)r,   r-   r.   r/   r   r+   r5   r    r    r   r!   r   X  s    r   c                       sB   e Zd Zddededef fddZd	ejd
ejfddZ  Z	S )LpNormr   r   -q=r   r'   r?   c                    s    t    || _|| _|| _d S r8   )r   r   r   r'   r?   )r   r   r'   r?   r   r    r!   r   m  s   

zLpNorm.__init__rN   r%   c                 C   s   t j|| j| j| jdS )N)r   r'   r?   )r:   	normalizer   r'   r?   )r   rN   r    r    r!   r+   t  s   zLpNorm.forward)r   r   r   )
r,   r-   r.   r0   r2   r   r3   r4   r+   r5   r    r    r   r!   r   l  s    r   
batch_normr
   TrC   num_featuresr?   rI   r>   r%   c                 C   sf   | dkrt ||||d}|S | dkrtj||||d}|S | dkr+tj|||d}|S td| d)Nr   )r?   rI   r>   r;   r   )r?   affinez
norm_type=z is not supported.)rm   r   r   BatchNorm2drL   )rC   r   r?   rI   r>   r   r    r    r!   get_normalizationx  s   r   )r   Nr
   TT)&r   r3   torch.nnr   torch.nn.functional
functionalr:   utilsr   r   activationsr   
embeddingsr   r   Moduler	   r   r6   rB   r^   rf   ri   rn   rw   r}   r   r   r   rm   r   r   r   r\   r0   r2   r1   r   r    r    r    r!   <module>   sX   9"+ "&/1-
!>