o
    piA                     @   sj  d dl Z d dlmZmZmZ d dlZd dlmZ d dlm  m	Z
 ddlmZ ddlmZ ddlmZmZ G dd	 d	ejZG d
d dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZeddrejZn	G dd dejZG d d! d!ejZG d"d# d#ejZdS )$    N)DictOptionalTuple   )is_torch_version   )get_activation)CombinedTimestepLabelEmbeddings)PixArtAlphaCombinedTimestepSizeEmbeddingsc                       s|   e Zd ZdZ					ddedee dee d	ed
edef fddZ	dde	j
dee	j
 dee	j
 de	j
fddZ  ZS )AdaLayerNorma  
    Norm layer modified to incorporate timestep embeddings.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`, *optional*): The size of the embeddings dictionary.
        output_dim (`int`, *optional*):
        norm_elementwise_affine (`bool`, defaults to `False):
        norm_eps (`bool`, defaults to `False`):
        chunk_dim (`int`, defaults to `0`):
    NFh㈵>r   embedding_dimnum_embeddings
output_dimnorm_elementwise_affinenorm_eps	chunk_dimc                    sj   t    || _|p|d }|d urt||| _nd | _t | _t||| _	t
|d ||| _d S Nr   )super__init__r   nn	EmbeddingembSiLUsiluLinearlinear	LayerNormnorm)selfr   r   r   r   r   r   	__class__ \/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/diffusers/models/normalization.pyr   ,   s   
	
zAdaLayerNorm.__init__xtimesteptembreturnc                 C   s   | j d ur
|  |}| | |}| jdkr7|jddd\}}|d d d d d f }|d d d d d f }n	|jddd\}}| |d|  | }|S )Nr   r   dimr   )r   r   r   r   chunkr   )r   r$   r%   r&   shiftscaler"   r"   r#   forwardC   s   


zAdaLayerNorm.forward)NNFr   r   )NN)__name__
__module____qualname____doc__intr   boolfloatr   torchTensorr-   __classcell__r"   r"   r    r#   r      s<    r   c                   @   s"   e Zd ZdejdejfddZdS )FP32LayerNorminputsr'   c                 C   sN   |j }t| | j| jd ur| j nd | jd ur| j nd | j|S N)	dtypeF
layer_normr4   normalized_shapeweightbiasepsto)r   r9   origin_dtyper"   r"   r#   r-   Y   s   zFP32LayerNorm.forwardN)r.   r/   r0   r5   r6   r-   r"   r"   r"   r#   r8   X   s    r8   c                       s   e Zd ZdZddedee f fddZ				dd	ejd
eej deej	 deej
 deej deejejejejejf fddZ  ZS )AdaLayerNormZero
    Norm layer adaptive layer norm zero (adaLN-Zero).

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`): The size of the embeddings dictionary.
    Nr=   Tr   r   c                    s   t    |d urt||| _nd | _t | _tj|d| |d| _|dkr2tj	|ddd| _
d S |dkr@t|ddd| _
d S td	| d
)N   r@   r=   Fư>elementwise_affinerA   fp32_layer_norm)rJ   r@   Unsupported `norm_type` (@) provided. Supported ones are: 'layer_norm', 'fp32_layer_norm'.)r   r   r	   r   r   r   r   r   r   r   r   r8   
ValueError)r   r   r   	norm_typer@   r    r"   r#   r   m   s   


zAdaLayerNormZero.__init__r$   r%   class_labelshidden_dtyper   r'   c                 C   s   | j d ur| j |||d}| | |}|jddd\}}}}	}
}| |d|d d d f   |d d d f  }|||	|
|fS )N)rQ   rF   r   r(   )r   r   r   r*   r   )r   r$   r%   rP   rQ   r   	shift_msa	scale_msagate_msa	shift_mlp	scale_mlpgate_mlpr"   r"   r#   r-      s   
.zAdaLayerNormZero.forward)Nr=   T)NNNN)r.   r/   r0   r1   r2   r   r   r5   r6   
LongTensorr;   r   r-   r7   r"   r"   r    r#   rD   d   s(    rD   c                       sb   e Zd ZdZddef fddZ	ddejd	eej d
e	ejejejejejf fddZ
  ZS )AdaLayerNormZeroSinglerE   r=   Tr   c                    sX   t    t | _tj|d| |d| _|dkr$tj|ddd| _d S t	d| d)	N   rG   r=   FrH   rI   rL   rM   )
r   r   r   r   r   r   r   r   r   rN   )r   r   rO   r@   r    r"   r#   r      s   


zAdaLayerNormZeroSingle.__init__Nr$   r   r'   c                 C   sZ   |  | |}|jddd\}}}| |d|d d d f   |d d d f  }||fS )NrZ   r   r(   r   r   r*   r   )r   r$   r   rR   rS   rT   r"   r"   r#   r-      s   .zAdaLayerNormZeroSingle.forward)r=   Tr:   )r.   r/   r0   r1   r2   r   r5   r6   r   r   r-   r7   r"   r"   r    r#   rY      s    rY   c                       sd   e Zd ZdZdededef fddZ	ddej	d	e
ej	 d
eej	ej	ej	ej	f fddZ  ZS )LuminaRMSNormZeroz
    Norm layer adaptive RMS normalization zero.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
    r   r   r   c                    sD   t    t | _tjt|dd| dd| _t|||d| _	d S )Ni      TrG   rA   rJ   )
r   r   r   r   r   r   minr   RMSNormr   )r   r   r   r   r    r"   r#   r      s   

zLuminaRMSNormZero.__init__Nr$   r   r'   c                 C   sP   |  | |}|jddd\}}}}| |d|d d d f   }||||fS )Nr]   r   r(   r[   )r   r$   r   rS   rT   rV   rW   r"   r"   r#   r-      s   zLuminaRMSNormZero.forwardr:   )r.   r/   r0   r1   r2   r4   r3   r   r5   r6   r   r   r-   r7   r"   r"   r    r#   r\      s    r\   c                       s   e Zd ZdZddedef fddZ			ddejd	e	e
eejf  d
e	e de	ej deejejejejejf f
ddZ  ZS )AdaLayerNormSingleaL  
    Norm layer adaptive layer norm single (adaLN-single).

    As proposed in PixArt-Alpha (see: https://arxiv.org/abs/2310.00426; Section 2.3).

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        use_additional_conditions (`bool`): To use additional conditions for normalization or not.
    Fr   use_additional_conditionsc                    sB   t    t||d |d| _t | _tj|d| dd| _d S )NrZ   )size_emb_dimrb   rF   TrG   )	r   r   r
   r   r   r   r   r   r   )r   r   rb   r    r"   r#   r      s   


zAdaLayerNormSingle.__init__Nr%   added_cond_kwargs
batch_sizerQ   r'   c                 C   s0   | j |fi |||d}| | ||fS )N)re   rQ   )r   r   r   )r   r%   rd   re   rQ   embedded_timestepr"   r"   r#   r-      s   zAdaLayerNormSingle.forward)F)NNN)r.   r/   r0   r1   r2   r3   r   r5   r6   r   r   strr;   r   r-   r7   r"   r"   r    r#   ra      s"    
ra   c                       sZ   e Zd ZdZ	ddedededee def
 fd	d
Zde	j
de	j
de	j
fddZ  ZS )AdaGroupNorma  
    GroupNorm layer modified to incorporate timestep embeddings.

    Parameters:
        embedding_dim (`int`): The size of each embedding vector.
        num_embeddings (`int`): The size of the embeddings dictionary.
        num_groups (`int`): The number of groups to separate the channels into.
        act_fn (`str`, *optional*, defaults to `None`): The activation function to use.
        eps (`float`, *optional*, defaults to `1e-5`): The epsilon value to use for numerical stability.
    Nr   r   out_dim
num_groupsact_fnrA   c                    sF   t    || _|| _|d u rd | _nt|| _t||d | _d S r   )	r   r   rj   rA   actr   r   r   r   )r   r   ri   rj   rk   rA   r    r"   r#   r      s   

zAdaGroupNorm.__init__r$   r   r'   c                 C   sl   | j r|  |}| |}|d d d d d d f }|jddd\}}tj|| j| jd}|d|  | }|S )Nr   r   r(   )rA   )rl   r   r*   r<   
group_normrj   rA   )r   r$   r   r,   r+   r"   r"   r#   r-   	  s   

zAdaGroupNorm.forward)Nr   )r.   r/   r0   r1   r2   r   rg   r4   r   r5   r6   r-   r7   r"   r"   r    r#   rh      s    $rh   c                       sL   e Zd Z				ddedef fddZdejd	ejd
ejfddZ  ZS )AdaLayerNormContinuousTr   r=   r   conditioning_embedding_dimc                    sn   t    t | _tj||d |d| _|dkr#t||||| _d S |dkr0t	|||| _d S t
d| )Nr   rG   r=   rms_normunknown norm_type )r   r   r   r   r   r   r   r   r   r`   rN   )r   r   ro   rJ   rA   r@   rO   r    r"   r#   r     s   

zAdaLayerNormContinuous.__init__r$   conditioning_embeddingr'   c                 C   sj   |  | ||j}tj|ddd\}}| |d| d d d d d f  |d d d d d f  }|S )Nr   r   r(   )r   r   rB   r;   r5   r*   r   )r   r$   rr   r   r,   r+   r"   r"   r#   r-   .  s   :zAdaLayerNormContinuous.forward)Tr   Tr=   )	r.   r/   r0   r2   r   r5   r6   r-   r7   r"   r"   r    r#   rn     s    
$rn   c                       sV   e Zd Z					ddededee f fdd	Zd
ejdejdejfddZ  Z	S )LuminaLayerNormContinuousTr   r=   Nr   ro   ri   c                    sp   t    t | _tj|||d| _|dkr t||||| _nt	d| |d ur6tj|||d| _
d S d S )NrG   r=   rq   )r   r   r   r   r   r   linear_1r   r   rN   linear_2)r   r   ro   rJ   rA   r@   rO   ri   r    r"   r#   r   7  s   

z"LuminaLayerNormContinuous.__init__r$   rr   r'   c                 C   sX   |  | ||j}|}| |d| d d d d d f  }| jd ur*| |}|S Nr   )rt   r   rB   r;   r   ru   )r   r$   rr   r   r,   r"   r"   r#   r-   V  s   $

z!LuminaLayerNormContinuous.forward)Tr   Tr=   N)
r.   r/   r0   r2   r   r   r5   r6   r-   r7   r"   r"   r    r#   rs   6  s(    
rs   c                       sj   e Zd Z			ddedededededd	f fd
dZdejdejdejde	ejejf fddZ
  ZS )CogVideoXLayerNormZeroTr   conditioning_dimr   rJ   rA   r@   r'   Nc                    s@   t    t | _tj|d| |d| _tj|||d| _d S )NrF   rG   r^   )	r   r   r   r   r   r   r   r   r   )r   rx   r   rJ   rA   r@   r    r"   r#   r   g  s   

zCogVideoXLayerNormZero.__init__hidden_statesencoder_hidden_statesr&   c           
      C   s   |  | |jddd\}}}}}}	| |d| d d d d d f  |d d d d d f  }| |d| d d d d d f  |d d d d d f  }|||d d d d d f |	d d d d d f fS )NrF   r   r(   r[   )
r   ry   rz   r&   r+   r,   gate	enc_shift	enc_scaleenc_gater"   r"   r#   r-   u  s   &::0zCogVideoXLayerNormZero.forward)Tr   T)r.   r/   r0   r2   r3   r4   r   r5   r6   r   r-   r7   r"   r"   r    r#   rw   f  s4    rw   z>=z2.1.0c                       s4   e Zd Zd
dededef fddZdd	 Z  ZS )r   r   TrA   rJ   r@   c                    sx   t    || _t|tjr|f}t|| _|r4t	
t|| _|r/t	
t|| _d S d | _d S d | _d | _d S r:   )r   r   rA   
isinstancenumbersIntegralr5   Sizer)   r   	Parameteronesr?   zerosr@   )r   r)   rA   rJ   r@   r    r"   r#   r     s   
$
zLayerNorm.__init__c                 C   s   t || j| j| j| jS r:   )r<   r=   r)   r?   r@   rA   )r   inputr"   r"   r#   r-     s   zLayerNorm.forward)r   TTr.   r/   r0   r4   r3   r   r-   r7   r"   r"   r    r#   r     s    r   c                       s0   e Zd Zddedef fddZdd Z  ZS )	r`   TrA   rJ   c                    sR   t    || _t|tjr|f}t|| _|r$t	
t|| _d S d | _d S r:   )r   r   rA   r   r   r   r5   r   r)   r   r   r   r?   )r   r)   rA   rJ   r    r"   r#   r     s   

zRMSNorm.__init__c                 C   s~   |j }|tjdjddd}|t|| j  }| jd ur8| jj tj	tj
fv r1|| jj }|| j }|S ||}|S )Nr   T)keepdim)r;   rB   r5   float32powmeanrsqrtrA   r?   float16bfloat16)r   ry   input_dtypevariancer"   r"   r#   r-     s   


zRMSNorm.forward)Tr   r"   r"   r    r#   r`     s    r`   c                       s$   e Zd Z fddZdd Z  ZS )GlobalResponseNormc                    s>   t    ttddd|| _ttddd|| _d S rv   )r   r   r   r   r5   r   gammabeta)r   r)   r    r"   r#   r     s   
zGlobalResponseNorm.__init__c                 C   s@   t j|dddd}||jdddd  }| j||  | j | S )Nr   )r   r   T)pr)   r   r   )r)   r   rH   )r5   r   r   r   r   )r   r$   gxnxr"   r"   r#   r-     s   zGlobalResponseNorm.forward)r.   r/   r0   r   r-   r7   r"   r"   r    r#   r     s    r   ) r   typingr   r   r   r5   torch.nnr   torch.nn.functional
functionalr<   utilsr   activationsr   
embeddingsr	   r
   Moduler   r   r8   rD   rY   r\   ra   rh   rn   rs   rw   r`   r   r"   r"   r"   r#   <module>   s.   9+ !&!0
 