o
    }oi                     @   s   d dl Z d dlZd dlmZmZ dejdededejfddZG d	d
 d
ejZG dd dejZ		 		ddejdede	de
de
defddZG dd dejZG dd dejZdS )    N)Tensornnposdimthetareturnc                 C   sR   |d dks
J dt jd|dt j| jd| }d||  }t d| |}| S )z
    Different from the original ROPE used for flux.
    Megatron attention takes the out product and calculate sin/cos inside, so we only need to get the freqs here
    in the shape of [seq, ..., dim]
       r   zThe dimension must be even.)dtypedeviceg      ?z...n,d->...nd)torcharangefloat64r
   einsumfloat)r   r   r   scaleomegaout r   a/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/models/flux/layers.pyrope   s
   r   c                       sH   e Zd ZdZdededee f fddZdejdejfd	d
Z	  Z
S )EmbedNDz;
    Generate Rope matrix with preset axes dimensions.
    r   r   axes_dimc                    s    t    || _|| _|| _d S N)super__init__r   r   r   )selfr   r   r   	__class__r   r   r   )   s   

zEmbedND.__init__idsr   c                    sn    j d }tj fddt|D dd}|ddddd}tj||gddjg |j d d dR  S )	Nc                    s(   g | ]}t  d |f j| jqS ).)r   r   r   ).0ir   r   r   r   
<listcomp>4   s   ( z#EmbedND.forward.<locals>.<listcomp>r      r   r      )shaper   catrange	unsqueezepermutestackreshape)r   r   n_axesembr   r"   r   forward0   s   
,zEmbedND.forward)__name__
__module____qualname____doc__intlistr   r   r   r0   __classcell__r   r   r   r   r   $   s    r   c                       s<   e Zd ZdZdedef fddZdedefdd	Z  ZS )
MLPEmbedderzF
    MLP embedder with two projection layers and Silu in between.
    in_dim
hidden_dimc                    s<   t    tj||dd| _t | _tj||dd| _d S )NT)bias)r   r   r   Linearin_layerSiLUsilu	out_layer)r   r9   r:   r   r   r   r   @   s   

zMLPEmbedder.__init__xr   c                 C   s   |  | | |S r   )r@   r?   r=   )r   rA   r   r   r   r0   G   s   zMLPEmbedder.forward)	r1   r2   r3   r4   r5   r   r   r0   r7   r   r   r   r   r8   ;   s    r8   Tr%   '  	timestepsembedding_dimflip_sin_to_cosdownscale_freq_shiftr   
max_periodc           	      C   s   t | jdksJ d|d }t| tjd|tj| jd }|||  }t|}| dddf 	 |dddf  }|| }tj
t|t|gdd}|rktj
|dd|df |ddd|f gdd}|d dkrytjj|d	}|S )
a&  
    This matches the implementation in Denoising Diffusion Probabilistic Models: Create sinusoidal timestep embeddings.

    Args
        timesteps (torch.Tensor):
            a 1-D Tensor of N indices, one per batch element. These may be fractional.
        embedding_dim (int):
            the dimension of the output.
        flip_sin_to_cos (bool):
            Whether the embedding order should be `cos, sin` (if True) or `sin, cos` (if False)
        downscale_freq_shift (float):
            Controls the delta between frequencies between dimensions
        scale (float):
            Scaling factor applied to the embeddings.
        max_period (int):
            Controls the maximum frequency of the embeddings
    Returns
        torch.Tensor: an [N x dim] Tensor of positional embeddings.
    r%   zTimesteps should be a 1d-arrayr   r   )startendr	   r
   Nr   r$   )r   r%   r   r   )lenr'   mathlogr   r   float32r
   expr   r(   sincosr   
functionalpad)	rC   rD   rE   rF   r   rG   half_dimexponentr/   r   r   r   get_timestep_embeddingL   s   
$2rU   c                       sR   e Zd Z				ddedededed	ef
 fd
dZdejdejfddZ	  Z
S )	TimestepsTr   r%   rB   rD   rE   rF   r   rG   c                    s,   t    || _|| _|| _|| _|| _d S r   )r   r   rD   rE   rF   r   rG   )r   rD   rE   rF   r   rG   r   r   r   r      s   

zTimesteps.__init__rC   r   c                 C   s"   t || j| j| j| j| jd}|S )N)rE   rF   r   rG   )rU   rD   rE   rF   r   rG   )r   rC   t_embr   r   r   r0      s   zTimesteps.forwardTr   r%   rB   )r1   r2   r3   r5   boolr   r   r   r   r0   r7   r   r   r   r   rV      s"    rV   c                       sZ   e Zd ZdZ				ddededed	ed
edef fddZdej	dej	fddZ
  ZS )TimeStepEmbeddera  
    A neural network module that embeds timesteps for use in models such as diffusion models.
    It projects the input timesteps to a higher-dimensional space and then embeds them using
    an MLP (Multilayer Perceptron). The projection and embedding provide a learned representation
    of the timestep that can be used in further computations.

    Args:
        embedding_dim (int):
            The dimensionality of the timestep embedding space.
        hidden_dim (int):
            The dimensionality of the hidden layer in the MLPEmbedder.
        flip_sin_to_cos (bool, optional):
            Whether to flip the sine and cosine components during the projection (default is True).
        downscale_freq_shift (float, optional):
            A scaling factor for the frequency shift during the projection (default is 0).
        scale (float, optional):
            A scaling factor applied to the timestep projections (default is 1).
        max_period (int, optional):
            The maximum period for the sine and cosine functions used in projection (default is 10000).

    Methods:
        forward: Takes a tensor of timesteps and returns their embedded representation.
    Tr   r%   rB   rD   r:   rE   rF   r   rG   c                    s0   t    t|||||d| _t||d| _d S )N)rD   rE   rF   r   rG   )r9   r:   )r   r   rV   	time_projr8   time_embedder)r   rD   r:   rE   rF   r   rG   r   r   r   r      s   

zTimeStepEmbedder.__init__rC   r   c                 C   s   |  |}| |}|S r   )r[   r\   )r   rC   timesteps_projtimesteps_embr   r   r   r0      s   

zTimeStepEmbedder.forwardrX   )r1   r2   r3   r4   r5   rY   r   r   r   r   r0   r7   r   r   r   r   rZ      s(    rZ   rX   )rK   r   r   r   r5   r   Moduler   r8   rY   r   rU   rV   rZ   r   r   r   r   <module>   s2   
7