o
    }oiD                     @   s   d dl Z d dlmZmZmZ d dlZd dlZd dlm	  m
Z d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lm	Z	 G d
d deZdd ZG dd deZG dd deZdS )    N)DictLiteralOptional)TimestepEmbeddingget_3d_sincos_pos_embed)	rearrange)	Rearrange)parallel_state)get_pos_emb_on_this_cp_rank)MegatronModule)nnc                       sF   e Zd ZdZddedef fddZdejdejf fd	d
Z  Z	S )ParallelTimestepEmbeddinga  
    ParallelTimestepEmbedding is a subclass of TimestepEmbedding that initializes
    the embedding layers with an optional random seed for syncronization.

    Args:
        in_channels (int): Number of input channels.
        time_embed_dim (int): Dimension of the time embedding.
        seed (int, optional): Random seed for initializing the embedding layers.
                              If None, no specific seed is set.

    Attributes:
        linear_1 (nn.Module): First linear layer for the embedding.
        linear_2 (nn.Module): Second linear layer for the embedding.

    Methods:
        __init__(in_channels, time_embed_dim, seed=None): Initializes the embedding layers.
    Nin_channelstime_embed_dimc                    s   t  j||d |d ur0tj  t| | j  | j  W d    n1 s+w   Y  t	
 dkrXt| jjdd t| jjdd t| jjdd t| jjdd d S d S )N)r   r      pipeline_parallelT)super__init__torchrandomfork_rngmanual_seedlinear_1reset_parameterslinear_2r	   &get_pipeline_model_parallel_world_sizesetattrweightbias)selfr   r   seed	__class__ h/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/models/dit/dit_embeddings.pyr   2   s   

z"ParallelTimestepEmbedding.__init__xreturnc                    s   t  |jtjddS )z
        Computes the positional embeddings for the input tensor.

        Args:
            x (torch.Tensor): Input tensor of shape (B, T, H, W, C).

        Returns:
            torch.Tensor: Positional embeddings of shape (B, T, H, W, C).
        Tnon_blocking)r   forwardtor   bfloat16)r   r%   r!   r#   r$   r)   @   s   
z!ParallelTimestepEmbedding.forwardN
__name__
__module____qualname____doc__intr   r   Tensorr)   __classcell__r#   r#   r!   r$   r      s    "r   c                 C   s   t  }t  }tj|gdddjdd}| jg | jd| |d| j|d d R  } | ||} | jg | jd| d| j|d d R  } | S )	as  
    Adjusts the positional embeddings tensor to the current context parallel rank.

    Args:
        pos_emb (torch.Tensor): The positional embeddings tensor.
        seq_dim (int): The sequence dimension index in the positional embeddings tensor.

    Returns:
        torch.Tensor: The adjusted positional embeddings tensor for the current context parallel rank.
    cpuT)device
pin_memoryr'   Nr      )	r	   get_context_parallel_world_sizeget_context_parallel_rankr   tensorcudaviewshapeindex_select)pos_embseq_dimcp_sizecp_rankcp_idxr#   r#   r$   r
   M   s   40r
   c                       sD   e Zd ZdZ		ddededef fddZdejfd	d
Z  Z	S )SinCosPosEmb3DaH  
    SinCosPosEmb3D is a 3D sine-cosine positional embedding module.

    Args:
        model_channels (int): Number of channels in the model.
        h (int): Length of the height dimension.
        w (int): Length of the width dimension.
        t (int): Length of the temporal dimension.
        spatial_interpolation_scale (float, optional): Scale factor for spatial interpolation. Default is 1.0.
        temporal_interpolation_scale (float, optional): Scale factor for temporal interpolation. Default is 1.0.

    Methods:
        forward(pos_ids: torch.Tensor) -> torch.Tensor:
            Computes the positional embeddings for the input tensor.

            Args:
                pos_ids (torch.Tensor): Input tensor of shape (B S 3).

            Returns:
                torch.Tensor: Positional embeddings of shape (B S D).
          ?hwtc                    sv   t  j|d || _|| _|| _t|j||g|||}t|d}tj	
|jd |j| _tj	jt|dd| j_d S )Nconfigzt hw c -> (t hw) cr   F)requires_grad)r   r   rH   rI   rJ   r   hidden_sizer   r   r   	Embeddingr?   pos_embedding	Parameterr<   r   )r   rL   rH   rI   rJ   spatial_interpolation_scaletemporal_interpolation_scaleparamr!   r#   r$   r   x   s   	
zSinCosPosEmb3D.__init__pos_idsc                 C   s4   |d | j  | j |d | j  |d  }| |S N).r   ).r   ).r9   )rH   rI   rP   )r   rU   pos_idr#   r#   r$   r)      s   *
zSinCosPosEmb3D.forward)rG   rG   r-   r#   r#   r!   r$   rF   a   s    rF   c                       sJ   e Zd Zdededef fddZdd Zdd	 Zd
ejfddZ	  Z
S )FactorizedLearnable3DEmbeddingrJ   rH   rI   c                    s   t  j|d tj||j| _tj||j| _tj||j| _d|	 v r]|d }tj
 " t| |jr?|   n|   W d    d S W d    d S 1 sVw   Y  d S |jrf|   d S d S )NrK   r    )r   r   r   r   rO   rN   emb_temb_hemb_wkeysr   r   r   perform_initializationcustomize_init_paramr   )r   rL   rJ   rH   rI   kwargsr    r!   r#   r$   r      s"   


"z'FactorizedLearnable3DEmbedding.__init__c                 C   s4   | j | jj | j | jj | j | jj d S r,   )rL   init_methodrY   r   rZ   r[   r   r#   r#   r$   r^      s   z3FactorizedLearnable3DEmbedding.customize_init_paramc                 C   s"   | j   | j  | j  d S r,   )rY   r   rZ   r[   ra   r#   r#   r$   r      s   

z/FactorizedLearnable3DEmbedding.reset_parametersrU   c                 C   s*   |  |d | |d  | |d  S rV   )rY   rZ   r[   )r   rU   r#   r#   r$   r)      s   *z&FactorizedLearnable3DEmbedding.forward)r.   r/   r0   r2   r   r^   r   r   r3   r)   r4   r#   r#   r!   r$   rX      s    rX   )mathtypingr   r   r   numpynpr   torch.nn.functionalr   
functionalFdiffusers.models.embeddingsr   r   einopsr   einops.layers.torchr   megatron.corer	   ;megatron.core.models.common.embeddings.rotary_pos_embeddingr
    megatron.core.transformer.moduler   r   rF   rX   r#   r#   r#   r$   <module>   s    .2