o
    ϯi                     @   sP   d dl Z d dlmZ d dlZd dlmZ G dd dejZG dd dejZdS )    N)Tuple)nnc                       sF   e Zd ZdZddededdf fddZd	ejdejfd
dZ  Z	S )PositionalEncodinga?  
    Applies a positional encoding to a tensor with shape (batch_size x seq_len x embed_dim).

    The positional encoding is computed as follows:
        PE(pos,2i) = sin(pos/10000^(2i/dmodel))
        PE(pos,2i+1) = cos(pos/10000^(2i/dmodel))

        where pos = position, pos in [0, seq_len)
        dmodel = data embedding dimension = embed_dim
        i = dimension index, i in [0, embed_dim)

    Reference: "Attention Is All You Need" https://arxiv.org/abs/1706.03762
    Implementation Reference: https://pytorch.org/tutorials/beginner/transformer_tutorial.html
       	embed_dimseq_lenreturnNc                    s   t    tj||tjd}tjd|tjdd}ttd|d t	d |  }t
|| |d d dd df< t|| |d d dd df< |d}| d| d S )N)dtyper         g     @pe)super__init__torchzerosfloatarange	unsqueezeexpmathlogsincosregister_buffer)selfr   r   r   positiondiv_term	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/layers/positional_encoding.pyr      s   
   
zPositionalEncoding.__init__xc                 C   s\   | j d|dksJ d| j   d|   || j d d d |dd d f  S )Nr
   z'Cannot apply position encoding of size z when input has size )r   size)r   r!   r   r   r    forward&   s   &zPositionalEncoding.forward)r   )
__name__
__module____qualname____doc__intr   r   Tensorr#   __classcell__r   r   r   r    r   
   s    r   c                       sh   e Zd ZdZ		ddedeeeef dededd	f
 fd
dZedd Z	de
jde
jfddZ  ZS )#SpatioTemporalClsPositionalEncodingzJ
    Add a cls token and apply a spatiotemporal encoding to a tensor.
    FTr   patch_embed_shapesep_pos_embedhas_clsr   Nc                    s   t    t|dksJ d|| _|| _|| _|d |d  | _|d | _| jr=t	t
dd|| _| j| j d }n| j| j }| jrpt	t
d| j|| _t	t
d| j|| _| jrnt	t
dd|| _dS dS t	t
d||| _dS )ap  
        Args:
            embed_dim (int): Embedding dimension for input sequence.
            patch_embed_shape (Tuple): The number of patches in each dimension
                (T, H, W) after patch embedding.
            sep_pos_embed (bool): If set to true, one positional encoding is used for
                spatial patches and another positional encoding is used for temporal
                sequence. Otherwise, only one positional encoding is used for all the
                patches.
            has_cls (bool): If set to true, a cls token is added in the beginning of each
                input sequence.
           z5Patch_embed_shape should be in the form of (T, H, W).r
   r   r   N)r   r   lencls_embed_onr-   _patch_embed_shapenum_spatial_patchnum_temporal_patchr   	Parameterr   r   	cls_tokenpos_embed_spatialpos_embed_temporalpos_embed_class	pos_embed)r   r   r,   r-   r.   num_patchesr   r   r    r   3   s0   

z,SpatioTemporalClsPositionalEncoding.__init__c                 C   s   | j S )N)r2   )r   r   r   r    r,   b   s   z5SpatioTemporalClsPositionalEncoding.patch_embed_shaper!   c                 C   s   |j \}}}| jr| j|dd}tj||fdd}| jrB| jd| j	dtj
| j| jdd }| jr<t| j|gd}|| }|S || j }|S )zC
        Args:
            x (torch.Tensor): Input tensor.
        r
   )dim)shaper1   r6   expandr   catr-   r7   repeatr4   repeat_interleaver8   r3   r9   r:   )r   r!   BNC
cls_tokensr:   r   r   r    r#   f   s(   
z+SpatioTemporalClsPositionalEncoding.forward)FT)r$   r%   r&   r'   r(   r   boolr   propertyr,   r   r)   r#   r*   r   r   r   r    r+   .   s$    /
r+   )r   typingr   r   r   Moduler   r+   r   r   r   r    <module>   s   $