from typing import List, Optional, Tuple

import torch
from pytorchvideo.layers.utils import set_attributes
from torch import nn
from torch.nn.utils.rnn import pack_padded_sequence


class MaskedTemporalPooling(torch.nn.Module):
    """
    Applies temporal pooling operations on masked inputs. For each pooling operation
    all masked values are ignored.
    """

    def __init__(self, method: str):
        """
        method (str): the method of pooling to use. Options:
            'max': reduces temporal dimension to each valid max value.
            'avg': averages valid values in the temporal dimension.
            'sum': sums valid values in the temporal dimension.
            Note if all batch row elements are invalid, the temporal dimension is
            pooled to 0 values.
        """
        super().__init__()
        assert method in ("max", "avg", "sum")
        self._method = method

    def forward(
        self, x: torch.Tensor, mask: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """
        Args:
            x (torch.Tensor): tensor with shape (batch_size, seq_len, feature_dim)
            mask (torch.Tensor): bool tensor with shape (batch_size, seq_len).
                Sequence elements that are False are invalid.

        Returns:
            Tensor with shape (batch_size, feature_dim)
        """
        assert x.dim() == 3, "Requires x shape (batch_size x seq_len x feature_dim)"
        b, t = x.shape[0], x.shape[1]
        if mask is None:
            mask = torch.ones((b, t), dtype=torch.bool)

        if self._method == "max":
            # Mask out invalid timesteps so they never win the max, then zero out
            # rows where every timestep is invalid.
            x[~mask, :] = float("-inf")
            invalid_first_dim = ~mask.view(b, -1).any(dim=-1)
            x[invalid_first_dim, :] = 0
            x = torch.max(x, dim=1)[0]
        elif self._method == "avg":
            # Zero out invalid timesteps and divide by the number of valid ones.
            x = x * mask.unsqueeze(-1).float()
            mask = mask.view(b, t, -1).any(dim=-1)
            valid_lengths = mask.float().sum(dim=-1).int()
            x = x.sum(dim=1)
            x = x.div(valid_lengths.clamp(min=1).unsqueeze(-1).expand(x.size()).float())
        elif self._method == "sum":
            x = x * mask.unsqueeze(-1).float()
            x = x.sum(dim=1)
        else:
            raise NotImplementedError(
                f"{self._method} not available options are: 'max', 'avg', 'sum'"
            )

        return x
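

# Illustrative usage of MaskedTemporalPooling (a sketch, not part of the original
# module; the helper name and shapes are assumptions for demonstration):
def _example_masked_temporal_pooling() -> torch.Tensor:
    pool = MaskedTemporalPooling(method="avg")
    x = torch.randn(2, 4, 8)  # (batch_size=2, seq_len=4, feature_dim=8)
    # Row 0 has two valid timesteps; row 1 is fully valid.
    mask = torch.tensor([[True, True, False, False], [True, True, True, True]])
    return pool(x, mask)  # shape (2, 8); padded timesteps do not affect the mean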


class TransposeMultiheadAttention(nn.Module):
    """
    Wrapper for nn.MultiheadAttention which first transposes the input tensor
    from (batch_size, seq_len, feature_dim) to (seq_length, batch_size, feature_dim),
    then applies the attention and transposes the attention outputs back to the input
    shape.
    """

    def __init__(self, feature_dim: int, num_heads: int = 1):
        """
        Args:
            feature_dim (int): attention embedding dimension
            num_heads (int): number of attention heads
        """
        super().__init__()
        self._attention = nn.MultiheadAttention(
            embed_dim=feature_dim, num_heads=num_heads
        )
        self._attention_weights = None

    @property
    def attention_weights(self) -> Optional[torch.Tensor]:
        """
        Contains attention weights from last forward call.
        """
        return self._attention_weights

    def forward(
        self, x: torch.Tensor, mask: Optional[torch.Tensor] = None
    ) -> torch.Tensor:
        """
        Args:
            x (torch.Tensor): tensor of shape (batch_size, seq_len, feature_dim)
            mask (torch.Tensor): bool tensor with shape (batch_size, seq_len).
                Sequence elements that are False are invalid.

        Returns:
            Tensor with shape (batch_size, seq_len, feature_dim)
        """
        assert x.dim() == 3, "Requires x shape (batch_size x seq_len x feature_dim)"

        if mask is not None:
            # At least the first element of each batch row must be valid, and the
            # mask is inverted because key_padding_mask marks invalid positions
            # with True.
            mask[:, 0] = True
            mask = ~mask

        # nn.MultiheadAttention expects (seq_len, batch_size, feature_dim).
        x = x.transpose(0, 1)
        attn_output, self._attention_weights = self._attention(
            x, x, x, key_padding_mask=mask
        )
        attn_output = attn_output.transpose(0, 1)
        return attn_output
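

# Illustrative usage of TransposeMultiheadAttention (a sketch, not part of the
# original module; helper name and shapes are assumptions):
def _example_transpose_multihead_attention() -> torch.Tensor:
    attention = TransposeMultiheadAttention(feature_dim=8, num_heads=2)
    x = torch.randn(2, 4, 8)  # (batch_size, seq_len, feature_dim)
    mask = torch.tensor([[True, True, True, False], [True, True, False, False]])
    return attention(x, mask)  # shape (2, 4, 8), same layout as the input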


class LearnMaskedDefault(nn.Module):
    """
    Learns default values to fill invalid entries within input tensors. The
    invalid entries are represented by a mask which is passed into forward alongside
    the input tensor. Note the default value is only used if all entries in the batch row are
    invalid rather than just a portion of invalid entries within each batch row.
    """

    def __init__(
        self, feature_dim: int, init_method: str = "gaussian", freeze: bool = False
    ):
        """
        Args:
            feature_dim (int): the size of the default value parameter, this must match the
                input tensor size.
            init_method (str): the initial default value parameter. Options:
                'gaussian'
                'zeros'
            freeze (bool): If True, the learned default parameter weights are frozen.
        """
        super().__init__()
        if init_method == "zeros":
            self._learned_defaults = nn.Parameter(
                torch.zeros(feature_dim), requires_grad=(not freeze)
            )
        elif init_method == "gaussian":
            self._learned_defaults = nn.Parameter(
                torch.Tensor(feature_dim), requires_grad=(not freeze)
            )
            nn.init.normal_(self._learned_defaults)
        else:
            raise NotImplementedError(
                f"{init_method} not available. Options are: 'zeros' or 'gaussian'"
            )

    def forward(self, x: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        """
        Args:
            x (torch.Tensor): tensor of shape (batch_size, feature_dim).
            mask (torch.Tensor): bool tensor of shape (batch_size, seq_len) If all elements
                in the batch dimension are False the learned default parameter is used for
                that batch element.

        Returns:
            Tensor with shape (batch_size, feature_dim)
        """
        # A batch row keeps its values if it has any valid entry; otherwise it is
        # replaced by the learned default vector.
        mask = mask.view(mask.shape[0], -1).any(dim=-1)
        for i in range(1, x.dim()):
            mask = mask.unsqueeze(i)
        x = x * mask.float() + self._learned_defaults * (1 - mask.float())
        return x
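

# Illustrative usage of LearnMaskedDefault (a sketch, not part of the original
# module; helper name and shapes are assumptions):
def _example_learn_masked_default() -> torch.Tensor:
    default = LearnMaskedDefault(feature_dim=8)
    x = torch.randn(2, 8)  # e.g. pooled features, shape (batch_size, feature_dim)
    # Row 0 has no valid entries, so it is replaced by the learned default vector.
    mask = torch.tensor([[False, False, False], [True, False, True]])
    return default(x, mask)  # shape (2, 8)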
   zLearnMaskedDefault.forward)rI   F)r5   r6   r7   r8   r)   r9   r$   r   r"   r:   r3   r;   r   r   r   r   rH      s    $rH   c                	       s\   e Zd ZdZ		ddedededef fdd	Z	
ddej	de
ej	 dej	fddZ  ZS )LSTMz?
    Wrapper for torch.nn.LSTM that handles masked inputs.
            Fdim_in
hidden_dimdropoutbidirectionalc                    sF   t    tj||d||d| _| j  |rd| n|| _|| _dS )z
        Args:
          dim_in (int): input feature dimension
          hidden_dim (int): hidden dimesion of lstm layer
          dropout (float): dropout rate - 0.0 if no dropout
          bidirectional (bool): bidirectional or forward only
        T)batch_firstrX   rY      N)r   r   r   rT   lstmflatten_parameters
output_dimrY   )r   rV   rW   rX   rY   r   r   r   r      s   


zLSTM.__init__Ndatar   r   c           
      C   s   |  dksJ |jd |jd }}|du r!tj||ftjd}|jdd}t||d|dddd	}| 	|\}\}}| j
r_tj|dddddf |dddddf gd
d}	|	S |d
ddddf }	|	S )a  
        Args:
            data (torch.Tensor): tensor with shape (batch_size, seq_len, feature_dim)
            mask (torch.Tensor): bool tensor with shape (batch_size, seq_len).
                Sequence elements that are False are invalid.

        Returns:
            Tensor with shape (batch_size, output_dim) - outoput_dim is determined by
                hidden_dim and whether bidirectional or not
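

# Illustrative usage of the masked LSTM wrapper (a sketch, not part of the
# original module; helper name and shapes are assumptions):
def _example_masked_lstm() -> torch.Tensor:
    lstm = LSTM(dim_in=8, hidden_dim=16, bidirectional=True)
    data = torch.randn(2, 4, 8)  # (batch_size, seq_len, feature_dim)
    mask = torch.tensor([[True, True, True, False], [True, True, False, False]])
    return lstm(data, mask)  # shape (2, 32): 2 * hidden_dim when bidirectional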
    r   rV   r>   
num_layersc                    s$   t    tt|||| _dS )z
        Args:
          dim_in (int): input feature dimension
          num_heads (int): number of heads in the nn.MultiHeadAttention layers
          num_layers (int): the number of sub-encoder-layers in the encoder
        N)r   r   r   TransformerEncoderTransformerEncoderLayerencoder)r   rV   r>   ri   r   r   r   r     s   

z$TransposeTransformerEncoder.__init__Nr_   r   r   c                 C   sR   |durd|dddf< | }| j |dd|ddd}|dddddf S )aD  
        Args:
            data (torch.Tensor): tensor with shape (batch_size, seq_len, feature_dim)
            mask (torch.Tensor): bool tensor with shape (batch_size, seq_len).
                Sequence elements that are False are invalid.

        Returns:
            Tensor with shape (batch_size, feature_dim)
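

# Illustrative usage of TransposeTransformerEncoder (a sketch, not part of the
# original module; helper name and shapes are assumptions):
def _example_transpose_transformer_encoder() -> torch.Tensor:
    encoder = TransposeTransformerEncoder(dim_in=8, num_heads=2, num_layers=1)
    data = torch.randn(2, 4, 8)  # (batch_size, seq_len, feature_dim)
    mask = torch.tensor([[True, True, False, False], [True, True, True, False]])
    return encoder(data, mask)  # shape (2, 8)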


class MaskedSequential(nn.Sequential):
    """
    A sequential container that overrides forward to take a mask as well as the usual
    input tensor. This mask is only applied to modules in _MASK_MODULES (which take
    the mask argument).
    """

    _MASK_MODULES = [
        MaskedTemporalPooling,
        LearnMaskedDefault,
        TransposeMultiheadAttention,
        LSTM,
        TransposeTransformerEncoder,
    ]

    def forward(self, input: torch.Tensor, mask: torch.Tensor) -> torch.Tensor:
        for module in self:
            if any(isinstance(module, mask_type) for mask_type in self._MASK_MODULES):
                input = module(input, mask=mask)
            else:
                input = module(input)

        return input
ejejf  dejfd	d
Z  ZS )MaskedMultiPathWayu  


class MaskedMultiPathWay(nn.Module):
    """
    Masked multi-pathway is composed of a list of stream nn.Modules followed by a
    fusion nn.Module that reduces these streams. Each stream module takes a mask
    and input tensor.

    ::

                            Pathway 1  ... Pathway N
                                ↓              ↓
                             Block 1        Block N
                                ↓⭠ --Fusion----↓
    """

    def __init__(
        self,
        *,
        multipathway_blocks: nn.ModuleList,
        multipathway_fusion: Optional[nn.Module],
    ) -> None:
        """
        Args:
            multipathway_blocks (nn.module_list): list of models from all pathways.
            multipathway_fusion (nn.module): fusion model.
        """
        super().__init__()
        set_attributes(self, locals())

    def forward(
        self, x_and_mask: List[Tuple[torch.Tensor, torch.Tensor]]
    ) -> torch.Tensor:
        out = []
        for pathway_idx in range(len(self.multipathway_blocks)):
            out.append(self.multipathway_blocks[pathway_idx](*x_and_mask[pathway_idx]))

        # Return the pathway outputs unfused if no fusion module is configured,
        # rather than referencing an unbound variable.
        if self.multipathway_fusion is not None:
            out = self.multipathway_fusion(out)
        return out
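

# Illustrative usage of MaskedMultiPathWay (a sketch, not part of the original
# module): fuse two pooled pathways by concatenation. The fusion class and
# helper names are hypothetical.
class _ConcatFusion(nn.Module):
    """Toy fusion module that concatenates pathway outputs on the feature dim."""

    def forward(self, x: List[torch.Tensor]) -> torch.Tensor:
        return torch.cat(x, dim=-1)


def _example_masked_multipathway() -> torch.Tensor:
    model = MaskedMultiPathWay(
        multipathway_blocks=nn.ModuleList(
            [MaskedTemporalPooling("avg"), MaskedTemporalPooling("max")]
        ),
        multipathway_fusion=_ConcatFusion(),
    )
    video = torch.randn(2, 4, 8)  # pathway 1: (batch, seq_len, feature_dim)
    audio = torch.randn(2, 6, 8)  # pathway 2 may have a different seq_len
    video_mask = torch.ones(2, 4, dtype=torch.bool)
    audio_mask = torch.ones(2, 6, dtype=torch.bool)
    return model([(video, video_mask), (audio, audio_mask)])  # shape (2, 16)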