o
    ϯi#                     @   s   d dl mZmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 G dd dejZdddd	d
dd
dejddejddededededee dee dee dedee dedededededejfddZG d d! d!ejZdS )"    )CallableOptionalTupleN)set_attributes)	_size_3_tc                       s   e Zd ZdZdddddddddededee deee  d	eee  d
eee  deee  deee  deee	  deddf fddZ
dejdejfddZ  ZS )ConvReduce3Du   
    Builds a list of convolutional operators and performs summation on the outputs.

    ::

                            Conv3d, Conv3d, ...,  Conv3d
                                           ↓
                                          Sum
    Nsum)stridepaddingpadding_modedilationgroupsbiasreduction_methodin_channelsout_channelskernel_sizer	   r
   r   r   r   r   r   returnc       
            s(  t    |
dv sJ |
| _g }tt|D ]u}|||| d}|dur0|| dur0|| |d< |dur@|| dur@|| |d< |durP|| durP|| |d< |dur`|| dur`|| |d< |	durp|	| durp|	| |d< |dur|| dur|| |d	< |tjd
i | qt|| _	dS )a  
        Args:
            in_channels int: number of input channels.
            out_channels int: number of output channels produced by the convolution(s).
            kernel_size tuple(_size_3_t): Tuple of sizes of the convolutionaling kernels.
            stride tuple(_size_3_t): Tuple of strides of the convolutions.
            padding tuple(_size_3_t): Tuple of paddings added to all three sides of the
                input.
            padding_mode tuple(string): Tuple of padding modes for each convs.
                Options include `zeros`, `reflect`, `replicate` or `circular`.
            dilation tuple(_size_3_t): Tuple of spacings between kernel elements.
            groups tuple(_size_3_t): Tuple of numbers of blocked connections from input
                channels to output channels.
            bias tuple(bool): If `True`, adds a learnable bias to the output.
            reduction_method str: Options include `sum` and `cat`.
        )r   cat)r   r   r   Nr	   r
   r   r   r   r    )
super__init__r   rangelenappendnnConv3d
ModuleListconvs)selfr   r   r   r	   r
   r   r   r   r   r   	conv_listind
conv_param	__class__r   T/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/layers/convolutions.pyr      s.   
zConvReduce3D.__init__xc                 C   sn   g }t t| jD ]}|| j| | q	| jdkr)tj|ddjddd}|S | jdkr5tj|dd}|S )Nr   r   )dimF)r'   keepdimr      )	r   r   r   r   r   torchstackr   r   )r   r&   outputr!   r   r   r%   forwardM   s   

zConvReduce3D.forward)__name__
__module____qualname____doc__intr   r   r   strboolr   r*   Tensorr-   __classcell__r   r   r#   r%   r      sB    



	


7r   F)   r7   r7   )   r8   r8   )r)   r)   r)   r)   gh㈵>g?)inner_channelsconv_xy_firstr   r	   r
   r   r   r   normnorm_epsnorm_momentum
activationr   r   r9   r:   r   r	   r
   r   r   r   r;   r<   r=   r>   r   c              	   C   s  |du r|}|	dksJ dt |dkrt|dksJ dtj|s$| n||s)|n||d ddf|d ddf|d ddf|d}|
du rFdn|
|||d}|du rSdn| }tj|s\|n| |sa|n|d|d |d fd|d |d fd|d |d f|d}t|||||d	S )
uB  
    Create a 2plus1d conv layer. It performs spatiotemporal Convolution, BN, and
    Relu following by a spatiotemporal pooling.

    ::

                        Conv_t (or Conv_xy if conv_xy_first = True)
                                           ↓
                                     Normalization
                                           ↓
                                       Activation
                                           ↓
                        Conv_xy (or Conv_t if conv_xy_first = True)

    Normalization options include: BatchNorm3d and None (no normalization).
    Activation options include: ReLU, Softmax, Sigmoid, and None (no activation).

    Args:
        in_channels (int): input channel size of the convolution.
        out_channels (int): output channel size of the convolution.
        kernel_size (tuple): convolutional kernel size(s).
        stride (tuple): convolutional stride size(s).
        padding (tuple): convolutional padding size(s).
        bias (bool): convolutional bias. If true, adds a learnable bias to the
            output.
        groups (int): Number of groups in convolution layers. value >1 is unsupported.
        dilation (tuple): dilation value in convolution layers. value >1 is unsupported.
        conv_xy_first (bool): If True, spatial convolution comes before temporal conv

        norm (callable): a callable that constructs normalization layer, options
            include nn.BatchNorm3d, None (not performing normalization).
        norm_eps (float): normalization epsilon.
        norm_momentum (float): normalization momentum.

        activation (callable): a callable that constructs activation layer, options
            include: nn.ReLU, nn.Softmax, nn.Sigmoid, and None (not performing
            activation).

    Returns:
        (nn.Module): 2plus1d conv layer.
    Nr)   z?Support for groups is not implemented in R2+1 convolution layerzBSupport for dillaiton is not implemented in R2+1 convolution layerr   )r   r   r   r	   r
   r   )num_featuresepsmomentumr8   conv_tr;   r>   conv_xyr:   )maxminr   r   Conv2plus1d)r   r   r9   r:   r   r	   r
   r   r   r   r;   r<   r=   r>   conv_t_modulenorm_moduleactivation_moduleconv_xy_moduler   r   r%   create_conv_2plus1dX   sH   =





	rL   c                       sf   e Zd ZdZdddddddejdejdejdejd	ed
df fddZdej	d
ej	fddZ
  ZS )rG   u  
    Implementation of 2+1d Convolution by factorizing 3D Convolution into an 1D temporal
    Convolution and a 2D spatial Convolution with Normalization and Activation module
    in between:

    ::

                        Conv_t (or Conv_xy if conv_xy_first = True)
                                           ↓
                                     Normalization
                                           ↓
                                       Activation
                                           ↓
                        Conv_xy (or Conv_t if conv_xy_first = True)

    The 2+1d Convolution is used to build the R(2+1)D network.
    NFrB   rC   r;   r>   rD   r:   r   c                   s6   t    t| t  | jdusJ | jdusJ dS )ar  
        Args:
            conv_t (torch.nn.modules): temporal convolution module.
            norm (torch.nn.modules): normalization module.
            activation (torch.nn.modules): activation module.
            conv_xy (torch.nn.modules): spatial convolution module.
            conv_xy_first (bool): If True, spatial convolution comes before temporal conv
        N)r   r   r   localsrC   rD   )r   rC   r;   r>   rD   r:   r#   r   r%   r      s   
zConv2plus1d.__init__r&   c                 C   sd   | j r| |n| |}| jr| |n|}| jr| |n|}| j r+| |}|S | |}|S )N)r:   rD   rC   r;   r>   )r   r&   r   r   r%   r-      s   
zConv2plus1d.forward)r.   r/   r0   r1   r   Moduler4   r   r*   r5   r-   r6   r   r   r#   r%   rG      s*    rG   )typingr   r   r   r*   torch.nnr   pytorchvideo.layers.utilsr   torch.nn.common_typesr   rN   r   BatchNorm3dReLUr2   r4   floatrL   rG   r   r   r   r%   <module>   sd   R	

g