o
    ϯi-                     @   s   d dl mZmZ d dlZd dlmZ d dlmZ G dd dejZ	dddd	 d
ddd
ddej
ddejfdedededededededededededededejfddZdS )    )CallableOptionalN)ResBlockc                       sR   e Zd ZdZ		ddedee dee f fddZd	ejd
ejfddZ	  Z
S )SqueezeAndExcitationLayer2DzL2D Squeeze and excitation layer, as per https://arxiv.org/pdf/1709.01507.pdf   N	in_planesreduction_ratioreduced_planesc                    s   t    td| _t|t|ksJ d|du r|| n|}ttj||ddddt tj||ddddt	 | _
dS )a^  
        Args:
            in_planes (int): input channel dimension.
            reduction_ratio (int): factor by which in_planes should be reduced to
                get the output channel dimension.
            reduced_planes (int): Output channel dimension. Only one of reduction_ratio
                or reduced_planes should be defined.
        )   r
   zHOnly of reduction_ratio or reduced_planes should be defined for SE layerNr
   Tkernel_sizestridebias)super__init__nnAdaptiveAvgPool2davgpoolbool
SequentialConv2dReLUSigmoid
excitation)selfr   r   r	   	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/layers/squeeze_excitation.pyr      s   

z$SqueezeAndExcitationLayer2D.__init__xreturnc                 C   s    |  |}| |}|| }|S )zL
        Args:
            x (tensor): 2D image of format C * H * W
        )r   r   )r   r   
x_squeezed	x_excitedx_scaledr   r   r   forward.   s   

z#SqueezeAndExcitationLayer2D.forward)r   N)__name__
__module____qualname____doc__intr   r   torchTensorr$   __classcell__r   r   r   r   r   
   s    !r   Fr   c                 C   s   | | S )Nr   )r   yr   r   r   <lambda>>   s    r.      r
   gh㈵>g?dim_indim_outbranch_fusionconv_a_kernel_sizeconv_a_strideconv_a_paddingconv_b_kernel_sizeconv_b_strideconv_b_paddingnormnorm_epsnorm_momentum
activationr    c                 C   s   t j| ||||dd|||||r| nt  t j||||	|
dd||||g}|r3|t||d t j| }d\}}||	 dksF| |krXt j| |d||	 dd}||||}t||||rd| |dS d|dS )	ur  
    2-D Residual block with squeeze excitation (SE2D) for 2d. Performs a summation between an
    identity shortcut in branch1 and a main block in branch2. When the input and
    output dimensions are different, a convolution followed by a normalization
    will be performed.

    ::

                                         Input
                                           |-------+
                                           ↓       |
                                         conv2d    |
                                           ↓       |
                                          Norm     |
                                           ↓       |
                                       activation  |
                                           ↓       |
                                         conv2d    |
                                           ↓       |
                                          Norm     |
                                           ↓       |
                                          SE2D     |
                                           ↓       }
                                       Summation ←-+
                                           ↓
                                       Activation

    Normalization examples include: BatchNorm3d and None (no normalization).
    Activation examples include: ReLU, Softmax, Sigmoid, and None (no activation).
    Transform examples include: BottleneckBlock.

    Args:
        dim_in (int): input channel size to the bottleneck block.
        dim_out (int): output channel size of the bottleneck.
        use_se (bool): if true, use squeeze excitation layer in the bottleneck.
        se_reduction_ratio (int): factor by which input channels should be reduced to
            get the output channel dimension in SE layer.
        branch_fusion (callable): a callable that constructs summation layer.
            Examples include: lambda x, y: x + y, OctaveSum.

        conv_a_kernel_size (tuple): convolutional kernel size(s) for conv_a.
        conv_a_stride (tuple): convolutional stride size(s) for conv_a.
        conv_a_padding (tuple): convolutional padding(s) for conv_a.
        conv_b_kernel_size (tuple): convolutional kernel size(s) for conv_b.
        conv_b_stride (tuple): convolutional stride size(s) for conv_b.
        conv_b_padding (tuple): convolutional padding(s) for conv_b.

        norm (callable): a callable that constructs normalization layer. Examples
            include nn.BatchNorm3d, None (not performing normalization).
        norm_eps (float): normalization epsilon.
        norm_momentum (float): normalization momentum.

        activation (callable): a callable that constructs activation layer in
            bottleneck and block. Examples include: nn.ReLU, nn.Softmax, nn.Sigmoid,
            and None (not performing activation).

    Returns:
        (nn.Module): resnet basic block layer.
    F)r   r   paddingr   )r   )NNr
   r   N)branch1_convbranch1_normbranch2r<   r2   )r   r   Identityappendr   r   r   )r0   r1   use_sese_reduction_ratior2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r@   r>   r?   r   r   r   (create_audio_2d_squeeze_excitation_block9   sZ   R



rE   )typingr   r   r*   torch.nnr   pytorchvideo.models.resnetr   Moduler   BatchNorm2dr   r)   floatrE   r   r   r   r   <module>   s^   2	
