o
    ϯiS                     @   s   d dl mZmZmZmZ d dlZd dlmZ d dlm	Z	 G dd dej
Zddejdd	d
dededeee  dedee dedefddZdS )    )CallableIterableOptionalTupleN)set_attributesc                       sp   e Zd ZdZdddddejdejdejdejd	eej d
eej deddf fddZde	j
fddZ  ZS )NonLocala  
    Builds Non-local Neural Networks as a generic family of building
    blocks for capturing long-range dependencies. Non-local Network
    computes the response at a position as a weighted sum of the
    features at all positions. This building block can be plugged into
    many computer vision architectures.
    More details in the paper:
    Wang, Xiaolong, Ross Girshick, Abhinav Gupta, and Kaiming He.
    "Non-local neural networks."
    In Proceedings of the IEEE conference on CVPR, 2018.
    Ndot_product)poolnorminstantiation
conv_thetaconv_phiconv_gconv_outr	   r
   r   returnc                   sp   t    t| t  d ||||fvsJ |dv s J d|t| jj| jj| j	j| j
jhdks6J dd S )N)r   softmaxzUnknown norm type {}   z8Nonlocal convolution's input/ output dimension mismatch.)super__init__r   localsformatlenr   out_channelsr   r   r   in_channels)selfr   r   r   r   r	   r
   r   	__class__ T/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/layers/nonlocal_net.pyr      s"   

	zNonLocal.__init__c                 C   s  | j j}|}| \}}}}}|  |}	| jd ur| |}| |}
| |}|	||d}	|
||d}
|||d}td|	|
f}| j	dkrY||d  }t
jj|dd}n| j	dkrg|jd }|| }td||f}||||||}| |}| jd ur| |}|| S )	Nznct,ncp->ntpr   g         )dimr   zntg,ncg->nct)r   r   sizer	   r   r   viewtorcheinsumr   nn
functionalr   shaper   r
   )r   x	dim_inner
x_identityNCTHWthetaphig	theta_phispatial_temporal_dimtheta_phi_gpr   r   r   forward5   s0   










zNonLocal.forward)__name__
__module____qualname____doc__r&   Moduler   strr   r$   Tensorr8   __classcell__r   r   r   r   r   
   s.    	
r   r   r   r   r   gh㈵>g?)	pool_sizer   r
   norm_epsnorm_momentumdim_inr*   rB   r   r
   rC   rD   c           	      C   s   |du rd}t |tsJ |du rd}n|| ||d}tdd |D r/tj||g dd}nd}ttj| |ddd	dtj| |ddd	dtj| |ddd	dtj|| ddd	d|||d
S )a  
    Builds Non-local Neural Networks as a generic family of building
    blocks for capturing long-range dependencies. Non-local Network
    computes the response at a position as a weighted sum of the
    features at all positions. This building block can be plugged into
    many computer vision architectures.
    More details in the paper: https://arxiv.org/pdf/1711.07971
    Args:
        dim_in (int): number of dimension for the input.
        dim_inner (int): number of dimension inside of the Non-local block.
        pool_size (tuple[int]): the kernel size of spatial temporal pooling,
            temporal pool kernel size, spatial pool kernel size, spatial pool kernel
            size in order. By default pool_size is None, then there would be no pooling
            used.
        instantiation (string): supports two different instantiation method:
            "dot_product": normalizing correlation matrix with L2.
            "softmax": normalizing correlation matrix with Softmax.
        norm (nn.Module): nn.Module for the normalization layer. The default is
            nn.BatchNorm3d.
        norm_eps (float): normalization epsilon.
        norm_momentum (float): normalization momentum.
    NrA   )num_featuresepsmomentumc                 s   s    | ]}|d kV  qdS )r   Nr   ).0r"   r   r   r   	<genexpr>   s    z"create_nonlocal.<locals>.<genexpr>)r   r   r   )kernel_sizestridepaddingr   r   )r   r   r   r   r	   r
   r   )
isinstancer   anyr&   	MaxPool3dr   Conv3d)	rE   r*   rB   r   r
   rC   rD   
norm_model
pool_modelr   r   r   create_nonlocal_   s(   "
rT   )typingr   r   r   r   r$   torch.nnr&   pytorchvideo.layers.utilsr   r=   r   BatchNorm3dintr>   floatrT   r   r   r   r   <module>   s2   Z
	
