o
    ϯi>                     @   s\   d dl Z d dlmZ d dlZd dlmZ d dlm  mZ d dl	m
Z
 G dd dejZdS )    N)Optional)set_attributesc                       s   e Zd ZdZ						ddejd	eej d
edededededdf fddZ	dededdfddZ
dejdejdejfddZ  ZS )
MemoryBanka,  
    Performs Non-Parametric Instance Discrimination for self supervised learning on
    video. A memory bank is built to keep and update the historical feature embedding
    and use them for contrastive learning.

    The original paper is:
    Unsupervised Feature Learning via Non-Parametric Instance Discrimination
    https://arxiv.org/pdf/1805.01978.pdf

    More details can be found from the memory bank part in the following paper:
    Momentum Contrast for Unsupervised Visual Representation Learning
    https://arxiv.org/pdf/1911.05722.pdf
    N   Q?     +?backbonemlpneg_sizetemperature	bank_sizedimmmtreturnc                    s&   t    t| t  | || dS )a8  
        Args:
            backbone (nn.Module): backbone used to forward the input.
            mlp (nn.Module): multi-layer perception used in memory bank instance
                discrimination model.
            neg_size (int): size of negative samples per instance.
            temperature (float): temperature to use for contrastive learning.
            bank_size (int): size of the memory bank, expected to be the same size as
                the training set.
            dim (int): dimension of the channel.
            mmt (float): momentum to use.
        N)super__init__r   locals_init_mem_bank)selfr
   r   r   r   r   r   r   	__class__ S/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/models/memory_bank.pyr      s   
zMemoryBank.__init__c                 C   sN   dt |d  }| dt||d| | t| j	
 j dS )a'  
        Given the memory bank size and the channel dimension, initialize the memory
            bank.
        Args:
            bank_size (int): size of the memory bank, expected to be the same size as
                 the training set.
            dim (int): dimension of the channel.
        g      ?   memory   N)mathsqrtregister_buffertorchrandmul_add_tonextr
   
parametersdevice)r   r   r   stdvr   r   r   r   5   s   	
zMemoryBank._init_mem_bankxx_indc                 C   s  |j d }| |}| jdur| |}tj|ddd}tjd| j|| jd fd	|j
}|dd|j t| jd|d }||| jd | j}td||}t|| j}tj|f|j
tjd	}tjj||}| jrt @ t| jd|d}	|	| j |	t |d| j  |	!dj"dd
d!d}
|	|
}| j#d|| W d   |S 1 sw   Y  |S )a  
        Perform contrastive learning with random sampled negative instance from the
            memory bank. During training, update the memory bank with latest feature
            embedding.
        Args:
            x (torch.tensor): a batch of image with augmentation. The input tensor
                shape should able to be feed into the backbone.
            x_ind (torch.tensor): the index of the image x from the dataset. Expected
                shape is B.
        r   Nr      )pr   )sizez
bkc,bc->bk)r(   dtypeT)keepdimg      ?)$shaper
   r   F	normalizer!   randintr   r   r%   r(   selectcopy_dataindex_selectr   viewdetachr   einsumdivr   zeroslongnn
functionalcross_entropytrainingno_gradr#   r   r$   mulpowsumindex_copy_)r   r*   r+   
batch_sizeidxweightoutgtlossposnormupdatedr   r   r   forwardJ   s6   






zMemoryBank.forward)Nr   r   r   r   r	   )__name__
__module____qualname____doc__r@   Moduler   intfloatr   r   r!   TensorrR   __classcell__r   r   r   r   r      s6    	$r   )r   typingr   r!   torch.nnr@   torch.nn.functionalrA   r3   pytorchvideo.layers.utilsr   rW   r   r   r   r   r   <module>   s   