o
    ϯi                     @   sT   d dl Z d dlmZmZ d dlZd dlmZ d dlm  mZ	 G dd dej
ZdS )    N)CallableOptionalc                       s   e Zd ZdZdddddejfdejdeej deej d	ed
ede	de
ddf fddZdd Zde	fddZde	fddZe dd Ze dd Zdd ZdejdejdejfddZ  ZS ) BYOLz
    Bootstrap Your Own Latent A New Approach to Self-Supervised Learning
    Details can be found in:
    https://arxiv.org/pdf/2006.07733.pdf
    Ni   i   gGz?backbone	projector	predictorfeature_dimpredictor_innermmtnormreturnc           	   
      s   t    tjd || _|| _|durt||}|| _	t
|| _| j D ]}d|_q)|du rPttj||dd||tjddtj||dd| _dS || _dS )a3  
        Args:
            backbone (nn.Module): backbone for byol, input shape depends on the forward
                input size. Standard inputs include `B x C`, `B x C x H x W`, and
                `B x C x T x H x W`.
            projector (nn.Module): stand projector is a mlp with 2 to 3 hidden layers,
                with (synchronized) BatchNorm and ReLU activation.
            predictor (nn.Module): predictor MLP of BYOL of similar structure as the
                projector MLP.
            feature_dim (int): output feature dimension.
            predictor_inner (int): inner channel size for predictor.
            mmt (float): momentum update ratio for the momentum backbone.
            norm (callable): normalization to be used in projector, default is
                synchronized batchnorm.
        z PYTORCHVIDEO.model.BYOL.__init__NF)biasT)inplace)super__init__torch_C_log_api_usage_oncer
   r   nn
Sequentialr   copydeepcopybackbone_mmt
parametersrequires_gradLinearReLUr   )	selfr   r   r   r   r	   r
   r   p	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/models/byol.pyr      s*   



zBYOL.__init__c                 C   s   t d||g}|  }|S )z
        Similarity loss for byol.
        Args:
            q and k (nn.tensor): inputs to calculate the similarity, expected to have
                the same shape of `N x C`.
        znc,nc->n)r   einsummean)r   qk
similaritylossr!   r!   r"   sim_lossD   s   
zBYOL.sim_lossc                 C   s
   || _ dS )z
        Update the momentum. This function can be used to perform momentum annealing.
        Args:
            mmt (float): update the momentum.
        Nr
   )r   r
   r!   r!   r"   
update_mmtO   s   
zBYOL.update_mmtc                 C   s   | j S )z\
        Get the momentum. This function can be used to perform momentum annealing.
        r*   )r   r!   r!   r"   get_mmtW   s   zBYOL.get_mmtc                 C   sB   t | j | j D ]\}}|j| j |jd| j   |_qdS )z2
        Momentum update on the backbone.
        g      ?N)zipr   r   r   datar
   )r   param	param_mmtr!   r!   r"   _momentum_update_backbone]   s
    zBYOL._momentum_update_backbonec                 C   s@   t   | |}W d   n1 sw   Y  tj|ddS )zi
        Forward momentum backbone.
        Args:
            x (tensor): input to be forwarded.
        N   dim)r   no_gradr   F	normalize)r   xprojr!   r!   r"   forward_backbone_mmtg   s   
zBYOL.forward_backbone_mmtc                 C   s"   |  |}| |}tj|ddS )z`
        Forward backbone.
        Args:
            x (tensor): input to be forwarded.
        r2   r3   )r   r   r6   r7   )r   r8   r9   predr!   r!   r"   forward_backboner   s   

zBYOL.forward_backbonex1x2c                 C   sx   |  |}|  |}t  |   | |}| |}W d   n1 s'w   Y  | ||| || d }|S )aP  
        Args:
            x1 (torch.tensor): a batch of image with augmentation. The input tensor
                shape should able to be feed into the backbone.
            x2 (torch.tensor): the size batch of image with different augmentation. The
                input tensor shape should able to be feed into the backbone.
        N   )r<   r   r5   r1   r:   r)   )r   r=   r>   pred_1pred_2
proj_mmt_1
proj_mmt_2r(   r!   r!   r"   forward|   s   



zBYOL.forward)__name__
__module____qualname____doc__r   SyncBatchNormModuler   intfloatr   r   r)   r+   r,   r   r5   r1   r:   r<   TensorrD   __classcell__r!   r!   r   r"   r      sD    		2
	

$
r   )r   typingr   r   r   torch.nnr   torch.nn.functional
functionalr6   rJ   r   r!   r!   r!   r"   <module>   s   