o
    ϯi3                     @   sr  d dl mZmZmZmZmZ d dlZd dlZd dl	Z
G dd dZG dd dejjZG dd dejjZG d	d
 d
ejjZG dd dejjZG dd dejjZG dd dejjZG dd de
jjZG dd dejjZG dd dejjZG dd dejjZG dd dejjZG dd dejjZG dd dejjZG dd  d ejjZG d!d" d"ejjZdS )#    )CallableDictListOptionalTupleNc                   @   sH   e Zd ZdZdedefddZdeeej	f deeej	f fdd	Z
d
S )ApplyTransformToKeyat  
    Applies transform to key of dictionary input.

    Args:
        key (str): the dictionary key the transform is applied to
        transform (callable): the transform that is applied

    Example:
        >>>   transforms.ApplyTransformToKey(
        >>>       key='video',
        >>>       transform=UniformTemporalSubsample(num_video_samples),
        >>>   )
    key	transformc                 C   s   || _ || _d S N)_key
_transform)selfr   r	    r   V/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/transforms/transforms.py__init__   s   
zApplyTransformToKey.__init__xreturnc                 C   s   |  || j || j< |S r
   )r   r   r   r   r   r   r   __call__   s   zApplyTransformToKey.__call__N)__name__
__module____qualname____doc__strr   r   r   torchTensorr   r   r   r   r   r   
   s    *r   c                       sL   e Zd ZdZdef fddZdeeejf deeejf fddZ	  Z
S )		RemoveKeyz}
    Removes the given key from the input dict. Useful for removing modalities from a
    video clip that aren't needed.
    r   c                    s   t    || _d S r
   )superr   r   )r   r   	__class__r   r   r   (   s   

zRemoveKey.__init__r   r   c                 C   s   | j |v r	|| j = |S zQ
        Args:
            x (Dict[str, torch.Tensor]): video clip dict.
        )r   r   r   r   r   r   ,   s   
zRemoveKey.__call__)r   r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r   r   "   s    .r   c                       sB   e Zd ZdZddedef fddZdejdejfd	d
Z  Z	S )UniformTemporalSubsamplezf
    ``nn.Module`` wrapper for ``pytorchvideo.transforms.functional.uniform_temporal_subsample``.
    num_samplestemporal_dimc                    s   t    || _|| _dS )z
        Args:
            num_samples (int): The number of equispaced samples to be selected
            temporal_dim (int): dimension of temporal to perform temporal subsample.
        N)r   r   _num_samples_temporal_dim)r   r$   r%   r   r   r   r   ;   s   

z!UniformTemporalSubsample.__init__r   r   c                 C      t jj|| j| jS [
        Args:
            x (torch.Tensor): video tensor with shape (C, T, H, W).
        )pytorchvideo
transforms
functionaluniform_temporal_subsampler&   r'   r   r   r   r   forwardE      
z UniformTemporalSubsample.forwardr#   )
r   r   r   r   intr   r   r   r/   r!   r   r   r   r   r"   6   s    
r"   c                       s@   e Zd ZdZd
dee def fddZdejfdd	Z	  Z
S ) UniformTemporalSubsampleRepeatedzs
    ``nn.Module`` wrapper for
    ``pytorchvideo.transforms.functional.uniform_temporal_subsample_repeated``.
    r#   frame_ratiosr%   c                    s   t    || _|| _d S r
   )r   r   _frame_ratiosr'   )r   r4   r%   r   r   r   r   U   s   

z)UniformTemporalSubsampleRepeated.__init__r   c                 C   r(   r)   )r+   r,   r-   #uniform_temporal_subsample_repeatedr5   r'   r   r   r   r   r/   Z   r0   z(UniformTemporalSubsampleRepeated.forwardr1   r   r   r   r   r   r2   r   r   r   r/   r!   r   r   r   r   r3   O   s    r3   c                       sH   e Zd ZdZ	ddededef fddZd	ejd
ejfddZ	  Z
S )ShortSideScalez\
    ``nn.Module`` wrapper for ``pytorchvideo.transforms.functional.short_side_scale``.
    bilinearpytorchsizeinterpolationbackendc                        t    || _|| _|| _d S r
   )r   r   _size_interpolation_backend)r   r;   r<   r=   r   r   r   r   i      

zShortSideScale.__init__r   r   c                 C   s   t jj|| j| j| jS r)   )r+   r,   r-   short_side_scaler?   r@   rA   r   r   r   r   r/   q   s   zShortSideScale.forwardr9   r:   r   r   r   r   r2   r   r   r   r   r/   r!   r   r   r   r   r8   d   s    r8   c                	       sN   e Zd ZdZ		ddedededef fdd	Zd
ejdejfddZ	  Z
S )RandomShortSideScalez
    ``nn.Module`` wrapper for ``pytorchvideo.transforms.functional.short_side_scale``. The size
    parameter is chosen randomly in [min_size, max_size].
    r9   r:   min_sizemax_sizer<   r=   c                    s&   t    || _|| _|| _|| _d S r
   )r   r   	_min_size	_max_sizer@   rA   )r   rG   rH   r<   r=   r   r   r   r      s
   

zRandomShortSideScale.__init__r   r   c                 C   s2   t | j| jd d }tjj||| j	| j
S )r*      rK   )r   randintrI   rJ   itemr+   r,   r-   rC   r@   rA   )r   r   r;   r   r   r   r/      s   zRandomShortSideScale.forwardrD   rE   r   r   r   r   rF   {   s    	rF   c                       sX   e Zd ZdZ	ddededef fddZd	eeej	f d
eeej	f fddZ
  ZS )UniformCropVideozX
    ``nn.Module`` wrapper for ``pytorchvideo.transforms.functional.uniform_crop``.
    video	aug_indexr;   	video_keyaug_index_keyc                    r>   r
   )r   r   r?   
_video_key_aug_index_key)r   r;   rR   rS   r   r   r   r      rB   zUniformCropVideo.__init__r   r   c                 C   s*   t jj|| j | j|| j || j< |S r    )r+   r,   r-   uniform_croprT   r?   rU   r   r   r   r   r      s   
zUniformCropVideo.__call__)rP   rQ   )r   r   r   r   r2   r   r   r   r   r   r   r!   r   r   r   r   rO      s    .rO   c                       s.   e Zd ZdZdejdejf fddZ  ZS )	Normalizea  
    Normalize the (CTHW) video clip by mean subtraction and division by standard deviation

    Args:
        mean (3-tuple): pixel RGB mean
        std (3-tuple): pixel RGB standard deviation
        inplace (boolean): whether do in-place normalization
    r   r   c                    s0   | dddd}t |}| dddd}|S )r*   rK   r         )permuter   r/   )r   r   vidr   r   r   r/      s   zNormalize.forward)r   r   r   r   r   r   r/   r!   r   r   r   r   rW      s    "	rW   c                       6   e Zd ZdZ fddZdejdejfddZ  ZS )ConvertFloatToUint8z=
    Converts a video from dtype float32 to dtype uint8.
    c                       t    tjtj| _d S r
   )r   r   torchvisionr,   ConvertImageDtyper   uint8convert_funcr   r   r   r   r         
zConvertFloatToUint8.__init__r   r   c                 C   s*   |j tjks|j tjksJ d| |S r*   z!image must have dtype torch.uint8)dtyper   floathalfrb   r   r   r   r   r/      s   
zConvertFloatToUint8.forward	r   r   r   r   r   r   r   r/   r!   r   r   r   r   r]          r]   c                       r\   )ConvertUint8ToFloatz=
    Converts a video from dtype uint8 to dtype float32.
    c                    r^   r
   )r   r   r_   r,   r`   r   float32rb   rc   r   r   r   r      rd   zConvertUint8ToFloat.__init__r   r   c                 C   s   |j tjks
J d| |S re   )rf   r   ra   rb   r   r   r   r   r/      s   
zConvertUint8ToFloat.forwardri   r   r   r   r   rk      rj   rk   c                       >   e Zd ZdZ fddZejjdejdejfddZ	  Z
S )MoveChannelRearz=
    A Scriptable version to perform C X Y Z -> X Y Z C.
    c                       t    d S r
   r   r   rc   r   r   r   r         zMoveChannelRear.__init__r   r   c                 C      | g d}|S )g
        Args:
            x (torch.Tensor): video tensor whose dimensions are to be permuted.
        )rK   rX   rY   r   rZ   r   r   r   r   r/         zMoveChannelRear.forwardr   r   r   r   r   r   jitscript_methodr   r/   r!   r   r   r   r   rn      
     rn   c                       rm   )MoveChannelFrontz=
    A Scriptable version to perform X Y Z C -> C X Y Z.
    c                    ro   r
   rp   rc   r   r   r   r     rq   zMoveChannelFront.__init__r   r   c                 C   rr   )rs   )rY   r   rK   rX   rt   r   r   r   r   r/     ru   zMoveChannelFront.forwardrv   r   r   r   r   rz      ry   rz   c                       sv   e Zd ZdZ				ddededeeef d	eeef d
ededededdf fddZ	de
jde
jfddZ  ZS )RandomResizedCropz_
    ``nn.Module`` wrapper for ``pytorchvideo.transforms.functional.random_resized_crop``.
    FTr9   
   target_heighttarget_widthscaleaspect_ratioshiftlog_uniform_ratior<   	num_triesr   Nc	           	         s>   t    || _|| _|| _|| _|| _|| _|| _|| _	d S r
   )
r   r   _target_height_target_width_scale_aspect_ratio_shift_log_uniform_ratior@   
_num_tries)	r   r}   r~   r   r   r   r   r<   r   r   r   r   r     s   

zRandomResizedCrop.__init__r   c                 C   s.   t jj|| j| j| j| j| j| j	| j
| j	S )za
        Args:
            x (torch.Tensor): Input video tensor with shape (C, T, H, W).
        )r+   r,   r-   random_resized_cropr   r   r   r   r   r   r@   r   r   r   r   r   r   +  s   zRandomResizedCrop.__call__)FTr9   r|   )r   r   r   r   r2   r   rg   boolr   r   r   r   r   r!   r   r   r   r   r{     s4    


	
r{   c                       s@   e Zd ZdZdee f fddZdejdejfddZ	  Z
S )	Permutez-
    Permutes the dimensions of a video.
    dimsc                    s6    fddt t D sJ dt    | _dS )zZ
        Args:
            dims (Tuple[int]): The desired ordering of dimensions.
        c                 3   s    | ]}| v V  qd S r
   r   ).0dr   r   r   	<genexpr>G  s    
z#Permute.__init__.<locals>.<genexpr>z0dims must contain every dimension (0, 1, 2, ...)N)rangelenr   r   _dims)r   r   r   r   r   r   B  s   



zPermute.__init__r   r   c                 C   s   |j | j S )rs   )rZ   r   r   r   r   r   r/   N  s   zPermute.forwardr7   r   r   r   r   r   =  s    r   c                       sb   e Zd ZdZ				ddee deee  dede	d	e	f
 fd
dZ
dejdejfddZ  ZS )	OpSamplerz
    Given a list of transforms with weights, OpSampler applies weighted sampling to
    select n transforms, which are then applied sequentially to the input.
    NrK   Ftransforms_listtransforms_probnum_sample_oprandomly_sample_depthreplacementc                    s   t    t|dksJ d|dksJ d|t|ks!J d|dur;t|t|ks1J dt|dks;J d|| _t|durF|ndgt| | _|| _|| _	|| _
dS )	a5  
        Args:
            transforms_list (List[Callable]): A list of tuples of all available transforms
                to sample from.
            transforms_prob (Optional[List[float]]): The probabilities associated with
                each transform in transforms_list. If not provided, the sampler assumes a
                uniform distribution over all transforms. They do not need to sum up to one
                but weights need to be positive.
            num_sample_op (int): Number of transforms to sample and apply to input.
            randomly_sample_depth (bool): If randomly_sample_depth is True, then uniformly
                sample the number of transforms to apply, between 1 and num_sample_op.
            replacement (bool): If replacement is True, transforms are drawn with replacement.
        r   z)Argument transforms_list cannot be empty.z&Need to sample at least one transform.zMArgument num_sample_op cannot be greater than number of available transforms.NzJArgument transforms_prob needs to have the same length as transforms_list.z4Argument transforms_prob needs to be greater than 0.rK   )r   r   r   minr   r   FloatTensorr   r   r   r   )r   r   r   r   r   r   r   r   r   r   \  s4   

zOpSampler.__init__r   r   c                 C   sT   | j rtd| jd d n| j}tj| j|| jd}|D ]	}| j| |}q|S )zC
        Args:
            x (torch.Tensor): Input tensor.
        rK   rL   )r   )	r   r   rM   r   rN   multinomialr   r   r   )r   r   depth
index_listindexr   r   r   r/     s   
zOpSampler.forward)NrK   FF)r   r   r   r   r   r   r   rg   r2   r   r   r   r   r/   r!   r   r   r   r   r   V  s$    
/r   c                   @   s&   e Zd ZdZdejdejfddZdS )Div255zS
    ``nn.Module`` wrapper for ``pytorchvideo.transforms.functional.div_255``.
    r   r   c                 C   s   t jtjjj|S )z
        Scale clip frames from [0, 255] to [0, 1].
        Args:
            x (Tensor): A tensor of the clip's RGB frames with shape:
                (C, T, H, W).
        Returns:
            x (Tensor): Scaled tensor by dividing 255.
        )r_   r,   Lambdar+   r-   div_255r   r   r   r   r/     s
   	zDiv255.forwardN)r   r   r   r   r   r   r/   r   r   r   r   r     s    r   )typingr   r   r   r   r   "pytorchvideo.transforms.functionalr+   r   torchvision.transformsr_   r   nnModuler   r"   r3   r8   rF   rO   r,   rW   r]   rk   rn   rz   r{   r   r   r   r   r   r   r   <module>   s(   -I