o
    ϯiD                     @   s  d Z ddlmZmZmZmZmZ ddlZddlZddl	m
  mZ ddlmZ dZdd Zdejd	ejfd
dZdejd	ejfddZdejd	ejfddZdejded	ejfddZdejded	ejfddZdejded	ejfddZdejded	ejfddZdejded	ejfddZdejded	ejfddZdejdefddZdejdefdd Zdejdefd!d"Zdejdefd#d$Zdejdefd%d&Z d'ed	efd(d)Z!d*e"d+eeef d	efd,d-Z#d*e"d+eeef d	ee fd.d/Z$d*e"d+ee"e"f d	ee" fd0d1Z%d*e"d+eeef d	ee fd2d3Z&eeeeeeeeeeeeee d4Z'e$e$e$e$ddde$e%e&e$e$e$e$d4Z(d5d5d5d5dddd6d7d8d9d9d:d:d4Z)d;d<iZ*d=d>iZ+G d?d@ d@Z,dS )AzAVideo transforms that are used for advanced augmentation methods.    )AnyCallableDictOptionalTupleN)InterpolationMode
   c                 C   s   d| v sJ ddS )z0
    Check if kwargs contains key ``fill``.
    fillzNeed to have fill in kwargs.N )kwargsr
   r
   Y/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/transforms/augmentations.py_check_fill_arg   s   r   videoreturnc                 K      t jj| S )z
    Maximize contrast of a video by remapping its pixels per channel so that the lowest
    becomes black and the lightest becomes white.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
    )torchvision
transforms
functionalautocontrastr   r   r
   r
   r   _autocontrast   s   r   c                 K   sH   | j tjkr| j }| d tj} tjj| d |S tjj| S )z
    Equalize the histogram of a video by applying a non-linear mapping to the input in
    order to create a uniform distribution of grayscale values in the output.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
       )dtypetorchuint8tor   r   r   equalize)r   r   
video_typer
   r
   r   	_equalize#   s
   r   c                 K   r   )zv
    Invert the colors of a video.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
    )r   r   r   invertr   r
   r
   r   _invert2   s   r    factorc                 K   s$   t | tjjj| ||d tjdS )z
    Rotate the image by angle.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): The rotation angle value in degrees, counter-clockwise.
    r	   r	   interpolation)r   r   r   r   rotater   BILINEARr   r!   r   r
   r
   r   _rotate<   s   r'   c                 K   s4   | j tjkrtjj| t|d S tjj| |S )z
    Solarize an video by inverting all pixel values above a threshold.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
    g     o@)r   r   r   r   r   r   solarizeintr&   r
   r
   r   	_solarizeJ   s   r*   c                 K      t jj| |S )aP  
    Adjust contrast of an a video.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): How much to adjust the contrast. Can be any non-negative
            number. 0 gives a solid gray video, 1 gives the original video while 2
            increases the contrast by a factor of 2.
    )r   r   r   adjust_contrastr&   r
   r
   r   _adjust_contrastW      
r-   c                 K   r+   )aG  
    Adjust the saturation of a video.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): How much to adjust the saturation. 0 will give a black and
            white video, 1 will give the original video while 2 will enhance the
            saturation by a factor of 2.
    )r   r   r   adjust_saturationr&   r
   r
   r   _adjust_saturationd   r.   r0   c                 K   r+   )aX  
    Adjust brightness of a video.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        sharpness_factor (float): How much to adjust the sharpness. Can be any
            non-negative number. 0 gives a blurred video, 1 gives the original video
            while 2 increases the sharpness by a factor of 2.
    )r   r   r   adjust_brightnessr&   r
   r
   r   _adjust_brightnessq   r.   r2   c                 K   r+   )aQ  
    Adjust the sharpness of a video.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): How much to adjust the sharpness. Can be any non-negative
            number. 0 gives a blurred video, 1 gives the original video while 2
            increases the sharpness by a factor of 2.
    )r   r   r   adjust_sharpnessr&   r
   r
   r   _adjust_sharpness~   r.   r4   c                 K   sX   |dkr| S | j tjkr$| j }| d tj} tjj| |d |S tjj| |S )z
    Posterize an image by reducing the number of bits for each color channel.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): The number of bits to keep for each channel (0-8).
       r   )r   r   r   r   r   r   r   	posterize)r   r!   r   r   r
   r
   r   
_posterize   s   r7   c                 K   s<   t | | d| d }tj| d||dddg|d ddS )z
    Shear the video along the horizontal axis.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): How much to shear along the horizontal axis using the affine
            matrix.
          r   r	   bilinearr"   r   sizeF_taffiner   r!   r   translation_offsetr
   r
   r   _shear_x      	rB   c                 K   s<   t | | d| d }tj| ddd|d|g|d ddS )z
    Shear the video along the vertical axis.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): How much to shear along the vertical axis using the affine
            matrix.
    r9   r:   r   r	   r;   r"   r<   r@   r
   r
   r   _shear_y   rC   rE   c                 K   s8   t | || d }tj| dd|dddg|d ddS )z
    Translate the video along the vertical axis.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): How much (relative to the image size) to translate along the
            vertical axis.
    rD   r:   r   r	   r;   r"   r<   r@   r
   r
   r   _translate_x      	rF   c                 K   s8   t | || d }tj| ddddd|g|d ddS )z
    Translate the video along the vertical axis.

    Args:
        video (torch.Tensor): Video tensor with shape (T, C, H, W).
        factor (float): How much (relative to the image size) to translate along the
            horizontal axis.
    r8   r:   r   r	   r;   r"   r<   r@   r
   r
   r   _translate_y   rG   rH   	magnitudec                 C   s   t d dkr| S |  S )z`
    Negate input value with 50% chance.

    Args:
        magnitude (float): Input value.
    r:         ?)r   randitem)rI   r
   r
   r   _randomly_negate   s   rM   levelparamsc                 C   s   | t  |d  }|d | fS )ap  
    Convert level to transform magnitude. This assumes transform magnitude increases
    linearly with level.

    Args:
        level (int): Level value.
        params (Tuple[float, float]): Params contains two values: 1) Base transform
            magnitude when level is 0; 2) Maxmimum increasing in transform magnitude
            when level is at Maxmimum.
    r:   r   _AUGMENTATION_MAX_LEVELrN   rO   rI   r
   r
   r   _increasing_magnitude_to_arg      rS   c                 C   s"   | t  |d  }|d t| fS )a  
    Convert level to transform magnitude. This assumes transform magnitude increases
    (or decreases with 50% chance) linearly with level.

    Args:
        level (int): Level value.
        params (Tuple[float, float]): Params contains two values: 1) Base transform
            magnitude when level is 0; 2) Maxmimum increasing in transform magnitude
            when level is at maxmimum.
    r:   r   )rQ   rM   rR   r
   r
   r   "_increasing_randomly_negate_to_arg  s   rU   c                 C   s"   | t  |d  }|d t| fS )a  
    Convert level to transform magnitude. This assumes transform magnitude decreases
    linearly with level. The return value is converted to int.

    Args:
        level (int): Level value.
        params (Tuple[float, float]): Params contains two values: 1) Base transform
            magnitude when level is 0; 2) Maxmimum decreasing in transform magnitude
            when level is at maxmimum.
    r:   r   )rQ   r)   rR   r
   r
   r   _decreasing_int_to_arg  s   rV   c                 C   s   | t  |d  }|d | fS )ap  
    Convert level to transform magnitude. This assumes transform magnitude decreases
    linearly with level.

    Args:
        level (int): Level value.
        params (Tuple[float, float]): Params contains two values: 1) Base transform
            magnitude when level is 0; 2) Maxmimum decreasing in transform magnitude
            when level is at maxmimum.
    r:   r   rP   rR   r
   r
   r   _decreasing_to_arg#  rT   rW   )AdjustBrightnessAdjustContrastAdjustSaturationAdjustSharpnessAutoContrastEqualizeInvertRotate	PosterizeSolarizeShearXShearY
TranslateX
TranslateY)r:   g?)r      )   rg   )r:   r:   )r   g333333?)r   g?sampling_stdrJ   r	   )rJ   rJ   rJ   c                   @   s   e Zd Z								ddedededeeeef  d	eeeef  d
eeee	f  deeee
f  dedeeee
f  ddfddZdefddZdejdejfddZdS )AugmentTransformr   rJ   Ngaussiantransform_namerI   probname_to_transform_funclevel_to_argtransform_max_parastransform_hparassampling_typesampling_hparasr   c
           
      C   s   |dv sJ |p	t }|pt}|pt}|pt| _|| _|	pt| _d| jv s&J | jdkr2d| jv s2J | jdkrjd| jv s>J d| jv sEJ | jd dkrWt| jd t	sVJ n| jd d	krjt| jd t	t
fsjJ ||v spJ t| _|| _|| _|| | _|| | _|| | _|| _|| _d
S )aM  
        The AugmentTransform composes a video transform that performs augmentation
        based on a maximum magnitude. AugmentTransform also offers flexible ways to
        generate augmentation magnitude based on different sampling strategies.

        Args:
            transform_name (str): The name of the video transform function.
            magnitude (int): Magnitude used for transform function.
            prob (float): The probablity of applying each transform function.
            name_to_transform_func (Optional[Dict[str, Callable]]): A Dictionary that
                contains mapping of the transform name to the transform function.
            level_to_arg (Optional[Dict[str, Callable]]): A Dictionary that contains
                mapping of the transform name to its level function, which converts
                the the magnitude to the transform function arguments.
            transform_max_paras (Optional[Dict[str, Tuple]]): A Dictionary that
                contains mapping of the transform name to its maximum transform
                magnitude.
            transform_hparas (Optional[Dict[Any]]): Transform hyper parameters.
                Needs to have key fill. By default, it uses transform_default_hparas.
            sampling_type (str): Sampling method for magnitude of transform. It should
                be either gaussian or uniform.
            sampling_hparas (Optional[Dict[Any]]): Hyper parameters for sampling. If
                gaussian sampling is used, it needs to have key sampling_std. By
                default, it uses transform_default_hparas.
        )rj   uniformr	   rj   rh   rs   sampling_data_typesampling_minr)   floatN)_NAME_TO_TRANSFORM_FUNC_LEVEL_TO_ARG_TRANSFORM_MAX_PARAMSTRANSFORM_DEFAULT_HPARASrp   rq   SAMPLING_DEFAULT_HPARASrr   
isinstancer)   rv   rQ   	max_levelrk   rI   transform_fnlevel_fnlevel_parasrl   )
selfrk   rI   rl   rm   rn   ro   rp   rq   rr   r
   r
   r   __init__s  s4   &







zAugmentTransform.__init__c              	   C   s   | j dkrtdt| jtj| j| jd dd S | j dkrV| jd dkr6tj	| jd	 | jd
 dd S | jd dkrRtj
dd | j| jd	   | jd	  S tdt)z7
        Get magnitude based on sampling type.
        rj   r   rh   )r:   )r=   rs   rt   r)   ru   r:   rv   z2sampling_data_type must be either 'int' or 'float')rq   maxminr}   r   normalrI   rr   rL   randintrK   
ValueErrorNotImplementedError)r   r
   r
   r   _get_magnitude  s6   

	zAugmentTransform._get_magnituder   c                 C   sV   t d | jkr|S |  }| jdur| || jnd}| j|g|R i | jS )z
        The input is a video tensor.

        Args:
            video (torch.Tensor): Input video tensor with shape (T, C, H, W).
        r:   Nr
   )	r   rK   rL   rl   r   r   r   r~   rp   )r   r   rI   
level_argsr
   r
   r   __call__  s   
zAugmentTransform.__call__)r   rJ   NNNNrj   N)__name__
__module____qualname__strr)   rv   r   r   r   r   r   r   r   r   Tensorr   r
   r
   r
   r   ri   r  s@    	

Bri   )-__doc__typingr   r   r   r   r   r   r   (torchvision.transforms.functional_tensorr   functional_tensorr>   !torchvision.transforms.functionalr   rQ   r   r   r   r   r    rv   r'   r*   r-   r0   r2   r4   r7   rB   rE   rF   rH   rM   r)   rS   rU   rV   rW   rw   rx   ry   r{   rz   ri   r
   r
   r
   r   <module>   s   



""