o
    oiUC                     @   s   d dl mZmZmZmZmZmZmZ d dlZd dl	m
Z d dlmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZ d	d
lmZ dgZG dd deZdS )    )AnyDictListOptionalTupleUnioncastN)_AugmentationBase)SequentialBase)ImageSequential_get_new_batch_shape)ModuleTensor)Boxes)	Keypoints   )	ParamItemVideoSequentialc                       s,  e Zd ZdZddddddeded	ed
eeee	eef f de
ee  ddf fddZdejdedejfddZdededefddZdedejded	ededefddZdededefddZdededefddZdejdee fd d!Z	d8ded"ee d#e
eeef  def fd$d%Z	d8ded"ee d#e
eeef  def fd&d'Z	d8ded"ee d#e
eeef  def fd(d)Z	d8ded"ee d#e
eeef  def fd*d+Z	d8deeef d"ee d#e
eeef  deeef f fd,d-Z	d8deeef d"ee d#e
eeef  deeef f fd.d/Z 	d8deee!f d"ee d#e
eeef  deee!f f fd0d1Z"	d8deee!f d"ee d#e
eeef  deee!f f fd2d3Z#	d9ded"e
ee  d#e
eeef  defd4d5Z$	d9ded"e
ee  d#e
eeef  defd6d7Z%  Z&S ):r   a  VideoSequential for processing 5-dim video data like (B, T, C, H, W) and (B, C, T, H, W).

    `VideoSequential` is used to replace `nn.Sequential` for processing video data augmentations.
    By default, `VideoSequential` enabled `same_on_frame` to make sure the same augmentations happen
    across temporal dimension. Meanwhile, it will not affect other augmentation behaviours like the
    settings on `same_on_batch`, etc.

    Args:
        *args: a list of augmentation module.
        data_format: only BCTHW and BTCHW are supported.
        same_on_frame: apply the same transformation across the channel per frame.
        random_apply: randomly select a sublist (order agnostic) of args to
            apply transformation.
            If int, a fixed number of transformations will be selected.
            If (a,), x number of transformations (a <= x <= len(args)) will be selected.
            If (a, b), x number of transformations (a <= x <= b) will be selected.
            If None, the whole list of args will be processed as a sequence.

    Note:
        Transformation matrix returned only considers the transformation applied in ``kornia.augmentation`` module.
        Those transformations in ``kornia.geometry`` will not be taken into account.

    Example:
        If set `same_on_frame` to True, we would expect the same augmentation has been applied to each
        timeframe.

        >>> import kornia
        >>> input = torch.randn(2, 3, 1, 5, 6).repeat(1, 1, 4, 1, 1)
        >>> aug_list = VideoSequential(
        ...     kornia.augmentation.ColorJiggle(0.1, 0.1, 0.1, 0.1, p=1.0),
        ...     kornia.color.BgrToRgb(),
        ...     kornia.augmentation.RandomAffine(360, p=1.0),
        ...     random_apply=10,
        ...     data_format="BCTHW",
        ...     same_on_frame=True)
        >>> output = aug_list(input)
        >>> (output[0, :, 0] == output[0, :, 1]).all()
        tensor(True)
        >>> (output[0, :, 1] == output[0, :, 2]).all()
        tensor(True)
        >>> (output[0, :, 2] == output[0, :, 3]).all()
        tensor(True)

        If set `same_on_frame` to False:

        >>> aug_list = VideoSequential(
        ...     kornia.augmentation.ColorJiggle(0.1, 0.1, 0.1, 0.1, p=1.0),
        ...     kornia.augmentation.RandomAffine(360, p=1.0),
        ...     kornia.augmentation.RandomMixUpV2(p=1.0),
        ... data_format="BCTHW",
        ... same_on_frame=False)
        >>> output = aug_list(input)
        >>> output.shape
        torch.Size([2, 3, 4, 5, 6])
        >>> (output[0, :, 0] == output[0, :, 1]).all()
        tensor(False)

        Reproduce with provided params.
        >>> out2 = aug_list(input, params=aug_list._params)
        >>> torch.equal(output, out2)
        True

    Perform ``OneOf`` transformation with ``random_apply=1`` and ``random_apply_weights`` in ``VideoSequential``.

        >>> import kornia
        >>> input, label = torch.randn(2, 3, 1, 5, 6).repeat(1, 1, 4, 1, 1), torch.tensor([0, 1])
        >>> aug_list = VideoSequential(
        ...     kornia.augmentation.ColorJiggle(0.1, 0.1, 0.1, 0.1, p=1.0),
        ...     kornia.augmentation.RandomAffine(360, p=1.0),
        ...     kornia.augmentation.RandomMixUpV2(p=1.0),
        ... data_format="BCTHW",
        ... same_on_frame=False,
        ... random_apply=1,
        ... random_apply_weights=[0.5, 0.3, 0.8]
        ... )
        >>> out = aug_list(input)
        >>> out.shape
        torch.Size([2, 3, 4, 5, 6])

    BTCHWTFN)data_formatsame_on_framerandom_applyrandom_apply_weightsargsr   r   r   r   returnc                   sr   t  j|d d ||d || _| | _| jdvr!td| d|  | jdkr-d| _d S | jdkr7d| _d S d S )	N)same_on_batchkeepdimr   r   )BCTHWr   z-Only `BCTHW` and `BTCHW` are supported. Got `z`.r      r   r   )super__init__r   upperr   AssertionError_temporal_channel)selfr   r   r   r   r   	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/kornia/augmentation/container/video.pyr    w   s"   





zVideoSequential.__init__batch_shapechennel_indexc                 C   s$   t tj|d | ||d d   S )Nr   )r   torchSize)r$   r)   r*   r'   r'   r(   '__infer_channel_exclusive_batch_shape__   s   $z7VideoSequential.__infer_channel_exclusive_batch_shape__param	frame_numc                 C   sV   |ddddf j d|gdgt|jdd  R  }|jdgt|jdd R  S )af  Repeat parameters across channels.

        The input is shaped as (B, ...), while to output (B * same_on_frame, ...), which
        to guarantee that the same transformation would happen for each frame.

        (B1, B2, ..., Bn) => (B1, ... B1, B2, ..., B2, ..., Bn, ..., Bn)
                              | ch_size | | ch_size |  ..., | ch_size |
        N.r   )repeatlenshapereshapelist)r$   r.   r/   repeatedr'   r'   r(    __repeat_param_across_channels__   s   6	 z0VideoSequential.__repeat_param_across_channels__vr   c                 C   s   |  s|S |r|r|j|d | gdg|jd  R  S |r%| ||S |rH|djd|d gdg|jd  R  jdg|jdd  R  S |S )Nr   r   r0   )numelr1   ndimr7   	unsqueezer4   r3   )r$   r8   r)   r/   r   r   r'   r'   r(   __broadcast_param__   s   &Bz#VideoSequential.__broadcast_param__inputc                 C   sB   | j dkr|dd}| j dkr	 |jdg|jdd  R  }|S )Nr   r   r   r   r0   )r   	transposer4   r3   r$   r=   r/   r'   r'   r(   _input_shape_convert_in   s   

z'VideoSequential._input_shape_convert_inc                 C   sD   |j d|g|jdd  R  }| jdkr|dd}| jdkr 	 |S )Nr0   r   r   r   r   )viewr3   r   r>   r?   r'   r'   r(   _input_shape_convert_back   s   

z)VideoSequential._input_shape_convert_backc              	   C   s  || j  }|  }| || j }g }|D ]\}}t|tjttjfrt|dd}| j	r:|r:t
dg|dd  }n%| j	r@|}n|rOt
|g|dd  }nt
|d | g|dd  }||}	t|	tr|	 D ]*\}
}|
dkrt|tjtjfrqm|
dkr|	|
|i qm| |||| j	||	|
< qmt||	}nt|tfr||}| j	rtdt||}nt|d }t||}|| q|S )Nr   Fr   r   orderforward_input_shapez:Sequential is currently unsupported for ``same_on_frame``.)r#   get_forward_sequencer-   
isinstanceK
RandomCropr	   MixAugmentationBaseV2getattrr   r+   r,   forward_parametersdictitemsColorJiggleColorJitterupdater<   r   r
   
ValueErrorr   append)r$   r)   r/   named_modulesparamsnamemoduleis_same_on_batch	mod_shape	mod_paramkr8   r.   	seq_paramr'   r'   r(   rK      sF   

 





z"VideoSequential.forward_parametersrT   
extra_argsc                    :   | | j}| ||}t j|||d}| ||}|S Nr\   )sizer#   r@   r   transform_inputsrB   r$   r=   rT   r\   r/   r%   r'   r(   ra      
   z VideoSequential.transform_inputsc                    r]   r^   )r`   r#   r@   r   inverse_inputsrB   rb   r%   r'   r(   rd      rc   zVideoSequential.inverse_inputsc                    r]   r^   )r`   r#   r@   r   transform_masksrB   rb   r%   r'   r(   re   
  rc   zVideoSequential.transform_masksc                    r]   r^   )r`   r#   r@   r   inverse_masksrB   rb   r%   r'   r(   rf     rc   zVideoSequential.inverse_masksc              	         t |tr;|d|d}}tj|d|d|d|ddd}t j|||d	}|j||ddd}|S t j|||d	}|S 
al  Transform bounding boxes.

        Args:
            input: tensor with shape :math:`(B, T, N, 4, 2)`.
                If input is a `Keypoints` type, the internal shape is :math:`(B * T, N, 4, 2)`.
            params: params for the sequence.
            extra_args: Optional dictionary of extra arguments with specific options for different input types.
        r   r   r0   r         vertices_plus)moder_   )	rF   r   r`   r   from_tensorrA   r   transform_boxesdatar$   r=   rT   r\   	batchsizer/   r%   r'   r(   rn         
,zVideoSequential.transform_boxesc              	      rg   rh   )	rF   r   r`   r   rm   rA   r   inverse_boxesro   rp   r%   r'   r(   rs   4  rr   zVideoSequential.inverse_boxesc                    s|   t |tr3|d|d}}t|d|d|d}t j|||d}|j||dd}|S t j|||d}|S af  Transform bounding boxes.

        Args:
            input: tensor with shape :math:`(B, T, N, 2)`.
                If input is a `Keypoints` type, the internal shape is :math:`(B * T, N, 2)`.
            params: params for the sequence.
            extra_args: Optional dictionary of extra arguments with specific options for different input types.
        r   r   r0   r   ri   r_   )rF   r   r`   r   rA   r   transform_keypointsro   rp   r%   r'   r(   ru   H     
z#VideoSequential.transform_keypointsc                    s|   t |tr3|d|d}}t|d|d|d}t j|||d}|j||dd}|S t j|||d}|S rt   )rF   r   r`   r   rA   r   inverse_keypointsro   )r$   r=   rT   r\   r/   rq   r%   r'   r(   rw   \  rv   z!VideoSequential.inverse_keypointsc                 C   s2   |du r| j dur| j }ntd| j|||dS )zInverse transformation.

        Used to inverse a tensor according to the performed transformation by a forward pass, or with respect to
        provided parameters.
        Nz.No valid params to inverse the transformation.r_   )_paramsRuntimeErrorrd   )r$   r=   rT   r\   r'   r'   r(   inversep  s
   
zVideoSequential.inversec                 C   sP   t |jdkrtd|j d|du r| |j| _| j}| j|||d}|S )z'Define the video computation performed.   z"Input must be a 5-dim tensor. Got .Nr_   )r2   r3   r"   rK   rx   ra   )r$   r=   rT   r\   outputr'   r'   r(   forward  s   zVideoSequential.forward)N)NN)'__name__
__module____qualname____doc__r   strboolr   intr   r   r   floatr    r+   r,   r-   r   r7   r<   r@   rB   r   rK   r   r   ra   rd   re   rf   r   rn   rs   r   ru   rw   rz   r~   __classcell__r'   r'   r%   r(   r   #   s   V

	3










)typingr   r   r   r   r   r   r   r+   kornia.augmentationaugmentationrG   kornia.augmentation.baser	   "kornia.augmentation.container.baser
   #kornia.augmentation.container.imager   r   kornia.corer   r   kornia.geometry.boxesr   kornia.geometry.keypointsr   rT   r   __all__r   r'   r'   r'   r(   <module>   s   $