o
    ϯip/                     @   s   d dl Z d dlmZmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZ G dd de	ZG dd de	ZG d	d
 d
eZdedefddZG dd deZG dd deZG dd deZG dd deZdS )    N)ABCabstractmethod)Fraction)AnyDict
NamedTupleOptionalTupleUnionListc                   @   sJ   e Zd ZU dZeeef ed< eeef ed< eed< eed< e	ed< dS )ClipInfoa  
    Named-tuple for clip information with:
        clip_start_sec  (Union[float, Fraction]): clip start time.
        clip_end_sec (Union[float, Fraction]): clip end time.
        clip_index (int): clip index in the video.
        aug_index (int): augmentation index for the clip. Different augmentation methods
            might generate multiple views for the same clip.
        is_last_clip (bool): a bool specifying whether there are more clips to be
            sampled from the video.
    clip_start_secclip_end_sec
clip_index	aug_indexis_last_clipN)
__name__
__module____qualname____doc__r
   floatr   __annotations__intbool r   r   S/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/data/clip_sampling.pyr   	   s   
 r   c                   @   sN   e Zd ZU dZee ed< ee ed< ee ed< ee ed< ee ed< dS )ClipInfoLista  
    Named-tuple for clip information with:
        clip_start_sec  (float): clip start time.
        clip_end_sec (float): clip end time.
        clip_index (int): clip index in the video.
        aug_index (int): augmentation index for the clip. Different augmentation methods
            might generate multiple views for the same clip.
        is_last_clip (bool): a bool specifying whether there are more clips to be
            sampled from the video.
    r   r   r   r   r   N)r   r   r   r   r   r   r   r   r   r   r   r      s   
 r   c                	   @   sj   e Zd ZdZdeeef ddfddZedeeef deeef d	e	e
ef defd
dZdddZdS )ClipSamplerz
    Interface for clip samplers that take a video time, previous sampled clip time,
    and returns a named-tuple ``ClipInfo``.
    clip_durationreturnNc                 C   s   t || _d| _d| _d S Nr   )r   _clip_duration_current_clip_index_current_aug_index)selfr   r   r   r   __init__5   s   

zClipSampler.__init__last_clip_timevideo_duration
annotationc                 C   s   d S Nr   )r$   r&   r'   r(   r   r   r   __call__:   s   zClipSampler.__call__c                 C   s   dS )zBResets any video-specific attributes in preperation for next videoNr   r$   r   r   r   resetC   s   zClipSampler.reset)r   N)r   r   r   r   r
   r   r   r%   r   r   strr   r   r*   r,   r   r   r   r   r   /   s    


r   sampling_typer   c                 G   sN   | dkrt | S | dkrt| S | dkrt| S | dkr t| S t|  d)a  
    Constructs the clip samplers found in ``pytorchvideo.data.clip_sampling`` from the
    given arguments.

    Args:
        sampling_type (str): choose clip sampler to return. It has three options:

            * uniform: constructs and return ``UniformClipSampler``
            * random: construct and return ``RandomClipSampler``
            * constant_clips_per_video: construct and return ``ConstantClipsPerVideoSampler``

        *args: the args to pass to the chosen clip sampler constructor.
    uniformrandomconstant_clips_per_videorandom_multiz not supported)UniformClipSamplerRandomClipSamplerConstantClipsPerVideoSamplerRandomMultiClipSamplerNotImplementedError)r.   argsr   r   r   make_clip_samplerH   s   r9   c                
       s   e Zd ZdZ			ddeeef deeeef  dedef fd	d
Z	deeef deeef dede
eef fddZdededeeef defddZdd Z  ZS )r3   zC
    Evenly splits the video into clips of size clip_duration.
    NFư>r   stridebackpad_lastepsc                    sR   t  | |dur|n|| _|| _|| _| jdkr| j|ks'J d| ddS )a  
        Args:
            clip_duration (Union[float, Fraction]):
                The length of the clip to sample (in seconds).
            stride (floUnion[float, Fraction]at, optional):
                The amount of seconds to offset the next clip by
                default value of None is equivalent to no stride => stride == clip_duration.
            eps (float):
                Epsilon for floating point comparisons. Used to check the last clip.
            backpad_last (bool):
                Whether to include the last frame(s) by "back padding".

                For instance, if we have a video of 39 frames (30 fps = 1.3s)
                with a stride of 16 (0.533s) with a clip duration of 32 frames
                (1.0667s). The clips will be (in frame numbers):

                with backpad_last = False
                - [0, 32]

                with backpad_last = True
                - [0, 32]
                - [8, 40], this is "back-padded" from [16, 48] to fit the last window
        Note that you can use Fraction for clip_duration and stride if you want to
        avoid float precision issue and need accurate frames in each clip.
        Nr   z(stride must be >0 and <= clip_duration ())superr%   _stride_eps_backpad_last)r$   r   r;   r<   r=   	__class__r   r   r%   g   s    
zUniformClipSampler.__init__r&   r'   r   c                 C   sl   t t|td| j| j  d}t || j }|r2td|| }||8 }t td|}t || j }||fS )zK
        Helper to calculate the start/end clip with backpad logic
        r   )r   maxr!   r@   )r$   r&   r'   r<   
clip_startclip_endbuffer_amountr   r   r   _clip_start_end   s   	z"UniformClipSampler._clip_start_endr(   c           
      C   s~   | j ||| jd\}}| j ||| jd\}}| jr#t|| | jk }n||k}| j}	|  jd7  _|r7|   t|||	d|S )a  
        Args:
            last_clip_time (float): the last clip end time sampled from this video. This
                should be 0.0 if the video hasn't had clips sampled yet.
            video_duration: (float): the duration of the video that's being sampled in seconds
            annotation (Dict): Not used by this sampler.
        Returns:
            clip_info: (ClipInfo): includes the clip information (clip_start_time,
            clip_end_time, clip_index, aug_index, is_last_clip), where the times are in
            seconds and is_last_clip is False when there is still more of time in the video
            to be sampled.
        )r<      r   )rI   rB   absrA   r"   r,   r   )
r$   r&   r'   r(   rF   rG   _next_clip_endr   r   r   r   r   r*      s   

zUniformClipSampler.__call__c                 C   s
   d| _ d S r    )r"   r+   r   r   r   r,      s   
zUniformClipSampler.reset)NFr:   )r   r   r   r   r
   r   r   r   r   r%   r	   rI   r   r-   r   r   r*   r,   __classcell__r   r   rC   r   r3   b   sB    
)





$r3   c                   @   s2   e Zd ZdZdededeeef defddZ	dS )	r4   zF
    Randomly samples clip of size clip_duration from the videos.
    r&   r'   r(   r   c                 C   s6   t || j d}ttd|}t||| j dddS )a#  
        Args:
            last_clip_time (float): Not used for RandomClipSampler.
            video_duration: (float): the duration (in seconds) for the video that's
                being sampled
            annotation (Dict): Not used by this sampler.
        Returns:
            clip_info (ClipInfo): includes the clip information of (clip_start_time,
            clip_end_time, clip_index, aug_index, is_last_clip). The times are in seconds.
            clip_index, aux_index and is_last_clip are always 0, 0 and True, respectively.

        r   T)rE   r!   r   r0   r/   r   )r$   r&   r'   r(   max_possible_clip_startr   r   r   r   r*      s
   zRandomClipSampler.__call__N)
r   r   r   r   r   r   r-   r   r   r*   r   r   r   r   r4      s    
r4   c                       sT   e Zd ZdZdededdf fddZded	ed
eee	f de
f fddZ  ZS )r6   z
    TODO
    r   	num_clipsr   Nc                    s   t  | || _d S r)   )r?   r%   
_num_clips)r$   r   rP   rC   r   r   r%      s   
zRandomMultiClipSampler.__init__r&   r'   r(   c           
         s   | j d g | j d g | j d g | j d g | j d g f\}}}}}t| j D ]}	t |||\||	< ||	< ||	< ||	< ||	< q%t|||||S r)   )rQ   ranger?   r*   r   )
r$   r&   r'   r(   clip_start_listclip_end_listclip_index_listaug_index_listis_last_clip_listirC   r   r   r*      s6   




zRandomMultiClipSampler.__call__)r   r   r   r   r   r   r%   r   r-   r   r   r*   rN   r   r   rC   r   r6      s    
r6   c                	       s`   e Zd ZdZ	ddedededdf fdd	Zd
ededeee	f de
fddZdd Z  ZS )r5   z
    Evenly splits the video into clips_per_video increments and samples clips of size
    clip_duration at these increments.
    rJ   r   clips_per_videoaugs_per_clipr   Nc                    s   t  | || _|| _d S r)   )r?   r%   _clips_per_video_augs_per_clip)r$   r   rY   rZ   rC   r   r   r%     s   
z%ConstantClipsPerVideoSampler.__init__r&   r'   r(   c           
      C   s   t t|| j d}t || j}|| j }| j}| j}|  jd7  _| j| jkr2|  jd7  _d| _d}	| j| jksA|| j |krFd| _d}	|	rL|   t||| j |||	S )ac  
        Args:
            last_clip_time (float): Not used for ConstantClipsPerVideoSampler.
            video_duration: (float): the duration (in seconds) for the video that's
                being sampled.
            annotation (Dict): Not used by this sampler.
        Returns:
            a named-tuple `ClipInfo`: includes the clip information of (clip_start_time,
                clip_end_time, clip_index, aug_index, is_last_clip). The times are in seconds.
                is_last_clip is True after clips_per_video clips have been sampled or the end
                of the video is reached.

        r   rJ   FT)	r   rE   r!   r[   r"   r#   r\   r,   r   )
r$   r&   r'   r(   rO   uniform_clipr   r   r   r   r   r   r   r*   !  s.   
z%ConstantClipsPerVideoSampler.__call__c                 C   s   d| _ d| _d S r    )r"   r#   r+   r   r   r   r,   P  s   
z"ConstantClipsPerVideoSampler.reset)rJ   )r   r   r   r   r   r   r%   r   r-   r   r   r*   r,   rN   r   r   rC   r   r5     s,    

/r5   )r0   abcr   r   	fractionsr   typingr   r   r   r   r	   r
   r   r   r   r   r-   r9   r3   r4   r6   r5   r   r   r   r   <module>   s   $k,