o
    ϯin/                     @   s   d dl mZ d dlmZ d dlmZmZmZm	Z	m
Z
 d dlZd dlmZmZmZmZmZ d dlmZmZ d dlmZ G dd	 d	eZG d
d deZdS )    )fields)Enum)AnyCallableDictListOptionalN)EncodedVideoInfoVideoClipInfoVideoDatasetTypeVideoFrameInfo	VideoInfo)
ActionDataEpicKitchenDataset)Videoc                   @   s   e Zd ZdZdS )ClipSampling   N)__name__
__module____qualname__Random r   r   ^/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/data/epic_kitchen_forecasting.pyr      s    r   c                       sT  e Zd ZdZejejdddddddf	deded	ed
edede	de	de
dee
 de
deeeef gef def fddZedeeeef geeef f de
de
de
deeeef geeef f f
ddZede
de	de	de
deee
 gee
 f f
ddZed
ede	de	de
de
deeeef eeee f gee f fddZ  ZS )EpicKitchenForecastingz
    Action forecasting video data set for EpicKitchen-55 Dataset.
    <https://epic-kitchens.github.io/2019/>

    This dataset handles the loading, decoding, and clip sampling for the videos.
    g       @g      $@r   NTvideo_info_file_pathactions_file_pathvideo_data_manifest_file_pathclip_samplingdataset_typeseconds_per_clipclip_time_stridenum_input_clipsframes_per_clipnum_forecast_actions	transformmultithreaded_ioc              
      s   ddd t tD  ddd t tD  ddd t tD  dd	d t tD  d
	 t|||||
}|	d ur@t|	|||nd }t||
|	|}t	 j
||||||||d d S )Nz
        Args:
            video_info_file_path (str):
                Path or URI to manifest with basic metadata of each video.
                File must be a csv (w/header) with columns:
                c                 S      g | ]}|j qS r   name.0fr   r   r   
<listcomp>3       z3EpicKitchenForecasting.__init__.<locals>.<listcomp>z

            actions_file_path (str):
                Path or URI to manifest with action annotations for each video.
                File must ber a csv (w/header) with columns:
                c                 S   r&   r   r'   r)   r   r   r   r,   8   r-   a  

            video_data_manifest_file_path (str):
                The path to a json file outlining the available video data for the
                associated videos. File must be a csv (w/header) with columns either:

                For Frame Videos:
                c                 S   r&   r   r'   r)   r   r   r   r,   ?   r-   z6

                For Encoded Videos:
                c                 S   r&   r   r'   r)   r   r   r   r,   B   r-   a  

                To generate this file from a directory of video frames, see helper
                functions in Module: pytorchvideo.data.epic_kitchen.utils

            clip_sampling (ClipSampling):
                The type of sampling to perform to perform on the videos of the dataset.

            dataset_type (VideoDatasetType): The dataformat in which dataset
                video data is store (e.g. video frames, encoded video etc).

            seconds_per_clip (float): The length of each sampled subclip in seconds.

            clip_time_stride (float): The time difference in seconds between the start of
                each input subclip.

            num_input_clips (int): The number of subclips to be included in the input
                video data.

            frames_per_clip (Optional[int]): The number of frames per clip to sample.
                If None, all frames in the clip will be included.

            num_forecast_actions (int): The number of actions to be included in the
                action vector.

            transform (Callable[[Dict[str, Any]], Any]):
                This callable is evaluated on the clip output before the clip is returned.
                It can be used for user-defined preprocessing and augmentations to the clips.
                The clip input is a dictionary with the following format:
                    {
                        'video_id': <str>,
                        'video': <video_tensor>,
                        'audio': <audio_tensor>,
                        'label': <List[ActionData]>,
                        'start_time': <float>,
                        'stop_time': <float>
                    }

                If transform is None, the raw clip output in the above format is
                    returned unmodified.

            multithreaded_io (bool):
                Boolean to control whether parllelizable io operations are performed across
                multiple threads.
        )r   r   r   r   r$   frame_filterclip_samplerr%   )dataclass_fieldsr   r   r   r	   r    _define_clip_structure_generator_frame_filter_generator_transform_generatorsuper__init__)selfr   r   r   r   r   r   r    r!   r"   r#   r$   r%   define_clip_structure_fnr.   	__class__r   r   r5      sH   
B
zEpicKitchenForecasting.__init__returnc                    s0   dt ttf dt ttf f fdd}|S )a]  
        Args:
            transform (Callable[[Dict[str, Any]], Dict[str, Any]]): A function that performs
            any operation on a clip before it is returned in the default transform function.
            num_forecast_actions: (int) The number of actions to be included in the
                action vector.
            frames_per_clip (int): The number of frames per clip to sample.
            num_input_clips (int): The number of subclips to be included in the video data.

        Returns:
            A function that performs any operation on a clip and returns the transformed clip.
        clipr:   c                    s   t  fddtt d d D sJ d fdd d D d  }| d<  d  d  ks8J t fd	dtD }| d<  D ]} | d u r\tg  |< qMrc   S )
Nc                 3   s0    | ]} d  | j  d  |d  j kV  qdS )actionsr   N
start_timer*   ir;   r   r   	<genexpr>   s
    
zVEpicKitchenForecasting._transform_generator.<locals>.transform_clip.<locals>.<genexpr>r<   r   zActions must be sortedc                    s   g | ]}|j  d  kr|qS )	stop_timer=   )r*   arA   r   r   r,      s    zWEpicKitchenForecasting._transform_generator.<locals>.transform_clip.<locals>.<listcomp>videoc                    s<   g | ]} d  dd| |d  ddddf qS )rE   Nr   r   r?   )r;   r"   r   r   r,      s    &)allrangelensizetorchstacktensor)r;   next_k_actionsclip_video_tensorkeyr"   r#   r!   r$   rA   r   transform_clip   s2   

zCEpicKitchenForecasting._transform_generator.<locals>.transform_clip)r   strr   )r$   r#   r"   r!   rQ   r   rP   r   r3      s   ,z+EpicKitchenForecasting._transform_generatorc                    sB   |d    | dt t dt t f fdd}|S )a  
        Args:
            frames_per_clip (int): The number of frames per clip to sample.
            seconds_per_clip (float): The length of each sampled subclip in seconds.
            clip_time_stride (float): The time difference in seconds between the start of
                each input subclip.
            num_input_clips (int): The number of subclips to be included in the video data.

        Returns:
            A function that takes in a list of frame indicies and outputs a subsampled list.
        r   frame_indicesr:   c                    sv   t | }| }t| }t  tD ]}t| | }tD ]} |||   q#q fddt| D S )Nc                    s   g | ]
\}}| v r|qS r   r   )r*   r@   xselected_framesr   r   r,      s    zXEpicKitchenForecasting._frame_filter_generator.<locals>.frame_filter.<locals>.<listcomp>)rH   intsetrG   add	enumerate)rS   "num_available_frames_for_all_clipsavailable_frames_per_secondintra_clip_sampling_strider@   clip_start_indexjr    desired_frames_per_secondr"   r!   time_window_lengthrU   r   r.      s"   

zDEpicKitchenForecasting._frame_filter_generator.<locals>.frame_filter)r   rW   )r"   r   r    r!   r.   r   r`   r   r2      s   &z.EpicKitchenForecasting._frame_filter_generatorc                    sj   | t jkstdt j d|  d||d |  dtttf dtttt f dtt f fdd	}|S )
a  
        Args:
            clip_sampling (ClipSampling):
                The type of sampling to perform to perform on the videos of the dataset.
            seconds_per_clip (float): The length of each sampled clip in seconds.
            clip_time_stride: The time difference in seconds between the start of
                each input subclip.
            num_input_clips (int):  The number of subclips to be included in the video data.
            num_forecast_actions (int): The number of actions to be included in the
                action vector.

        Returns:
            A function that takes a dictionary of videos and outputs a list of sampled
            clips.
        zOnly z is implemented. z not implemented.r   videosvideo_actionsr:   c           	   	      s   g }|  D ]G\}}t|d d   D ]8\}}d}t|d t|D ](}|| j|jkr1|d7 }| krK|j dkrI|t||j |j  nq#qq|S )Nr   r   )itemsrZ   rG   rH   r>   rC   appendr
   )	rc   rd   candidate_sample_clipsvideo_idr<   r@   actionnumber_valid_actionsr_   r#   rb   r   r   define_clip_structure  s*   zVEpicKitchenForecasting._define_clip_structure_generator.<locals>.define_clip_structure)	r   r   NotImplementedErrorr   rR   r   r   r   r
   )r   r   r    r!   r#   rm   r   rl   r   r1      s   


z7EpicKitchenForecasting._define_clip_structure_generator)r   r   r   __doc__r   r   r   FramerR   floatrW   r   r   r   r   boolr5   staticmethodr3   r   r2   r   r   r
   r1   __classcell__r   r   r8   r   r      s    	
o3)&r   )dataclassesr   r0   enumr   typingr   r   r   r   r   rJ   (pytorchvideo.data.dataset_manifest_utilsr	   r
   r   r   r   pytorchvideo.data.epic_kitchenr   r   pytorchvideo.data.videor   r   r   r   r   r   r   <module>   s   