o
    ϯi$                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ ddlmZ G d	d
 d
ejjjZ	ddededededee
e e
e f f
ddZdS )    N)defaultdict)AnyCallableListOptionalTupleType)	g_pathmgr)ClipSampler)
FrameVideo   )MultiProcessSamplerc                   @   s   e Zd ZdZejjjddddfdededede	d	e
ejjj d
eeegef  dedee deddfddZedee dededee fddZedd ZdefddZdd ZdS )SSv2aq  
    Action recognition video dataset for
    `Something-something v2 (SSv2) <https://20bn.com/datasets/something-something>`_ stored
    as image frames.

    This dataset handles the parsing of frames, loading and clip sampling for the
    videos. All io is done through :code:`iopath.common.file_io.PathManager`, enabling
    non-local storage uri's to be used.
    N Flabel_name_filevideo_label_filevideo_path_label_fileclip_samplervideo_sampler	transformvideo_path_prefixframes_per_cliprand_sample_framesreturnc
           
      C   sr   t jd || _|| _t||||d\| _| _|| j| _d| _	|dur.t
jtj||	dnd| _d| _d| _dS )a  
        Args:
            label_name_file (str): SSV2 label file that contains the label names and
                indexes.

            video_label_file (str): a file that contains video ids and the corresponding
                video label.

            video_path_label_file (str): a file that contains frame paths for each
                video and the corresponding frame label. The file must be a space separated
                csv of the format: (original_vido_id video_id frame_id path labels).

            clip_sampler (ClipSampler): Defines how clips should be sampled from each
                video. See the clip sampling documentation for more information.

            video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
                video container. This defines the order videos are decoded and,
                if necessary, the distributed split.

            transform (Optional[Callable]): This callable is evaluated on the clip output before
                the clip is returned. It can be used for user defined preprocessing and
                augmentations on the clips. The clip output format is described in __next__().

            video_path_prefix (str): prefix path to add to all paths from data_path.

            frames_per_clip (Optional[int]): The number of frames per clip to sample.

            rand_sample_frames (bool): If True, randomly sampling frames for each clip.
        z"PYTORCHVIDEO.dataset.SSv2.__init__)prefixN)r   rand_sample        )torch_C_log_api_usage_once
_transform_clip_sampler_read_video_paths_and_labels_path_to_videos_labels_video_sampler_video_sampler_iter	functoolspartialr   _sample_clip_frames_frame_filter_loaded_video_next_clip_start_time)
selfr   r   r   r   r   r   r   r   r    r.   J/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/data/ssv2.py__init__    s*   *
zSSv2.__init__frame_indicesr   c           	         s   t  }t|d | }g }t|D ]+}tt|| }tt||d  }|r4|t|| q||| d  q fdd|D S )a  
        Use segment-based input frame sampling that splits eachvideo into segments,
        and from each of them, we sample one frame to form a clip.

        Args:
            frame_indices (list): list of frame indices.
            frames_per_clip (int): The number of frames per clip to sample.
            rand_sample (bool): if True, randomly sampling frames.

        Returns:
            (list): Outputs a subsampled list with num_samples frames.
        r      c                    s   g | ]} | qS r.   r.   ).0idxr1   r.   r/   
<listcomp>   s    z,SSv2._sample_clip_frames.<locals>.<listcomp>)	lenfloatrangeintnproundappendrandomrandint)	r1   r   r   
num_framesseg_sizeseqistartendr.   r5   r/   r)   f   s   zSSv2._sample_clip_framesc                 C   s   | j S N)r%   r-   r.   r.   r/   r      s   zSSv2.video_samplerc           
      C   s   | j stt| j| _ | jr| j\}}nt| j }| j| }t|}||f| _| 	| j
|ji \}}}}}|dkrD|d|j| j| _|| _
|rOd| _d| _
| jd | j| t||||d}	| jdurj| |	}	|	S )a  
        Retrieves the next clip based on the clip sampling strategy and video sampler.

        Returns:
            A dictionary with the following format.

            .. code-block:: text

                {
                    'video': <video_tensor>,
                    'label': <index_label>,
                    'video_label': <index_label>
                    'video_index': <video_index>,
                    'clip_index': <clip_index>,
                    'aug_index': <aug_index>,
                }
        r   Nr   video)rH   label
video_namevideo_index
clip_index	aug_index)r&   iterr   r%   r+   nextr#   r   from_frame_pathsr!   r,   durationget_clipr*   _loaded_clipr$   strr    )
r-   rH   rK   path_to_video_frames
clip_startclip_endrL   rM   is_last_clipsample_dictr.   r.   r/   __next__   s6   






zSSv2.__next__c                 C   s   | S rF   r.   rG   r.   r.   r/   __iter__   s   zSSv2.__iter__)__name__
__module____qualname____doc__r   utilsdataRandomSamplerrT   r
   r   Samplerr   r   dictr   r:   boolr0   staticmethodr   r)   propertyr   rZ   r[   r.   r.   r.   r/   r      sT    	

F
9r   r   r   r   r   r   r   c                 C   s^  t t}t|d0}tj|dd}|D ]}t|dksJ |d }tj	||d }	|| 
|	 qW d   n1 s=w   Y  t| d}t|}
W d   n1 sXw   Y  t|d}t|}W d   n1 ssw   Y  g }g }|D ],}|d }||v r|d	 }|d
d}|dd}t|
| }|
||  |
| q~||fS )aA  
    Args:
        label_name_file (str): ssv2 label file that contians the label names and
            indexes. ('/path/to/folder/something-something-v2-labels.json')
        video_label_file (str): a file that contains video ids and the corresponding
            video label. (e.g., '/path/to/folder/something-something-v2-train.json')
        video_path_label_file (str): a file that contains frame paths for each
            video and the corresponding frame label. The file must be a space separated
            csv of the format:
                `original_vido_id video_id frame_id path labels`
        prefix (str): prefix path to add to all paths from video_path_label_file.

    Returns:
        image_paths (list): list of list containing path to each frame.
        labels (list): list containing label of each video.
    r )	delimiter   original_vido_idpathNidtemplate[r   ])r   listr	   opencsv
DictReaderr7   osrm   joinr=   jsonloadreplacer:   )r   r   r   r   pathsf
csv_readerrowrJ   rm   label_name_dictvideo_label_jsonlabelsimage_pathsrH   ro   rI   r.   r.   r/   r"      s:   

r"   )r   ) rt   r'   rx   rv   r>   collectionsr   typingr   r   r   r   r   r   numpyr;   r   torch.utils.dataiopath.common.file_ior	   pytorchvideo.data.clip_samplingr
   pytorchvideo.data.frame_videor   r`   r   ra   IterableDatasetr   rT   r:   r"   r.   r.   r.   r/   <module>   s8     5