o
    ϯi;                     @  s   d dl mZ d dlZd dlmZ d dlmZmZmZm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZmZ d dlmZ G dd	 d	ZG d
d dZddeejjjdfdddZdS )    )annotationsN)defaultdict)AnyCallableDictOptionalSetTupleType)	g_pathmgr)ClipInfoClipSampler)LabeledVideoDatasetc                   @  st   e Zd ZdZeeddZdZdZe	ddd	Z
e		
d d!ddZe	
d d"ddZed#ddZed$ddZd
S )%AvaLabeledVideoFramePathsz
    Pre-processor for Ava Actions Dataset stored as image frames -
        `<https://research.google.com/ava/download.html>_`
    This class handles the parsing of all the necessary
    csv files containing frame paths and frame labels.
    i  i     i  inpr   c                 C  s   |d }|d }|d }g }g }g }i }t t|D ]P}	||	 ||	 }
}d||	 d ||	 d ||	 d ||	 d }||vrXt|||< |||	  |g  |g  || }|| |
 || | q|||d	S )
Nlabels
extra_infoboxesz{:.2f},{:.2f},{:.2f},{:.2f}r            )r   r   r   )rangelenformatappend)clsr   r   r   r   
labels_aggextra_info_agg	boxes_aggbb_dicti	box_labelbox_extra_infobbox_keyidx r&   I/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/data/ava.py_aggregate_bboxes_labels   s0   (

z2AvaLabeledVideoFramePaths._aggregate_bboxes_labelsNframe_paths_filestrframe_labels_filevideo_path_prefixlabel_map_fileOptional[str]returnc                 C  s   |durt |\}}nd}t ||\}}}	t ||	|}
g }|
 D ]7}|
|  D ].}|
| | }t|d dkrZt |}||d< ||d< tj	|| d }|
||f q,q$|S )a  
        Args:
            frame_labels_file (str): Path to the file containing containing labels
                per key frame. Acceptible file formats are,
                Type 1:
                    <original_vido_id, frame_time_stamp, bbox_x_1, bbox_y_1, ...
                    bbox_x_2, bbox_y_2, action_lable, detection_iou>
                Type 2:
                    <original_vido_id, frame_time_stamp, bbox_x_1, bbox_y_1, ...
                    bbox_x_2, bbox_y_2, action_lable, person_label>
            frame_paths_file (str): Path to a file containing relative paths
                to all the frames in the video. Each line in the file is of the
                form <original_vido_id video_id frame_id rel_path labels>
            video_path_prefix (str): Path to be augumented to the each relative frame
                path to get the global frame path.
            label_map_file (str): Path to a .pbtxt containing class id's and class names.
                If not set, label_map is not loaded and bbox labels are not pruned
                based on allowable class_id's in label_map.
        Returs:
            A list of tuples of the the form (video_frames directory, label dictionary).
        Nr   r   video_index
clip_index)r   read_label_mapload_image_listsload_and_parse_labels_csvkeysr   r(   ospathdirnamer   )r   r)   r+   r,   r-   _allowed_class_idsimage_pathsvideo_idx_to_namevideo_name_to_idxvideo_frame_labelslabeled_video_pathsvideo_idframe_video_secr   video_frames_dirr&   r&   r'   from_csv@   s<   

	z"AvaLabeledVideoFramePaths.from_csvr=   dictr:   Optional[Set]c                 C  sP  i }t | d}|D ]}| d}|d }|| }t|d }	|	tjd ks0|	tjd k r1q|	tj }	tt	t|dd }
|d dkrIdnt
|d }|d	urX||vrXqt|d
 }||vrfi ||< |	|| vrttt|| |	< || |	 d |
 || |	 d | || |	 d | qW d	   |S 1 sw   Y  |S )av  
        Parses AVA per frame labels .csv file.
        Args:
            frame_labels_file (str): Path to the file containing labels
                per key frame. Acceptible file formats are,
                Type 1:
                    <original_vido_id, frame_time_stamp, bbox_x_1, bbox_y_1, ...
                    bbox_x_2, bbox_y_2, action_lable, detection_iou>
                Type 2:
                    <original_vido_id, frame_time_stamp, bbox_x_1, bbox_y_1, ...
                    bbox_x_2, bbox_y_2, action_lable, person_label>
            video_name_to_idx (dict): Dictionary mapping video names to indices.
            allowed_class_ids (set): A set of integer unique class (bbox label)
                id's that are allowed in the dataset. If not set, all class id's
                are allowed in the bbox labels.
        Returns:
            (dict): A dictionary of dictionary containing labels per each keyframe
                in each video. Here, the label for each keyframe is again a dict
                of the form,
                {
                    'labels': a list of bounding boxes
                    'boxes':a list of action lables for the bounding box
                    'extra_info': ist of extra information cotaining either
                        detections iou's or person id's depending on the
                        csv format.
                }
        r,r   r   r       N   r   r   r   )r   openstripsplitfloatr   AVA_VALID_FRAMESAVA_VIDEO_START_SEClistmapintr   r   )r+   r=   r:   labels_dictflinerow
video_name	video_idx	frame_secbboxlabelr   r&   r&   r'   r4      s8   !

&&z3AvaLabeledVideoFramePaths.load_and_parse_labels_csvr	   c                 C  s  g }i }g }t | dN}|  |D ]?}| }t|dks!J |d }||vr;t|}	|	||< || |i  || }
t|d }tj	||d ||
 |< qW d   n1 s]w   Y  g }t
t|D ]}|g  t|| }|D ]}|| || |  qyqj|||fS )aU  
        Loading image paths from the corresponding file.
        Args:
            frame_paths_file (str): Path to a file containing relative paths
                to all the frames in the video. Each line in the file is of the
                form <original_vido_id video_id frame_id rel_path labels>
            video_path_prefix (str): Path to be augumented to the each relative
                frame path to get the global frame path.
        Returns:
            (tuple): A tuple of the following,
                image_paths_list: List of list containing absolute frame paths.
                    Wherein the outer list is per video and inner list is per
                    timestamp.
                video_idx_to_name: A dictionary mapping video index to name
                video_name_to_idx: A dictionary maoping video name to index
        rF      r   r   r   N)r   rL   readlinerN   r   r   rT   r6   r7   joinr   sorted)r)   r,   r;   r=   r<   rV   rW   rX   rY   r%   data_keyframe_idimage_paths_listr!   sorted_keyskeyr&   r&   r'   r3      s:   



z*AvaLabeledVideoFramePaths.load_image_listsc                 C  s   i }t  }d}d}t| d;}|D ]-}|dr!|dd }q|ds+|dr?t| dd	 }|||< || qW d
   ||fS 1 sMw   Y  ||fS )a  
        Read label map and class ids.
        Args:
            label_map_file (str): Path to a .pbtxt containing class id's
                and class names
        Returns:
            (tuple): A tuple of the following,
                label_map (dict): A dictionary mapping class id to
                    the associated class names.
                class_ids (set): A set of integer unique class id's
        rJ   rF   z  name:"r   z  id:z  label_id: rH   N)setr   rL   
startswithrN   rT   rM   add)r-   	label_map	class_idsnameclass_idrV   rW   r&   r&   r'   r2      s$   


z(AvaLabeledVideoFramePaths.read_label_map)r   r   N)
r)   r*   r+   r*   r,   r*   r-   r.   r/   r   )r+   r*   r=   rD   r:   rE   )r)   r*   r,   r*   r/   r	   )r-   r*   r/   r	   )__name__
__module____qualname____doc__rR   r   rP   FPSrQ   classmethodr(   rC   staticmethodr4   r3   r2   r&   r&   r&   r'   r      s"    #?I4r   c                   @  s.   e Zd ZdZdddZdddZdddZdS )TimeStampClipSamplerz
    A sepcialized clip sampler for sampling video clips around specific
    timestamps. This is particularly used in datasets like Ava wherein only
    a specific subset of clips in the video have annotations
    clip_samplerr   r/   Nonec                 C  s
   || _ dS )z
        Args:
            clip_sampler (`pytorchvideo.data.ClipSampler`): Strategy used for sampling
                between the untrimmed clip boundary.
        N)ry   )selfry   r&   r&   r'   __init__"  s   
zTimeStampClipSampler.__init__last_clip_timerO   video_duration
annotationDict[str, Any]r   c                 C  s0   |d }|| j jd  }t||| j j dddS )a  
        Args:
            last_clip_time (float): Not used for TimeStampClipSampler.
            video_duration: (float): Not used for TimeStampClipSampler.
            annotation (Dict): Dict containing time step to sample aroud.
        Returns:
            clip_info (ClipInfo): includes the clip information of (clip_start_time,
            clip_end_time, clip_index, aug_index, is_last_clip). The times are in seconds.
            clip_index, aux_index and is_last_clip are always 0, 0 and True, respectively.
        r1   g       @r   T)ry   _clip_durationr   )r{   r}   r~   r   center_frame_secclip_start_secr&   r&   r'   __call__*  s   
zTimeStampClipSampler.__call__c                 C  s   d S rp   r&   )r{   r&   r&   r'   resetA  s   zTimeStampClipSampler.resetN)ry   r   r/   rz   )r}   rO   r~   rO   r   r   r/   r   )r/   rz   )rq   rr   rs   rt   r|   r   r   r&   r&   r&   r'   rx     s
    

rx   rJ   r)   r*   r+   r,   r-   r.   ry   r   video_samplerType[torch.utils.data.Sampler]	transformOptional[Callable[[dict], Any]]r/   rz   c                 C  s&   t | |||}t|t|||ddS )a  
    Args:
        frame_paths_file (str): Path to a file containing relative paths
            to all the frames in the video. Each line in the file is of the
            form <original_vido_id video_id frame_id rel_path labels>
        frame_labels_file (str): Path to the file containing containing labels
            per key frame. Acceptible file formats are,
            Type 1:
                <original_vido_id, frame_time_stamp, bbox_x_1, bbox_y_1, ...
                bbox_x_2, bbox_y_2, action_lable, detection_iou>
            Type 2:
                <original_vido_id, frame_time_stamp, bbox_x_1, bbox_y_1, ...
                bbox_x_2, bbox_y_2, action_lable, person_label>
        video_path_prefix (str): Path to be augumented to the each relative frame
            path to get the global frame path.
        label_map_file (str): Path to a .pbtxt containing class id's
            and class names. If not set, label_map is not loaded and bbox labels are
            not pruned based on allowable class_id's in label_map.
        clip_sampler (ClipSampler): Defines how clips should be sampled from each
                video.
        video_sampler (Type[torch.utils.data.Sampler]): Sampler for the internal
                video container. This defines the order videos are decoded and,
                if necessary, the distributed split.
        transform (Optional[Callable]): This callable is evaluated on the clip output
            and the corresponding bounding boxes before the clip and the bounding boxes
            are returned. It can be used for user defined preprocessing and
            augmentations to the clips. If transform is None, the clip and bounding
            boxes are returned as it is.
    F)r?   ry   r   r   decode_audio)r   rC   r   rx   )r)   r+   r,   r-   ry   r   r   r?   r&   r&   r'   AvaE  s   &r   )r)   r*   r+   r*   r,   r*   r-   r.   ry   r   r   r   r   r   r/   rz   )
__future__r   r6   collectionsr   typingr   r   r   r   r   r	   r
   torchiopath.common.file_ior   pytorchvideo.data.clip_samplingr   r   'pytorchvideo.data.labeled_video_datasetr   r   rx   utilsdataRandomSamplerr   r&   r&   r&   r'   <module>   s$   $  -