o
    پi>                     @   s|   d Z ddlZddlmZ ddlmZ ddlmZ ddlm	Z	m
Z
 e  ddlmZ dd	lmZ d
gZeG dd
 d
eZdS )zpCustomized dataloader for general video classification tasks.
Code adapted from https://github.com/dmlc/gluon-cv    N   )	use_mxnet)try_import_mxnet)VideoReader)cpugpu)numpy)use_npVideoClsCustomc                       s   e Zd ZdZdddddddddddd	d
d
ddddddedf fdd	Zdd Zdd Zdd Zdd Z	dd Z
dd Zdd Zdd  Zd!d" Zd#d$ Z  ZS )%r
   a  Load your own video classification dataset.

    Parameters
    ----------
    root : str, required.
        Path to the root folder storing the dataset.
    setting : str, required.
        A text file describing the dataset, each line per video sample.
        There are three items in each line: (1) video path; (2) video length and (3) video label.
    train : bool, default True.
        Whether to load the training or validation set.
    test_mode : bool, default False.
        Whether to perform evaluation on the test set.
        Usually there is three-crop or ten-crop evaluation strategy involved.
    name_pattern : str, default None.
        The naming pattern of the decoded video frames.
        For example, img_00012.jpg.
    video_ext : str, default 'mp4'.
        Please specify the video format accordinly.
    is_color : bool, default True.
        Whether the loaded image is color or grayscale.
    modality : str, default 'rgb'.
        Input modalities, we support only rgb video frames for now.
        Will add support for rgb difference image and optical flow image later.
    num_segments : int, default 1.
        Number of segments to evenly divide the video into clips.
        A useful technique to obtain global video-level information.
        Limin Wang, etal, Temporal Segment Networks: Towards Good Practices for Deep Action Recognition, ECCV 2016.
    num_crop : int, default 1.
        Number of crops for each image. default is 1.
        Common choices are three crops and ten crops during evaluation.
    new_length : int, default 1.
        The length of input video clip. Default is a single image, but it can be multiple video frames.
        For example, new_length=16 means we will extract a video clip of consecutive 16 frames.
    new_step : int, default 1.
        Temporal sampling rate. For example, new_step=1 means we will extract a video clip of consecutive frames.
        new_step=2 means we will extract a video clip of every other frame.
    new_width : int, default 340.
        Scale the width of loaded image to 'new_width' for later multiscale cropping and resizing.
    new_height : int, default 256.
        Scale the height of loaded image to 'new_height' for later multiscale cropping and resizing.
    target_width : int, default 224.
        Scale the width of transformed image to the same 'target_width' for batch forwarding.
    target_height : int, default 224.
        Scale the height of transformed image to the same 'target_height' for batch forwarding.
    temporal_jitter : bool, default False.
        Whether to temporally jitter if new_step > 1.
    transform : function, default None.
        A function that takes data and label and transforms them.
    slowfast : bool, default False.
        If set to True, use data loader designed for SlowFast network.
        Christoph Feichtenhofer, etal, SlowFast Networks for Video Recognition, ICCV 2019.
    slow_temporal_stride : int, default 16.
        The temporal stride for sparse sampling of video frames in slow branch of a SlowFast network.
    fast_temporal_stride : int, default 2.
        The temporal stride for sparse sampling of video frames in fast branch of a SlowFast network.
    lazy_init : bool, default False.
        If set to True, build a dataset instance without loading any dataset.
    ctx : decord.Context, default is cpu(0)
        Set the context used to load the video. Can be cpu() or gpu(xx)
    TFzimg_%05d.jpgmp4rgb   iT           r   Nr   c                    s  t t|   || _|| _|| _|| _|| _|| _|	| _	|
| _
|| _|| _|| _|| _| j| j | _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _| jrn|| dks`J d|rfJ d|dksnJ d| js| ||| _t| jdkrtd| d d S d S )Nr   z]slow_temporal_stride needs to be multiples of slow_temporal_stride, please set it accordinly.zWSlowfast dataloader does not support temporal jitter. Please set temporal_jitter=False.r   zSSlowfast dataloader only support consecutive frames reading, please set new_step=1.z&Found 0 video clips in subfolders of: z*
Check your data directory (opt.data-dir).)superr
   __init__rootsettingtrain	test_modeis_colormodalitynum_segmentsnum_crop
new_height	new_width
new_lengthnew_stepskip_lengthtarget_heighttarget_width	transformtemporal_jittername_pattern	video_extslowfastslow_temporal_stridefast_temporal_stride	lazy_initctx_make_datasetclipslenRuntimeError)selfr   r   r   r   r$   r%   r   r   r   r   r   r   r   r   r!   r    r#   r&   r'   r(   r)   r"   r*   	__class__ K/home/ubuntu/.local/lib/python3.10/site-packages/decord/data/base_action.pyr   O   sF   zVideoClsCustom.__init__c                 C   s  t |tr
|\}}nd }| j| \}}}d|dd v r |}nd|| j}t|| j| j|d u r4| j	n|d}t
|}| jrJ| jsJ| |\}}	n| jsX| jsX| |\}}	n| |\}}	| jrl| |||||	}
n	| |||||	}
| jd ur| |
}
| jrt
|
| j| j  }tj|
dd}
|
d|d	| j| jf }
t|
d
}
ntj|
dd}
|
d| jd	| j| jf }
t|
d
}
| jdkrtj|
dd}
|
 |fS )N./z{}.{})widthheightr*   r   )axis)r6      )r   r   r   r:      r   r   )
isinstancetupler,   splitformatr%   r   r   r   r*   r-   r   r   _sample_train_indices_sample_val_indices_sample_test_indicesr&   !_video_TSN_decord_slowfast_loader_video_TSN_decord_batch_loaderr"   r   r   npstackreshaper    r!   	transposer   squeezeas_nd_ndarray)r/   indexr*   	directorydurationtarget
video_name	decord_vrsegment_indicesskip_offsets
clip_inputsparse_sampelsr2   r2   r3   __getitem__   s<   

"


zVideoClsCustom.__getitem__c                 C   s
   t | jS N)r-   r,   )r/   r2   r2   r3   __len__   s   
zVideoClsCustom.__len__c                    sB   fddt D      fddtt D } |fS )Nc                    s&   g | ]}t jt j |r|qS r2   )ospathisdirjoin).0d)rL   r2   r3   
<listcomp>   s   & z0VideoClsCustom._find_classes.<locals>.<listcomp>c                    s   i | ]} | |qS r2   r2   )r\   i)classesr2   r3   
<dictcomp>   s    z0VideoClsCustom._find_classes.<locals>.<dictcomp>)rX   listdirsortranger-   )r/   rL   class_to_idxr2   )r`   rL   r3   _find_classes   s   zVideoClsCustom._find_classesc                 C   s   t j|std| g }t|A}| }|D ]1}| }t|dk r+td| t j||d }t	|d }	t	|d }
||	|
f}|
| qW d    |S 1 sVw   Y  |S )NzFSetting file %s doesn't exist. Check opt.train-list and opt.val-list. r:   zBVideo input format is not correct, missing one or more element. %sr   r   r   )rX   rY   existsr.   open	readlinesr>   r-   r[   intappend)r/   rL   r   r,   split_fdataline	line_info	clip_pathrM   rN   itemr2   r2   r3   r+      s&   


zVideoClsCustom._make_datasetc                 C   s   || j  d | j }|dkr(tttt| j|}|tjj|| jd }n"|t	| j| j krCt
tjj|| j  d | jd}nt| jf}| jr[tjj| j| j | j d}ntj| j | j td}|d |fS )Nr   r   sizedtype)r   r   rE   multiplyarraylistrd   randomrandintmaxrc   zerosr#   r   rj   )r/   
num_framesaverage_durationoffsetsrR   r2   r2   r3   r@      s,   



z$VideoClsCustom._sample_train_indicesc                    s   || j | j d kr&|| j d t| j   t fddt| j D }nt| j f}| jr>tjj	| j
| j| j
 d}ntj| j| j
 td}|d |fS )Nr   c                        g | ]}t  d   |  qS g       @rj   r\   xtickr2   r3   r^          z6VideoClsCustom._sample_val_indices.<locals>.<listcomp>rr   rt   )r   r   floatrE   rw   rd   r|   r#   ry   rz   r   rj   r/   r}   r   rR   r2   r   r3   rA      s    
z"VideoClsCustom._sample_val_indicesc                    s   || j d kr#|| j  d t| j  t fddt| jD }nt| jf}| jr;tjj	| j
| j | j
 d}ntj| j | j
 td}|d |fS )Nr   c                    r   r   r   r   r   r2   r3   r^     r   z7VideoClsCustom._sample_test_indices.<locals>.<listcomp>rr   rt   )r   r   r   rE   rw   rd   r|   r#   ry   rz   r   rj   r   r2   r   r3   rB      s    
z#VideoClsCustom._sample_test_indicesc              
   C   s
  g }|D ]~}t |}ttd| j| jD ]m\}	}
zE|||	  |krAt  ||||	  d   }W d    n1 s;w   Y  nt  ||d   }W d    n1 sWw   Y  W n tye       td	|||
| || j |k r|| j7 }qq|S )Nr   r   z=Error occured in reading frames from video {} of duration {}.)rj   	enumeraterd   r   r   r   as_np_ndarrayKeyboardInterruptr.   r?   rk   )r/   rL   video_readerrM   indicesrR   sampled_listseg_indoffsetr_   _	vid_framer2   r2   r3   _video_TSN_decord_loader  s0   

z'VideoClsCustom._video_TSN_decord_loaderc              	      s   g }g }|D ];}t |}	ttd| j| jD ]*\}
}|	||
  |kr+|	||
  d }n|	d }|| |	| j |k r@|	| j7 }	qqz(t  ||  W d    n1 sXw   Y   fddt|D }W |S  t	yr       t
d|||)Nr   r   c                    ,   g | ]\}} |d d d d d d f qS rV   r2   r\   vidr   
video_datar2   r3   r^   3     , zAVideoClsCustom._video_TSN_decord_batch_loader.<locals>.<listcomp>@Error occured in reading frames {} from video {} of duration {}.)rj   r   rd   r   r   rk   r   	get_batchr   r   r.   r?   )r/   rL   r   rM   r   rR   r   frame_id_listr   r   r_   r   frame_idr2   r   r3   rD   #  s.   

z-VideoClsCustom._video_TSN_decord_batch_loaderc              	      sF  g }g }|D ]`}g }	g }
t |}ttd| j| jD ]A\}}|||  |kr/|||  d }n|d }|d | j dkrO|	| |d | j dkrO|
| || j |k r[|| j7 }q|	|
 ||	 qz(t	  |
|  W d    n1 s}w   Y   fddt|D }W |S  ty       td|||)Nr   r   c                    r   rV   r2   r   r   r2   r3   r^   U  r   zDVideoClsCustom._video_TSN_decord_slowfast_loader.<locals>.<listcomp>r   )rj   r   rd   r   r   r(   rk   r'   extendr   r   r   r   r.   r?   )r/   rL   r   rM   r   rR   r   r   r   fast_id_listslow_id_listr   r_   r   r   r2   r   r3   rC   :  s:   



z0VideoClsCustom._video_TSN_decord_slowfast_loader)__name__
__module____qualname____doc__r   r   rU   rW   rf   r+   r@   rA   rB   r   rD   rC   __classcell__r2   r2   r0   r3   r
      sD    @=/)r   rX   bridger   bridge.mxnetr   r   r   ndarrayr   r   mxnetr   rE   
mxnet.utilr	   __all__objectr
   r2   r2   r2   r3   <module>   s    