o
    ϯi!                     @   sv   d dl Z d dlmZ d dlmZmZmZ d dlZd dl	Z	ddl
mZmZmZ ddlmZ e eZG dd deZdS )	    N)Fraction)BinaryIODictOptional   )pts_to_secssecs_to_ptsthwc_to_cthw)Videoc                	   @   s   e Zd ZdZ	 dZ		ddedee deddfd	d
Z	e
dee fddZe
defddZdd Zdededeeeej f fddZ	ddededefddZdS )EncodedVideoTorchVisionz

    Accessing clips from an encoded video using Torchvision video reading API
    (torch.ops.video_reader.read_video_from_memory) as the decoding backend.
    g      ?NTfile
video_namedecode_audioreturnc              	   C   s   t tj| tjd| _|| _|| _| 	 \| _
| _| _}| _| _| _}|d urB|d urBtt|| j| jt|| j| j| _d S |d urQt|| j| j| _d S |d ur`t|| j| j| _d S d S )N)dtype)torchtensornp
frombuffergetvalueuint8_video_tensor_video_name_decode_audio_torch_vision_decode_video_video_video_time_base_video_start_pts_audio_audio_time_base_audio_start_ptsmaxr   	_duration)selfr   r   r   video_durationaudio_duration r&   _/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/data/encoded_video_torchvision.py__init__   sB   






z EncodedVideoTorchVision.__init__c                 C      | j S )zQ
        Returns:
            name: the name of the stored video if set.
        )r   r#   r&   r&   r'   nameI      zEncodedVideoTorchVision.namec                 C   r)   )zZ
        Returns:
            duration: the video's duration/end-time in seconds.
        )r"   r*   r&   r&   r'   durationQ   r,   z EncodedVideoTorchVision.durationc                 C   s   d S )Nr&   r*   r&   r&   r'   closeY   s   zEncodedVideoTorchVision.close	start_secend_secc              	      s  d}| j dur&t|| j| jddt|| j| jddfdd| j D }d}| jrZ| jrZt|| j| jddt|| j| jdd  fdd| jD }tj	|dd}|
tj}|du sdt|dkrvtd	| d
| d| j d d}|durtt|
tj}||dS )a  
        Retrieves frames from the encoded video at the specified start and end times
        in seconds (the video always starts at 0 seconds). Returned frames will be in
        [start_sec, end_sec). Note that 1) if you want to avoid float precision issue
        and need accurate frames, please use Fraction for start_sec and end_sec.
        2) As end_sec is exclusive, so you may need to use
        `get_clip(start_sec, duration + EPS)` to get the last frame.

        Args:
            start_sec (float): the clip start time in seconds
            end_sec (float): the clip end time in seconds
        Returns:
            clip_data:
                A dictionary mapping the entries at "video" and "audio" to a tensors.

                "video": A tensor of the clip's RGB frames with shape:
                (channel, time, height, width). The frames are of type torch.float32 and
                in the range [0 - 255].

                "audio": A tensor of the clip's audio samples with shape:
                (samples). The samples are of type torch.float32 and
                in the range [0 - 255].

            Returns None if no video or audio found within time range.

        Nceil)
round_modec                    $   g | ]\}}|kr| k r|qS r&   r&   .0fpts)video_end_ptsvideo_start_ptsr&   r'   
<listcomp>   
    z4EncodedVideoTorchVision.get_clip.<locals>.<listcomp>c                    r3   r&   r&   r4   )audio_end_ptsaudio_start_ptsr&   r'   r:      r;   r   )axiszNo video found within z and z- seconds. Video starts at time 0 and ends at .)videoaudio)r   r   r   r   r   r   r   r    r   cattofloat32lenloggerwarningr-   r	   stack)r#   r/   r0   video_framesaudio_samplesr&   )r<   r=   r8   r9   r'   get_clip\   s`   
z EncodedVideoTorchVision.get_clipr   	start_ptsend_ptsc           "      C   s  d}d}d\}}}}||}	}
d\}}d\}}||}}d\}}zt jj| j| jdd|||||	|
||| j||||||}W n tyX } zt	d| j
 d|  |d}~ww |\
}}}}}}}}}}|dk rpt|d	 }nt|}tt||}t|d }	tt|d t|d }d}d} d}d}!| jr|dk rt|d	 }!nt|}!tt||}t|d }tt|d t|d } |||	||| ||!fS )
zH
        Decode the video in the PTS range [start_pts, end_pts]
        N)r   r   r   r   )r   r   )r   r   r   r   zFailed to decode video of name z. rL   )r   opsvideo_readerread_video_from_memoryr   SEEK_FRAME_MARGINr   	ExceptionrF   rG   r   intlistzipr   )"r#   rM   rN   video_and_ptsaudio_and_ptswidthheightmin_dimensionmax_dimensionr9   r8   video_timebase_numvideo_timebase_densampleschannelsr=   r<   audio_timebase_numaudio_timebase_den	tv_resultevframesvframes_pts	vtimebase_	vdurationaframes
aframe_pts	atimebase	adurationr$   video_time_baseaudio_time_baser%   r&   r&   r'   r      s   

z2EncodedVideoTorchVision._torch_vision_decode_video)NT)r   rL   )__name__
__module____qualname____doc__rR   r   r   strboolr(   propertyr+   floatr-   r.   r   r   TensorrK   rT   r   r&   r&   r&   r'   r      sF    
+
Xr   )logging	fractionsr   typingr   r   r   numpyr   r   utilsr   r   r	   r@   r
   	getLoggerrp   rF   r   r&   r&   r&   r'   <module>   s   
