o
    ϯi,                     @   s   d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	Z
d dlZd dlmZ ddlmZmZmZ e eZG dd deZ	 ddejjjd	ed
edejjjdededeeef fddZdS )    N)BinaryIODictListOptionalTuple)EncodedVideo   )pts_to_secssecs_to_ptsthwc_to_cthwc                	   @   s   e Zd ZdZ		ddedee deddfdd	Ze	dee fd
dZ
e	defddZdededeeeej f fddZdd ZdejfdededefddZdS )EncodedVideoPyAVz
    EncodedVideoPyAV is an abstraction for accessing clips from an encoded video using
    PyAV as the decoding backend. It supports selective decoding when header information
    is available.
    NTfile
video_namedecode_audioreturnc              
   C   s  || _ || _zt|| _W n ty$ } z
td| d| d}~ww | jdu s3t| jjj	dkr:td| | jjj	d }|j
| _|j| _| jdu rQd| _|j}d}d| _| jr| jjj| _| jr| jjjd j
| _| jjjd j| _| jdu rd| _| jjjd j}d\| _| _| _|du r|du rd| _|  \| _| _| jdu rtd	| jd
 d }| jdur| jd
 d }|dur|durtt|| j| jt|| j| j| _dS |durt|| j| j| _dS |durt|| j| j| _dS dS )z
        Args:
            file (BinaryIO): a file-like object (e.g. io.BytesIO or io.StringIO) that
                contains the encoded video.
        zFailed to open video . Nr   zVideo stream not found         )NNTFzUnable to decode video streamr   )_video_name_decode_audioavopen
_container	ExceptionRuntimeErrorlenstreamsvideo	time_base_video_time_base
start_time_video_start_ptsduration
_has_audioaudio_audio_time_base_audio_start_pts_video_audio_selective_decoding_pyav_decode_videomaxr	   	_duration)selfr   r   r   evideo_streamvideo_durationaudio_duration r2   X/home/ubuntu/.local/lib/python3.10/site-packages/pytorchvideo/data/encoded_video_pyav.py__init__   sj   










zEncodedVideoPyAV.__init__c                 C      | j S )zQ
        Returns:
            name: the name of the stored video if set.
        )r   r-   r2   r2   r3   namef      zEncodedVideoPyAV.namec                 C   r5   )zZ
        Returns:
            duration: the video's duration/end-time in seconds.
        )r,   r6   r2   r2   r3   r"   n   r8   zEncodedVideoPyAV.duration	start_secend_secc              	      s2  | j r| ||\| _| _d}| jdur3t|| j| jddt|| j| jddfdd| jD }d}| jri| jdurit|| j| j	ddt|| j| j	dd  fdd| jD }t
j|dd}|t
j}|du sst|dkrtd	| d
| d| j d d}|durtt
|t
j}||dS )a  
        Retrieves frames from the encoded video at the specified start and end times
        in seconds (the video always starts at 0 seconds). Returned frames will be in
        [start_sec, end_sec). Note that 1) if you want to avoid float precision issue
        and need accurate frames, please use Fraction for start_sec and end_sec.
        2) As end_sec is exclusive, so you may need to use
        `get_clip(start_sec, duration + EPS)` to get the last frame.

        Args:
            start_sec (float): the clip start time in seconds
            end_sec (float): the clip end time in seconds
        Returns:
            clip_data:
                A dictionary mapping the entries at "video" and "audio" to a tensors.

                "video": A tensor of the clip's RGB frames with shape:
                (channel, time, height, width). The frames are of type torch.float32 and
                in the range [0 - 255].

                "audio": A tensor of the clip's audio samples with shape:
                (samples). The samples are of type torch.float32 and
                in the range [0 - 255].

            Returns None if no video or audio found within time range.

        Nceil
round_modec                    $   g | ]\}}|kr| k r|qS r2   r2   .0fpts)video_end_ptsvideo_start_ptsr2   r3   
<listcomp>   
    z-EncodedVideoPyAV.get_clip.<locals>.<listcomp>c                    r>   r2   r2   r?   )audio_end_ptsaudio_start_ptsr2   r3   rE      rF   r   axiszNo video found within z and z- seconds. Video starts at time 0 and ends at .)r   r$   )r)   r*   r'   r(   r
   r   r!   r#   r%   r&   torchcattofloat32r   loggerdebugr"   r   stack)r-   r9   r:   video_framesaudio_samplesr2   )rG   rH   rC   rD   r3   get_clipv   sd   
zEncodedVideoPyAV.get_clipc                 C   s   | j dur| j   dS dS )z6
        Closes the internal video container.
        N)r   closer6   r2   r2   r3   rV      s   
zEncodedVideoPyAV.closer   
start_secsend_secsc           	   
   C   s,  d}d}zpt | jt|| j| jddt|| j| jdd| jjjd ddi\}}t|dkr4dd |D }| jrkt | jt|| j	| j
ddt|| j	| j
dd| jjjd ddi\}}t|dkrpd	d |D }W ||fS W ||fS W ||fS  ty } ztd
| j d|  W Y d}~||fS d}~ww )z
        Selectively decodes a video between start_pts and end_pts in time units of the
        self._video's timebase.
        Nr;   r<   r   r   c                 S   s$   g | ]}t |  |jfqS r2   )rL   
from_numpyto_rgb
to_ndarrayrB   r@   framer2   r2   r3   rE      s    z7EncodedVideoPyAV._pyav_decode_video.<locals>.<listcomp>r$   c                 S   s*   g | ]}t tj| d d|jfqS )r   rI   )rL   rY   npmeanr[   rB   r\   r2   r2   r3   rE     s    zFailed to decode video: r   )_pyav_decode_streamr   r
   r   r!   r   r   r   r#   r%   r&   r$   r   rP   rQ   r   )	r-   rW   rX   video_and_ptsaudio_and_ptspyav_video_frames_pyav_audio_framesr.   r2   r2   r3   r*      sp   "z#EncodedVideoPyAV._pyav_decode_video)NT)__name__
__module____qualname____doc__r   r   strboolr4   propertyr7   floatr"   r   rL   TensorrU   rV   mathinfr*   r2   r2   r2   r3   r      sB    	
M
[r   	container	start_ptsend_ptsstreamstream_namebuffer_sizer   c                    s   d}t || d}| jt|dd|d i  d}| jd	i |D ]}	t ||	j}|	j|kr8|	j|k r8|	 |	j< q |	j|kr? nq  fddt D }
|
|fS )
aF  
    Decode the video with PyAV decoder.
    Args:
        container (container): PyAV container.
        start_pts (int): the starting Presentation TimeStamp to fetch the
            video frames.
        end_pts (int): the ending Presentation TimeStamp of the decoded frames.
        stream (stream): PyAV stream.
        stream_name (dict): a dictionary of streams. For example, {"video": 0}
            means video stream at stream index 0.
    Returns:
        result (list): list of decoded frames.
        max_pts (int): max Presentation TimeStamp of the video sequence.
    i   r   FT)	any_framebackwardrt   c                    s   g | ]} | qS r2   r2   )r@   rB   framesr2   r3   rE   @  s    z'_pyav_decode_stream.<locals>.<listcomp>Nr2   )r+   seekintdecoderB   sorted)rq   rr   rs   rt   ru   rv   marginseek_offsetmax_ptsr]   resultr2   ry   r3   r`     s   
r`   )r   ) loggingro   typingr   r   r   r   r   r   numpyr^   rL   pytorchvideo.data.encoded_videor   utilsr	   r
   r   	getLoggerrf   rP   r   rq   inputInputContainerr|   r   rt   VideoStreamdictrm   r`   r2   r2   r2   r3   <module>   s8   
  
