o
    0’×i‡=  ã                   @   sp  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
mZmZmZ d dlZd dlZddlmZ ddlmZ zd dlZej ejj¡ eejjjdƒsWed	ƒZW n eye   ed
ƒZY nw d<dd„Zdefdd„Z d a!da"						d=de#dej$de%de#dee	e#ef  deej$ dee% dee# dee	e#ef  ddfdd„Z&ddde%d e%d!e#d"d#d$e	e#eee'ee'd%f e
e' f  f de
d& fd'd(„Z(d)ej$d*e
d& d+e'd,e%dej$f
d-d.„Z)	 		/	0d>de#d1ee%ef d2eee%ef  d!e#d3e#deej$ej$e	e#ef f fd4d5„Z*dddefd6d7„Z+ddde
e' fd8d9„Z,d?de#d!e#dee
e' ee% f fd:d;„Z-dS )@é    N)ÚFraction)ÚAnyÚDictÚListÚOptionalÚTupleÚUnioné   )Ú_log_api_usage_onceé   )Ú
_video_optÚ	pict_typea+  Your version of PyAV is too old for the necessary video operations in torchvision.
If you are on Python 3.5, you will have to build from source (the conda-forge
packages are not up-to-date).  See
https://github.com/mikeboers/PyAV#installation for instructions on how to
install PyAV on your system.
z¼PyAV is not installed, and is necessary for the video operations in torchvision.
See https://github.com/mikeboers/PyAV#installation for instructions on how to
install PyAV on your system.
Úreturnc                   C   s   t ttƒrt‚d S ©N©Ú
isinstanceÚavÚ	Exception© r   r   úR/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torchvision/io/video.pyÚ_check_av_available'   s   
ÿr   c                   C   s   t ttƒ S r   r   r   r   r   r   Ú_av_available,   s   r   é
   Úlibx264ÚfilenameÚvideo_arrayÚfpsÚvideo_codecÚoptionsÚaudio_arrayÚ	audio_fpsÚaudio_codecÚaudio_optionsc	                 C   sâ  t j ¡ st j ¡ sttƒ tƒ  t j|t jd 	¡ }t
|tƒr%t |¡}tj| dd»}	|	j||d}
|jd |
_|jd |
_|dkrFdnd	|
_|pLi |
_|d
ur¶dddddddddddœ
}|	j||d}|pii |_|jd }|dkrvdnd}|	jjd jj}t || ¡}t  |¡ 	¡  |¡}tjj|||d}||_| |¡D ]}|	  |¡ q¢| ¡ D ]}|	  |¡ q®|D ]}tj!j|d	d}d|_"|
 |¡D ]}|	  |¡ qÊq¸|
 ¡ D ]}|	  |¡ q×W d
  ƒ d
S 1 sêw   Y  d
S )a|  
    Writes a 4d tensor in [T, H, W, C] format in a video file

    Args:
        filename (str): path where the video will be saved
        video_array (Tensor[T, H, W, C]): tensor containing the individual frames,
            as a uint8 tensor in [T, H, W, C] format
        fps (Number): video frames per second
        video_codec (str): the name of the video codec, i.e. "libx264", "h264", etc.
        options (Dict): dictionary containing options to be passed into the PyAV video stream
        audio_array (Tensor[C, N]): tensor containing the audio, where C is the number of channels
            and N is the number of samples
        audio_fps (Number): audio sample rate, typically 44100 or 48000
        audio_codec (str): the name of the audio codec, i.e. "mp3", "aac", etc.
        audio_options (Dict): dictionary containing options to be passed into the PyAV audio stream
    ©ÚdtypeÚw)Úmode)Úrater	   r   Ú
libx264rgbÚyuv420pÚrgb24Nz<f8z<f4z<i2z<i4Úu1)
ÚdblÚdblpÚfltÚfltpÚs16Ús16pÚs32Ús32pÚu8Úu8pr   ÚstereoÚmono)ÚformatÚlayout)r8   ÚNONE)#ÚtorchÚjitÚis_scriptingÚ
is_tracingr
   Úwrite_videor   Ú	as_tensorÚuint8Únumpyr   ÚfloatÚnpÚroundr   ÚopenÚ
add_streamÚshapeÚwidthÚheightÚpix_fmtr   ÚstreamsÚaudior8   Únamer$   ÚastypeÚ
AudioFrameÚfrom_ndarrayÚsample_rateÚencodeÚmuxÚ
VideoFramer   )r   r   r   r   r   r   r    r!   r"   Ú	containerÚstreamÚaudio_format_dtypesÚa_streamÚnum_channelsÚaudio_layoutÚaudio_sample_fmtÚformat_dtypeÚframeÚpacketÚimgr   r   r   r?   5   s^   


ö

ÿÿ"Ñr?   rV   zav.container.ContainerÚstart_offsetÚ
end_offsetÚpts_unitrW   zav.stream.StreamÚstream_name.zav.frame.Framec                    s  t d7 a t t td krt ¡  |dkr3tt ˆd|j  ¡ƒ‰ˆ tdƒkr2tt 	ˆ d|j  ¡ƒ‰ nt
 d¡ i ‰d}d}|jdkrs|jj}|rsd|v rs| d¡}	||	d … }
t d	|
¡}|d u rht d
|
¡}|d urs| d¡dk}ˆ}t|d dƒ}|r…t|| dƒ}z| j|dd|d W n tjyœ   g  Y S w d}z(t| jdi |¤ŽƒD ]\}}|ˆ|j< |jˆ krÅ|rÃ||k rÃ|d7 }qª nqªW n
 tjyÑ   Y nw ‡ ‡‡fdd„tˆƒD ƒ}tˆƒdkrˆdkrˆˆvr‡fdd„ˆD ƒ}t|ƒdkrt|ƒ}| dˆ| ¡ |S )Nr   ÚsecÚinfzBThe pts_unit 'pts' gives wrong results. Please use pts_unit 'sec'.Té   Úvideos   DivXs   DivX(\d+)Build(\d+)(\w)s   DivX(\d+)b(\d+)(\w)é   ó   pr   F)Ú	any_frameÚbackwardrW   c                    s2   g | ]}ˆˆ| j   krˆ krn nˆ| ‘qS r   ©Úpts©Ú.0Úi©rb   Úframesra   r   r   Ú
<listcomp>Ñ   s   2 z%_read_from_stream.<locals>.<listcomp>c                    s   g | ]}|ˆ k r|‘qS r   r   ro   )ra   r   r   rt   Ö   ó    r   )Ú_CALLED_TIMESÚ_GC_COLLECTION_INTERVALÚgcÚcollectÚintÚmathÚfloorÚ	time_baserC   ÚceilÚwarningsÚwarnÚtypeÚcodec_contextÚ	extradataÚfindÚreÚsearchÚgroupÚmaxÚseekr   ÚAVErrorÚ	enumerateÚdecodern   ÚsortedÚlenÚinsert)rV   ra   rb   rc   rW   rd   Úshould_bufferÚmax_buffer_sizerƒ   ÚposÚdÚoÚseek_offsetÚbuffer_countÚ_idxr^   ÚresultÚpreceding_framesÚfirst_frame_ptsr   rr   r   Ú_read_from_stream   sh   	€


ý

ü€þ"r›   ÚaframesÚaudio_framesÚ	ref_startÚref_endc           
      C   s|   |d j |d j }}| jd }|| d | }d}|}	||k r(t|| | ƒ}||kr4t|| | ƒ}	| d d …||	…f S )Nr   éÿÿÿÿr   )rn   rH   rz   )
rœ   r   rž   rŸ   ÚstartÚendÚtotal_aframesÚstep_per_aframeÚs_idxÚe_idxr   r   r   Ú_align_audio_framesÝ   s   
r§   rn   ÚTHWCÚ	start_ptsÚend_ptsÚoutput_formatc              	   C   sŒ  t j ¡ st j ¡ sttƒ | ¡ }|dvrtd|› dƒ‚ddlm	} |ƒ dkrBt
j | ¡s6td| › ƒ‚t | |||¡\}}}nòtƒ  |du rMtd	ƒ}||k r[td
|› d|› ƒ‚i }g }	g }
tj}zdtj| ddS}|jjrx|jjd j}|jjrœt|||||jjd ddiƒ}	|jjd j}|durœt|ƒ|d< |jjr¸t|||||jjd ddiƒ}
|jjd j|d< W d  ƒ n1 sÂw   Y  W n
 tjyÒ   Y nw dd„ |	D ƒ}dd„ |
D ƒ}|rìt  t |¡¡}nt j dt j!d}|r,t "|d¡}t  |¡}|dkr$t#t$ %|d|  ¡ƒ}|td	ƒkr$t#t$ &|d|  ¡ƒ}t'||
||ƒ}nt j dt j(d}|dkrA| )dddd¡}|||fS )aF  
    Reads a video from a file, returning both the video frames and the audio frames

    Args:
        filename (str): path to the video file. If using the pyav backend, this can be whatever ``av.open`` accepts.
        start_pts (int if pts_unit = 'pts', float / Fraction if pts_unit = 'sec', optional):
            The start presentation time of the video
        end_pts (int if pts_unit = 'pts', float / Fraction if pts_unit = 'sec', optional):
            The end presentation time
        pts_unit (str, optional): unit in which start_pts and end_pts values will be interpreted,
            either 'pts' or 'sec'. Defaults to 'pts'.
        output_format (str, optional): The format of the output video tensors. Can be either "THWC" (default) or "TCHW".

    Returns:
        vframes (Tensor[T, H, W, C] or Tensor[T, C, H, W]): the `T` video frames
        aframes (Tensor[K, L]): the audio frames, where `K` is the number of channels and `L` is the number of points
        info (Dict): metadata for the video and audio. Can contain the fields video_fps (float) and audio_fps (int)
    )r¨   ÚTCHWz5output_format should be either 'THWC' or 'TCHW', got Ú.r   ©Úget_video_backendÚpyavzFile not found: Nrf   z7end_pts should be larger than start_pts, got start_pts=z and end_pts=Úignore©Úmetadata_errorsrh   Ú	video_fpsrM   r    c                 S   s   g | ]}|  ¡  ¡ ‘qS r   )Úto_rgbÚ
to_ndarray©rp   r^   r   r   r   rt   C  ru   zread_video.<locals>.<listcomp>c                 S   s   g | ]}|  ¡ ‘qS r   )r¶   r·   r   r   r   rt   D  ó    )r   r   r   ri   r#   r   re   )r   r   r¬   ri   r	   )*r;   r<   r=   r>   r
   Ú
read_videoÚupperÚ
ValueErrorÚtorchvisionr¯   ÚosÚpathÚexistsÚRuntimeErrorr   Ú_read_videor   rC   Údefault_timebaser   rF   rL   rM   r}   rh   r›   Úaverage_rater'   rŠ   r@   rD   ÚstackÚemptyrA   Úconcatenaterz   r{   r|   r~   r§   Úfloat32Úpermute)r   r©   rª   rc   r«   r¯   Úvframesrœ   ÚinfoÚvideo_framesr   Úaudio_timebaserV   r´   Úvframes_listÚaframes_listr   r   r   r¹   ì   sŒ   
ÿ
ú
ú€æ€þ



r¹   c                 C   s*   | j d jj}|d u rdS d|v rdS dS )Nr   Fs   LavcT)rL   r‚   rƒ   )rV   rƒ   r   r   r   Ú!_can_read_timestamps_from_packets]  s   rÏ   c                 C   s4   t | ƒrdd„ | jddD ƒS dd„ | jddD ƒS )Nc                 S   ó   g | ]
}|j d ur|j ‘qS r   rm   ©rp   Úxr   r   r   rt   i  ó    z,_decode_video_timestamps.<locals>.<listcomp>r   )rh   c                 S   rÐ   r   rm   rÑ   r   r   r   rt   k  rÓ   )rÏ   ÚdemuxrŒ   )rV   r   r   r   Ú_decode_video_timestampsf  s   rÕ   c           	   
      sB  t j ¡ st j ¡ sttƒ ddlm} |ƒ dkrt 	| |¡S t
ƒ  d}g }zDtj| dd3}|jjrZ|jjd }|j‰ zt|ƒ}W n tjyT   t d| › ¡ Y nw t|jƒ}W d  ƒ n1 sdw   Y  W n! tjy‹ } zd| › d	|› }t |t¡ W Y d}~nd}~ww | ¡  |d
kr‡ fdd„|D ƒ}||fS )a   
    List the video frames timestamps.

    Note that the function decodes the whole video frame-by-frame.

    Args:
        filename (str): path to the video file
        pts_unit (str, optional): unit in which timestamp values will be returned
            either 'pts' or 'sec'. Defaults to 'pts'.

    Returns:
        pts (List[int] if pts_unit = 'pts', List[Fraction] if pts_unit = 'sec'):
            presentation timestamps for each one of the frames in the video.
        video_fps (float, optional): the frame rate for the video

    r   r®   r°   Nr±   r²   z Failed decoding frames for file zFailed to open container for z; Caught error: re   c                    s   g | ]}|ˆ  ‘qS r   r   rÑ   ©Úvideo_time_baser   r   rt   œ  r¸   z)read_video_timestamps.<locals>.<listcomp>)r;   r<   r=   r>   r
   Úread_video_timestampsr¼   r¯   r   Ú_read_video_timestampsr   r   rF   rL   rh   r}   rÕ   rŠ   r   r€   rC   rÃ   ÚRuntimeWarningÚsort)	r   rc   r¯   r´   rn   rV   Úvideo_streamÚeÚmsgr   rÖ   r   rØ   n  s>   
ÿ
€ø€	€þrØ   )r   N)r   NNNNN)r   Nrn   r¨   rm   ).rx   r{   r½   r…   r   Ú	fractionsr   Útypingr   r   r   r   r   r   rB   rD   r;   Úutilsr
   Ú r   r   ÚloggingÚ	set_levelÚERRORÚhasattrrh   r^   rU   ÚImportErrorr   Úboolr   rv   rw   ÚstrÚTensorrC   r?   rz   r›   r§   r¹   rÏ   rÕ   rØ   r   r   r   r   Ú<module>   sÊ     ÿ€	ÿÿ

÷ÿþýüûúùø	÷

öXÿþýüû$ú
ùPÿÿÿÿ
þûÿ
þýüû
úq	,