o
    Si{4                     @   s   d dl Z d dlZd dlZd dlmZ d dl mZmZ d dlmZ d dl	m
Z
mZ d dlmZmZmZmZ d dlZd dlZd dlZd dlmZ d dlmZmZmZmZ d d	lmZ d d
lm Z m!Z!m"Z"m#Z#m$Z$m%Z% ee&eef Z'eG dd dZ(dS )    N)	dataclass)BytesIOFileIO)Path)PIPErun)ListOptionalTupleUnion)
read_audio)DurationMismatchError	VideoInfoVideoLoadingError%get_audio_duration_mismatch_tolerance)
AudioCache)PathlikeSeconds	SmartOpenasdict_nonullcompute_num_samplesfastcopyc                   @   sH  e Zd ZU dZeed< 	 ee ed< 	 eee	f ed< 	 dZ
ee ed< 	 edefdd	Zedefd
dZ			d'dedee dee dejfddZ			d(dedee dedeejeej f fddZdededd fddZdedd fddZdefddZed)dd Zd!d" Z dedee de!fd#d$Z"defd%d&Z#dS )*AudioSourcezQ
    AudioSource represents audio data that can be retrieved from somewhere.
    typechannelssourceNvideoreturnc                 C   s
   | j d uS Nr   self r"   G/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/audio/source.py	has_videoB   s   
zAudioSource.has_videoc                 C   s   |   S r   )_get_formatr    r"   r"   r#   formatF   s   zAudioSource.format        offsetdurationforce_opus_sampling_ratec           	      C   s   | j ||d}t||||d\}}|dur<t|jdkr!|jd n|jd }|| }||t  k r<td| d| d|tjS )	a8  
        Load the AudioSource (from files, commands, or URLs) with soundfile,
        accounting for many audio formats and multi-channel inputs.
        Returns numpy array with shapes: (n_samples,) for single-channel,
        (n_channels, n_samples) for multi-channel.

        Note: The elements in the returned array are in the range [-1.0, 1.0]
        and are of dtype `np.float32`.

        :param force_opus_sampling_rate: This parameter is only used when we detect an OPUS file.
            It will tell ffmpeg to resample OPUS to this sampling rate.
        )r(   r)   )r(   r)   r*   N   r   zRequested more audio (zs) than available (zs))	_prepare_for_readingr   lenshaper   r   astypenpfloat32)	r!   r(   r)   r*   r   samplessampling_ratenum_samplesavailable_durationr"   r"   r#   
load_audioJ   s    
 zAudioSource.load_audioT
with_audioc                 C   s  dd l }z;|j| j}|j}i }i }t|D ]!}	||	}
|
jdkr)|
||	< q|
jdkr3|
||	< qtd|
 t	|dksDJ dt	|dk sTJ dt	| dt	|dk sdJ d	t	| d
t
| \\}}t|j}|| jj }|j||| jj| jjd |rt	|dkrt
| \\}}t|j| }|j||d || g }g }d}| D ]b}|d ur||kr nW|d }|d}||j }|d ur|| |krt|| |j}|d | }|| |r|d }|d ur|| |krt|| |j}|d | }||j ||7 }q|s)tjdd|j|jtjdd fW S tj|dd}d }|r<tj|dd}||fW S  tyh } zt dt!| jt"sU| jnd dt#| dt$| d }~ww )Nr   r   audiozUnexpected media_type: z1The file does not seem to have any video streams.   zNLhotse currently does not support more than one video stream in a file (found z).zNLhotse currently does not support more than one audio stream in a file (found zH). Note: it's different than multi-channel which is generally supported.)stream_indexheightwidth)r:   r'   r+      )dtype)dimzReading video from 'memoryz' failed. Details: z: )%
torchaudioioStreamReaderr   num_src_streamsrangeget_src_stream_info
media_typeRuntimeErrorr-   listitemsround
frame_rater   frame_lengthadd_basic_video_streamr;   r<   sample_rateadd_basic_audio_streamseekstreamsizer   appendTtorchzerosuint8cat	Exceptionr   
isinstancebytesr   str)r!   r(   r)   r7   rA   rR   num_streamsaudio_streamsvideo_streams
stream_idxinfovideo_stream_idxvideo_streamframes_per_chunkvideo_chunk_durationaudio_stream_idxaudio_streamsamples_per_chunkvideo_chunksaudio_chunksdecoded_durationchunkvideo_chunk
chunk_sizecurrent_chunk_durationkeep_framesaudio_chunkkeep_samplesoutput_videooutput_audioer"   r"   r#   
load_videot   s   












zAudioSource.load_videor<   r;   c                 C   s   t | | jj||ddS )N)r<   r;   r   )r   r   	copy_with)r!   r<   r;   r"   r"   r#   with_video_resolution   s   z!AudioSource.with_video_resolutionpathc                 C   s(   | j dkr| S t| tt|| j dS )Nfile)r   )r   r   r]   r   r   )r!   rz   r"   r"   r#   with_path_prefix   s   
zAudioSource.with_path_prefixc                 C   s   t | S r   )r   r    r"   r"   r#   to_dict   s   zAudioSource.to_dictc                 C   s(   d| v rt | d | d< tdi | S )Nr   r"   )r   	from_dictr   )datar"   r"   r#   r~      s   zAudioSource.from_dictc                 C   s4   d| j  d| j dt| jtr| j dS d dS )NzAudioSource(type='z', channels=z
, source='z<binary-data>z'))r   r   r[   r   r]   r    r"   r"   r#   __repr__   s   zAudioSource.__repr__c                 C   sh  | j dv sJ d| j  d| j}| j dkrE|dks|dur't s'td t| j}|s?t| jdtd	j	}t
| j| t|}|S | j d
kr|dksV|dur[t s[td t| j}|st| jd}| }W d   n1 syw   Y  t
| j| t|}|S | j dkrt| jtsJ dt | jj dt| j}|S | j dkrtd|S )z
        Validates `self.type` and prepares the actual source for audio reading.
        Returns either a path or a file-like object opened in binary mode,
        that can be handled by :func:`lhotse.audio.backend.read_audio`.
        )r{   commandurlr@   sharzUnexpected AudioSource type: ''r   r'   Na(  You requested a subset of a recording that is read from disk via a bash command. Expect large I/O overhead if you are going to read many chunks like these, since every time we will read the whole file rather than its subset.You can use `lhotse.set_caching_enabled(True)` to mitigate the overhead.T)shellstdoutr   a  You requested a subset of a recording that is read from URL. Expect large I/O overhead if you are going to read many chunks like these, since every time we will download the whole file rather than its subset.You can use `lhotse.set_caching_enabled(True)` to mitigate the overhead.rbr@   zxCorrupted manifest: specified AudioSource type is 'memory', but 'self.source' attribute is not of type 'bytes' (found: 'z').r   zqInconsistent state: found an AudioSource with Lhotse Shar placeholder that was not filled during deserialization.)r   r   r   enabledwarningswarn	try_cacher   r   r   add_to_cacher   r   openreadr[   r\   __name__rH   )r!   r(   r)   r   audio_bytesfr"   r"   r#   r,     sR   
	$
	



	
z AudioSource._prepare_for_readingc                 C   sz   | j dv rtj| jd dd }| S | j dkr5tt	| j}|j
dkr0|jdkr0dS |j
 S td	| j  )
zGet format for the audio source.
        If using 'file' or 'url' types, the format is inferred from the file extension, as in soundfile.
        If using 'memory' type, the format is inferred from the binary data.
        )r{   r   r+   Nr@   OGGOPUSopusz/Getting format not implemented for source type )r   osrz   splitextr   lowersfrb   rB   r   r&   subtypeNotImplementedError)r!   r&   sf_infor"   r"   r#   r%   I  s   



zAudioSource._get_format)r'   NN)r'   NT)r   r   )$r   
__module____qualname____doc__r]   __annotations__r   intr   r\   r   r	   r   propertyboolr$   r&   r   r0   ndarrayr6   r
   rV   Tensorrw   ry   r   r|   dictr}   staticmethodr~   r   PathOrFileliker,   r%   r"   r"   r"   r#   r   "   sh   
 	
,
v
Hr   ))rB   r   r   dataclassesr   r   r   pathlibr   
subprocessr   r   typingr   r	   r
   r   numpyr0   	soundfiler   rV   lhotse.audio.backendr   lhotse.audio.utilsr   r   r   r   lhotse.cachingr   lhotse.utilsr   r   r   r   r   r   r]   r   r   r"   r"   r"   r#   <module>   s$     	