o
    Si                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dl mZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZmZmZ d dlmZmZmZmZmZmZmZ d dlZd dlZd dlmZmZm Z m!Z! d d	l"m#Z# d d
l$m%Z%m&Z&m'Z'm(Z( e( a)e*e+d< da,ed e+d< dZ-dee. fddZ/edede.f ded fddZ0d|ddZ1dede.f ddfddZ2eddd|ddZ3de*ddfdd Z4de*fd!d"Z5eZ6G d#d dZ7G d$d% d%e7Z8G d&d' d'e7Z9G d(d) d)e7Z:G d*d+ d+e7Z;G d,d- d-e7Z<G d.d/ d/e7Z=G d0d1 d1e7Z>G d2d3 d3e7Z?G d4d5 d5eZ@eddde*fd6d7ZAeddde*fd8d9ZBeddde*fd:d;ZCd<e.de*fd=d>ZDd?eee.e
f de@fd@dAZEd?eee.e
f de@fdBdCZF	d}dDe%dEe&dFee& deejGeHf fdGdHZI	 		d~dDee%e
f dEe&dFee& dIeeH deejGeHf f
dJdKZJ	 	d}d?eee.e
f dEe&dFee& deejGeHf fdLdMZK	d}dDe%dEe&dFee& deejGeHf fdNdOZLdPe%de@fdQdRZMedddSdT ZNdUdejOfdVee%e6f dEe&dFe&fdWdXZPdYejOfdZd[ZQ	ddPe%d\eeH de@fd]d^ZR	U		ddPe%dEe&dFee& d\eeH deejGeHf f
d_d`ZS	U		ddPe%dEe&dFee& d\eeH deejGeHf f
dadbZT	U		ddPe%dEe&dFee& d\eeH deejGeHf f
dcddZUdeeVde.fdfdgZWdPe%de@fdhdiZXdPe%de@fdjdkZY	ddle%dEe&dFee& deejGeHf fdmdnZZ		ddoee.ee
f dpeej[ejGf dqeHdree. dsee. ddfdtduZ\	U		ddDee%e6f dEe&dFee& d\eeH deejGeHf f
dvdwZ]		xddPee%e
f d\eeH dye*de@fdzd{Z^dS )    N)contextmanager)	lru_cache)BytesIOIOBase)Path)PIPECalledProcessErrorrun)Any	GeneratorList
NamedTupleOptionalTupleUnion)AudioLoadingErrorAudioSavingError	VideoInfo verbose_audio_loading_exceptions)Resample)PathlikeSecondscompute_num_samplesis_torchaudio_available_FFMPEG_TORCHAUDIO_INFO_ENABLEDAudioBackendCURRENT_AUDIO_BACKEND)z.aviz.mov.mp4.m4az.wmvz.mkvz.webmz.flvreturnc                   C   s   dgt tj  S )zR
    Return a list of names of available audio backends, including "default".
    default)sortedr   KNOWN_BACKENDSkeys r$   r$   H/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/audio/backend.pyavailable_audio_backends)   s   r&   backend)r   NNc                 c   s"    t  }t| }|V  t| dS )a  
    Context manager that sets Lhotse's audio backend to the specified value
    and restores the previous audio backend at the end of its scope.

    Example::

        >>> with audio_backend("LibsndfileBackend"):
        ...     some_audio_loading_fn()
    N)get_current_audio_backendset_current_audio_backend)r'   previousbr$   r$   r%   audio_backend0   s
   r,   c                  C   s8   t durt S tjd} | durt|  t S td t S )zI
    Return the audio backend currently set by the user, or default.
    NLHOTSE_AUDIO_BACKENDr    )r   osenvirongetr)   )maybe_backendr$   r$   r%   r(   C   s   r(   c                 C   sb   | dkrt  } | atS t| trt| } | atS t| tr!|  } t| ts-J d|  | atS )a5  
    Force Lhotse to use a specific audio backend to read every audio file,
    overriding the default behaviour of educated guessing + trial-and-error.

    Example forcing Lhotse to use ``audioread`` library for every audio loading operation::

        >>> set_current_audio_backend(AudioreadBackend())
    r    z#Expected str or AudioBackend, got: )get_default_audio_backend
isinstancestrr   newtyper   r'   r$   r$   r%   r)   X   s"   
	


r)      )maxsizec                  C   sF   g } t jdddkr| t  | t t t t t	 g7 } t
| S )a  
    Return a backend that can be used to read all audio formats supported by Lhotse.

    It first looks for special cases that need very specific handling
    (such as: opus, sphere/shorten, in-memory buffers)
    and tries to match them against relevant audio backends.

    Then, it tries to use several audio loading libraries (torchaudio, soundfile, audioread).
    In case the first fails, it tries the next one, and so on.
    LHOTSE_LEGACY_OPUS_LOADING01)r.   r/   r0   appendFfmpegSubprocessOpusBackendSph2pipeSubprocessBackendLibsndfileBackendTorchaudioFFMPEGBackendTorchaudioDefaultBackendAudioreadBackendCompositeAudioBackendbackendsr$   r$   r%   r2   p   s   r2   enabledc                 C   s(   | t krtdt  d|  d | a dS )a  
    Override Lhotse's global setting for whether to use ffmpeg-torchaudio to
    compute the duration of audio files. If disabled, we fall back to using a different
    backend such as sox_io or soundfile.

    .. note:: See this issue for more details: https://github.com/lhotse-speech/lhotse/issues/1026

    Example::

        >>> import lhotse
        >>> lhotse.set_ffmpeg_torchaudio_info_enabled(False)  # don't use ffmpeg-torchaudio

    :param enabled: Whether to use torchaudio to compute audio file duration.
    zThe user overrided the global setting for whether to use ffmpeg-torchaudio to compute the duration of audio files. Old setting: z. New setting: .N)r   logginginfo)rG   r$   r$   r%   "set_ffmpeg_torchaudio_info_enabled   s   rK   c                   C   s   t S )a  
    Return FFMPEG_TORCHAUDIO_INFO_ENABLED, which is Lhotse's global setting for whether to
    use ffmpeg-torchaudio to compute the duration of audio files.

    Example::

        >>> import lhotse
        >>> lhotse.get_ffmpeg_torchaudio_info_enabled()
    )r   r$   r$   r$   r%   "get_ffmpeg_torchaudio_info_enabled   s   
rL   c                       s2  e Zd ZdZi Z fddZededd fddZ					d!d
e	e
ef dedee dee deejef f
ddZ		d"d
e	e
ef dee fddZd
e	e
ef defddZd
e	e
ef defddZdefddZdefddZ				d#de	eeef de	ejejf dedee dee dd	fdd Z  ZS )$r   a  
    Internal Lhotse abstraction. An AudioBackend defines three methods:
    one for reading audio, and two filters that help determine if it should be used.

    ``handles_special_case`` means this backend should be exclusively
    used for a given type of input path/file.

    ``is_applicable`` means this backend most likely can be used for a given type of input path/file,
    but it may also fail. Its purpose is more to filter out formats that definitely are not supported.
    c                    s.   | j tjvr| tj| j < t jdi | d S )Nr$   )__name__r   r"   super__init_subclass__)clskwargs	__class__r$   r%   rO      s   zAudioBackend.__init_subclass__namer   c                 C   s$   || j vrtd| | j |  S )NzUnknown audio backend name: )r"   RuntimeError)rP   rT   r$   r$   r%   r5      s   
zAudioBackend.new        N
path_or_fdoffsetdurationforce_opus_sampling_ratec                 C      t  NNotImplementedErrorselfrW   rX   rY   rZ   r$   r$   r%   
read_audio   s   zAudioBackend.read_audioc                 C   r[   r\   r]   r`   rW   rZ   r$   r$   r%   rJ      s   zAudioBackend.infoc                 C      dS NFr$   r`   rW   r$   r$   r%   handles_special_case      z!AudioBackend.handles_special_casec                 C   rc   NTr$   re   r$   r$   r%   is_applicable   rg   zAudioBackend.is_applicablec                 C   rc   rd   r$   r`   r$   r$   r%   supports_save   rg   zAudioBackend.supports_savec                 C   rc   rd   r$   rj   r$   r$   r%   supports_info   rg   zAudioBackend.supports_infodestsrcsampling_rateformatencodingc                 C   r[   r\   r]   )r`   rm   rn   ro   rp   rq   r$   r$   r%   
save_audio   s   zAudioBackend.save_audiorV   NNr\   NN)rM   
__module____qualname____doc__r"   rO   classmethodr4   r5   r   r   
FileObjectr   r   intr   npndarrayra   rJ   boolrf   ri   rk   rl   r   r   torchTensorrr   __classcell__r$   r$   rR   r%   r      sZ    



c                
   @   s   e Zd Z			ddeeef dedee dee fddZ	deeef d	e
fd
dZdeeef d	e
fddZd	e
fddZ	ddeeef dee fddZdS )r>   rV   NrW   rX   rY   rZ   c                 C   s2   t |ttfsJ dt| dt||||dS )Nz<Cannot use an ffmpeg subprocess to read from path of type: ''pathrX   rY   rZ   )r3   r4   r   r6   read_opus_ffmpegr_   r$   r$   r%   ra      s   z&FfmpegSubprocessOpusBackend.read_audior   c                 C       t |ttfot| dS )N.opusr3   r4   r   lowerendswithre   r$   r$   r%   rf        z0FfmpegSubprocessOpusBackend.handles_special_casec                 C   
   |  |S r\   rf   re   r$   r$   r%   ri        
z)FfmpegSubprocessOpusBackend.is_applicablec                 C   rc   rh   r$   rj   r$   r$   r%   rl     rg   z)FfmpegSubprocessOpusBackend.supports_infoc                 C   s
   t ||S r\   )	opus_inforb   r$   r$   r%   rJ     s   
z FfmpegSubprocessOpusBackend.infors   r\   )rM   ru   rv   r   r   ry   r   r   rz   ra   r}   rf   ri   rl   rJ   r$   r$   r$   r%   r>      s,    


r>   c                   @      e Zd Z			ddeeef dedee dee de	e
jef f
dd	Zdeeef defd
dZdeeef defddZdefddZ	ddeeef dee fddZdS )r?   rV   NrW   rX   rY   rZ   r   c                 C   s0   t |ttfsJ dt| dt|||dS )Nz>Cannot use an sph2pipe subprocess to read from path of type: 'r   )sph_pathrX   rY   )r3   r4   r   r6   read_sphr_   r$   r$   r%   ra   "  s   z$Sph2pipeSubprocessBackend.read_audioc                 C   r   )Nz.sphr   re   r$   r$   r%   rf   2  r   z.Sph2pipeSubprocessBackend.handles_special_casec                 C   r   r\   r   re   r$   r$   r%   ri   7  r   z'Sph2pipeSubprocessBackend.is_applicablec                 C   rc   rh   r$   rj   r$   r$   r%   rl   :  rg   z'Sph2pipeSubprocessBackend.supports_infoc                 C      t |S r\   )sph_inforb   r$   r$   r%   rJ   =     zSph2pipeSubprocessBackend.infors   r\   rM   ru   rv   r   r   ry   r   r   rz   r   r{   r|   ra   r}   rf   ri   rl   rJ   r$   r$   r$   r%   r?   !  s0    


r?   c                   @   r   )FfmpegTorchaudioStreamerBackendrV   NrW   rX   rY   rZ   r   c                 C      t |||dS )N)path_or_fileobjrX   rY   )torchaudio_ffmpeg_loadr_   r$   r$   r%   ra   F  
   z*FfmpegTorchaudioStreamerBackend.read_audioc                 C      t  o
t o
t|tS r\   r   torchaudio_supports_ffmpegr3   r   re   r$   r$   r%   rf   S  s
   z4FfmpegTorchaudioStreamerBackend.handles_special_casec                 C   r   r\   r   re   r$   r$   r%   ri   Z  s
   z-FfmpegTorchaudioStreamerBackend.is_applicablec                 C   rc   rh   r$   rj   r$   r$   r%   rl   c  rg   z-FfmpegTorchaudioStreamerBackend.supports_infoc                 C   r   r\   torchaudio_inforb   r$   r$   r%   rJ   f  r   z$FfmpegTorchaudioStreamerBackend.infors   r\   r   r$   r$   r$   r%   r   E  s0    

	
r   c                   @   s   e Zd Z			ddeeef dedee dee de	e
jef f
dd	Zdeeef defd
dZdefddZ		ddeeeef deeje
jf dedee dee ddfddZdefddZ	ddeeef dee fddZdS )rB   rV   NrW   rX   rY   rZ   r   c                 C   r   NrW   rX   rY   )torchaudio_loadr_   r$   r$   r%   ra   o  r   z#TorchaudioDefaultBackend.read_audioc                 C   s   t  S r\   )r   re   r$   r$   r%   ri   |  s   z&TorchaudioDefaultBackend.is_applicablec                 C   rc   rh   r$   rj   r$   r$   r%   rk     rg   z&TorchaudioDefaultBackend.supports_saverm   rn   ro   rp   rq   c                 C   s   dd l }t sJ dt|}t|trt|}| dkp)t|t o)|	d}|r:|j
jj|||ddd d S |j|||||d d S )Nr   z8We don't support saving audio with torchaudio pre v0.9.0flacz.flac   )sample_raterp   bits_per_sample)r   rp   rq   )
torchaudio$torchaudio_soundfile_supports_formatr~   	as_tensorr3   r   r4   r   r   r   r'   soundfile_backendsave)r`   rm   rn   ro   rp   rq   r   saving_flacr$   r$   r%   rr     s,   


	

z#TorchaudioDefaultBackend.save_audioc                 C   rc   rh   r$   rj   r$   r$   r%   rl     rg   z&TorchaudioDefaultBackend.supports_infoc                 C   r   r\   r   rb   r$   r$   r%   rJ     r   zTorchaudioDefaultBackend.infors   rt   r\   )rM   ru   rv   r   r   ry   r   r   rz   r   r{   r|   ra   r}   ri   rk   r4   r   r   r~   r   rr   rl   rJ   r$   r$   r$   r%   rB   n  sN    


$
rB   c                   @     e Zd ZdZ			ddeeef dedee dee	 de
eje	f f
d	d
Zdeeef defddZdeeef defddZdefddZ		ddeeeef deejejf de	dee dee ddfddZdefddZ	ddeeef dee	 fddZdS )rA   z
    A new FFMPEG backend available in torchaudio 2.0.
    It should be free from many issues of soundfile and sox_io backends.
    rV   NrW   rX   rY   rZ   r   c                 C   s   t ||||dS )N)rW   rX   rY   resample_rate)torchaudio_2_ffmpeg_loadr_   r$   r$   r%   ra     s   z"TorchaudioFFMPEGBackend.read_audioc                    s(   t  t }| ot fddtD S )Nc                 3       | ]
}t  |V  qd S r\   r4   r   .0extrW   r$   r%   	<genexpr>      
z?TorchaudioFFMPEGBackend.handles_special_case.<locals>.<genexpr>)r3   r   anySUPPORTED_VIDEO_EXTENSIONS)r`   rW   
is_fileobjr$   r   r%   rf     s   z,TorchaudioFFMPEGBackend.handles_special_casec                 C   s   t  ot S )z
        FFMPEG backend requires at least Torchaudio 2.0.
        For version == 2.0.x, we also need env var TORCHAUDIO_USE_BACKEND_DISPATCHER=1
        For version >= 2.1.x, this will already be the default.
        )r   #torchaudio_ffmpeg_backend_availablere   r$   r$   r%   ri     s   z%TorchaudioFFMPEGBackend.is_applicablec                 C   rc   rh   r$   rj   r$   r$   r%   rk     rg   z%TorchaudioFFMPEGBackend.supports_saverm   rn   ro   rp   rq   c                 C   sH   dd l }t|st|}t|trt|}|j|||||dd d S )Nr   ffmpeg)r   rp   rq   r'   )r   r~   	is_tensorr   r3   r   r4   r   )r`   rm   rn   ro   rp   rq   r   r$   r$   r%   rr     s   



z"TorchaudioFFMPEGBackend.save_audioc                 C   rc   rh   r$   rj   r$   r$   r%   rl     rg   z%TorchaudioFFMPEGBackend.supports_infoc                 C   r   r\   )torchaudio_ffmpeg_streamer_inforb   r$   r$   r%   rJ     r   zTorchaudioFFMPEGBackend.infors   rt   r\   rM   ru   rv   rw   r   r   ry   r   r   rz   r   r{   r|   ra   r}   rf   ri   rk   r4   r   r   r~   r   rr   rl   rJ   r$   r$   r$   r%   rA     sR    



rA   c                   @   r   )r@   a  
    A backend that uses PySoundFile.

    .. note:: PySoundFile has issues on MacOS because of the way its CFFI bindings are implemented.
        For now, we disable it on this platform.
        See: https://github.com/bastibe/python-soundfile/issues/331
    rV   NrW   rX   rY   rZ   r   c                 C   r   r   )soundfile_loadr_   r$   r$   r%   ra     r   zLibsndfileBackend.read_audioc                 C   s8   t |tr
t s
dS t |ttfrt|drdS dS )NTr   F)r3   r   r   r   r4   r   re   r$   r$   r%   rf     s   z&LibsndfileBackend.handles_special_casec                    s,   t  ttfrt fdddD rdS dS )Nc                 3   r   r\   r   r   r   r$   r%   r     r   z2LibsndfileBackend.is_applicable.<locals>.<genexpr>)r   r   z.m4bFT)r3   r   r4   r   re   r$   r   r%   ri     s
   zLibsndfileBackend.is_applicablec                 C   rc   rh   r$   rj   r$   r$   r%   rk      rg   zLibsndfileBackend.supports_saverm   rn   ro   rp   rq   c                 C   sV   dd l }t|r| }t|trt|}|dkrd}d}|j||j|||d d S )Nr   opusogg)filedata
sampleraterp   subtype)		soundfiler~   r   numpyr3   r   r4   writeT)r`   rm   rn   ro   rp   rq   sfr$   r$   r%   rr   #  s   


zLibsndfileBackend.save_audioc                 C   rc   rh   r$   rj   r$   r$   r%   rl   <  rg   zLibsndfileBackend.supports_infoc                 C   r   r\   )soundfile_inforb   r$   r$   r%   rJ   ?  r   zLibsndfileBackend.infors   rt   r\   r   r$   r$   r$   r%   r@     sR    




r@   c                   @   sv   e Zd Z			ddeeef dedee dee de	e
jef f
dd	Zd
d Z	ddeeef dee fddZdS )rC   rV   NrW   rX   rY   rZ   r   c                 C   r   )N)path_or_filerX   rY   )audioread_loadr_   r$   r$   r%   ra   H  r   zAudioreadBackend.read_audioc                 C   rc   rh   r$   rj   r$   r$   r%   rl   U  rg   zAudioreadBackend.supports_infoc                 C   r   r\   )audioread_inforb   r$   r$   r%   rJ   X  r   zAudioreadBackend.infors   r\   )rM   ru   rv   r   r   ry   r   r   rz   r   r{   r|   ra   rl   rJ   r$   r$   r$   r%   rC   G  s,    


rC   c                   @   s   e Zd ZdZdee fddZ			ddeee	f de
d	ee
 d
ee deejef f
ddZdefddZ		ddeeeef deejejf dedee dee ddfddZdefddZ	ddeee	f d
ee fddZdS )rD   a  
    Combines multiple audio backends.
    It will try each out sequentially, and back off to the next one in the list if the current one fails.
    It uses the special filter methods to prioritize special case backends,
    and skip backends that are not applicable.
    rF   c                 C   s
   || _ d S r\   rE   )r`   rF   r$   r$   r%   __init__h  r   zCompositeAudioBackend.__init__rV   NrW   rX   rY   rZ   r   c                 C   s  g }| j D ]}||r|| qt|dk s J d| dt|dkrPz|d j||||dW S  tyO } ztd| dt| d	t| d }~ww g }| j D ]R}|	|rz|j||||dW   S  ty } z2d
t| dt| d}	t
 r||	 t   n||	 t| d	t|  W Y d }~qUd }~ww qU|std| dd}
t
 rdnd}td| d|
 |
| | )N   aCompositeAudioBackend has more than one sub-backend that handles a given special case for input 'r   r8   r   rW   rX   rY   rZ   zReading audio from '' failed. Details: : Exception # (): (No applicable backend found for input: '
 Y
Set LHOTSE_AUDIO_LOADING_EXCEPTION_VERBOSE=1 environment variable for full stack traces.' failed. Details:)rF   rf   r=   lenra   	Exceptionr   r6   r4   ri   r   	traceback
format_excjoin)r`   rW   rX   rY   rZ   
candidatesr+   e
exceptionsmsgNL
maybe_infor$   r$   r%   ra   k  sl   




 
z CompositeAudioBackend.read_audioc                 C   s   t dd | jD S )Nc                 s   s    | ]}|  V  qd S r\   rk   r   r+   r$   r$   r%   r     s    z6CompositeAudioBackend.supports_save.<locals>.<genexpr>)r   rF   rj   r$   r$   r%   rk     s   z#CompositeAudioBackend.supports_saverm   rn   ro   rp   rq   c                 C   s   dd | j D }g }|D ]S}||sqz|j|||||dW   S  ty_ }	 z2dt| dt| d}
t rE||
 t	   n||
 t|	 dt
|	  W Y d }	~	qd }	~	ww |sftdd	}t rmd
nd}td| || | )Nc                 S      g | ]}|  r|qS r$   r   r   r$   r$   r%   
<listcomp>      z4CompositeAudioBackend.save_audio.<locals>.<listcomp>rm   rn   ro   rp   rq   r   r   r   r   z-No applicable backend found for saving audio.r   r   r   zSaving audio failed. Details:)rF   ri   rr   r   r   r6   r   r=   r   r   r4   r   r   )r`   rm   rn   ro   rp   rq   r   r   r+   r   r   r   r   r$   r$   r%   rr     s<   
 z CompositeAudioBackend.save_audioc                 C   rc   rh   r$   rj   r$   r$   r%   rl     rg   z#CompositeAudioBackend.supports_infoc                 C   s  dd | j D }g }|D ]}||r|| qt|dk s'J d| dt|dkrTz	|d j|dW S  tyS } ztd	| d
t| dt| d }~ww g }|D ]O}|	|rz	|j|dW   S  ty } z2dt| dt| d}t
 r|| t   n|| t| dt|  W Y d }~qXd }~ww qX|std| dd}	t
 rdnd}
td	| d|	 |	| |
 )Nc                 S   r   r$   )rl   r   r$   r$   r%   r     r   z.CompositeAudioBackend.info.<locals>.<listcomp>r   r   r   r8   r   r   z Fetching info about audio from 'r   r   r   r   r   r   r   r   r   r   )rF   rf   r=   r   rJ   r   r   r6   r4   ri   r   r   r   r   )r`   rW   rZ   rF   r   r+   r   r   r   r   r   r$   r$   r%   rJ     sZ   


 

zCompositeAudioBackend.infors   rt   r\   )rM   ru   rv   rw   r   r   r   r   r   ry   r   r   rz   r   r{   r|   ra   r}   rk   r4   r   r   r~   r   rr   rl   rJ   r$   r$   r$   r%   rD   `  sP    

>
*
rD   c                   @   s>   e Zd ZU eed< eed< eed< eed< dZee ed< dS )LibsndfileCompatibleAudioInfochannelsframesr   rY   Nvideo)	rM   ru   rv   rz   __annotations__floatr   r   r   r$   r$   r$   r%   r     s   
 r   c                   C   s   t sdS tdS )zu
    Returns ``True`` when torchaudio version is at least 0.12.0, which
    has support for FFMPEG streamer API.
    Fz0.12.0)r   check_torchaudio_version_gtr$   r$   r$   r%   r     s   r   c                   C      t  otdS )zr
    Returns ``True`` when torchaudio.load supports "ffmpeg" backend.
    This requires either version 2.1.x+
    z2.1.0r   r   r$   r$   r$   r%   r   !     r   c                   C   r   )z
    Returns ``True`` when torchaudio version is at least 0.9.0, which
    has support for ``format`` keyword arg in ``torchaudio.save()``.
    z0.9.0r   r$   r$   r$   r%   r   *  r   r   versionc                 C   s4   t  sdS dd l}ddlm} ||j|| kS )NFr   )r   )r   r   	packagingr   parse__version__)r   r   _versionr$   r$   r%   r   3  s
   r   r   c           
         sL  ddl }t r)d| v rdnd}|j |d}t|j|jt|j|j|j dS t	 t
tfo:t fdddD }t	 t}|sD|rt rdd	lm} ||rTt
 n d
}|jdksaJ d||j}|jt|jd d}| D ]
\}	||	jd 7 }qvt|j|t|j||j dS | }t|j|jt|j|j|j dS )
    Return an audio info data structure that's a compatible subset of ``pysoundfile.info()``
    that we need to create a ``Recording`` manifest.
    r   Nr   r7   r   r   r   rY   c                 3   r   r\   r   r   r   r$   r%   r   Q  r   z"torchaudio_info.<locals>.<genexpr>).mp3r   StreamReaderrn   r8   jLhotse doesn't support files with more than one source stream yet (not to be confused with multi-channel).frames_per_chunk)r   r   list_audio_backendsrJ   r   num_channels
num_framesrz   r   r3   r4   r   r   r   r   torchaudio.ior  num_src_streamsget_src_stream_infodefault_audio_streamadd_basic_audio_streamstreamshape)
r   r   r'   rJ   is_mpegr   r  streamertot_sampleschunkr$   r   r%   r   =  sT   



r   c                    s  ddl m} tt  otfdddt D s"t|}|j}i }i }t|D ]!}|	|}|j
dkrB|||< q1|j
dkrL|||< q1td| t|d	k scJ d
t| dt|d	k ssJ dt| di |rt| \\}}	|	j}
|
dkr|jt|	j|d | D ]
\}|
|jd 7 }
q|d t|	j|	j|	j|
dd< |rt| \\} |jt j|d  fdd}| }|dkr| D ]
\}||jd 7 }qۈj j|t j| j d njdddd jd tdi S )Nr   r  c                 3   r   r\   r   r   r   r$   r%   r     s
    
z2torchaudio_ffmpeg_streamer_info.<locals>.<genexpr>)r  r   audiozUnexpected media_type: r   zNLhotse currently does not support more than one video stream in a file (found z).z_Lhotse currently does not support files with more than a single FFMPEG audio stream yet (found zP). Note that this is not the same as multi-channel which is generally supported.)stream_index)fpsheightwidthr
  )r  r  c                     sJ   sr" dd } | d ur  j j }t|| j dk r  jS dS  jS )Nr   gMbP?r   )r0   r
  r   absrY   )
video_infoaudio_duration)audio_streamr   r  metar$   r%   _try_read_num_samples  s   z>torchaudio_ffmpeg_streamer_info.<locals>._try_read_num_samplesr   r$   ) r  r  r3   r   r   r   r4   r  ranger  
media_typerU   r   listitemsr
  add_basic_video_streamround
frame_rater  r  remove_streamr   r  r  r  rz   r   updater	  rY   r   )r   r  r  num_streamsaudio_streamsvideo_streams
stream_idxrJ   video_stream_idxvideo_stream
tot_framesr  audio_stream_idxr   r  r$   )r  r   r  r  r   r%   r     s   







r   rW   rX   rY   c           	      C   s   dd l }t| trt| } d}d}|dks|d ur1t| }|dkr't||j}|d ur1t||j}t| tr;| d |j	| ||d\}}|
 t|fS )Nr   )frame_offsetr
  )r   r3   r   r4   r   r   r   r   seekloadr   rz   )	rW   rX   rY   r   r3  r
  
audio_infor  ro   r$   r$   r%   r     s&   



r   r   c           
      C   s   dd l }t| trt| } d}d}|dks|d ur4|j| dd}|dkr*t||j}|d ur4t||j}t| tr>| d |j	| ||dd\}}	|
 t|	fS )Nr   r2  r   r7   )r3  r
  r'   )r   r3   r   r4   rJ   r   r   r   r4  r5  r   rz   )
rW   rX   rY   r   r   r3  r
  r6  r  ro   r$   r$   r%   r   
  s(   



r   c                 C   s   dd l }t stdt| trt| } |jj| d}|jdks$J d|	|j
}t|j}|d urO|jt||d || t| \}|dd}n|j|d || tjdd | D dd	}| |fS )
Nr   zcUsing FFMPEG streamer backend for reading is supported only with PyTorch 1.12+ and torchaudio 0.12+r  r8   r  r  c                 S   s   g | ]	\}| d dqS )r   r8   )	transpose)r   tr$   r$   r%   r   P  s    z*torchaudio_ffmpeg_load.<locals>.<listcomp>)dim)r   r   rU   r3   r   r4   ior  r  r  r  rz   r   r  r   r4  nextr  r7  r~   catr   )r   rX   rY   r   r  rJ   ro   r  r$   r$   r%   r   ,  s0   



r   c                 C   s   dd l }t| ttfrdt| v rddlm} | | } || 1}|j}|dkr2|	t
|| |d ur<t
||}nd}|j|tjddjt|fW  d    S 1 sVw   Y  d S )Nr   .tar/TarAsDirBackendr2  T)r   dtype	always_2d)r   r3   r4   r   lhotse.serializationr?  open	SoundFiler   r4  r   readr{   float32r   rz   )rW   rX   rY   r   r?  sf_descro   frame_durationr$   r$   r%   r   V  s   $r   r   c                 C   s   ddl }|jt| t d-}t|d j}t|dkr!|d }n|d }t|j|t	|j
||j
 dW  d   S 1 s>w   Y  dS )r   r   NrE   r8   r   )	audioread
audio_openr4   _available_audioread_backendsr   r  r   r   r   rz   r   )r   rI  
input_filer  num_samplesr$   r$   r%   r   p  s    

$r   c                  C   s$   ddl } |  }td|  |S )z
    Reduces the overhead of ``audioread.audio_open()`` when called repeatedly
    by caching the results of scanning for FFMPEG etc.
    r   Nz%Using audioread. Available backends: )rI  available_backendsrI   rJ   )rI  rF   r$   r$   r%   rK    s   rK  rV   r   c                    s\  ddl  t fdd}g }| s}|j}|j}tt|| | }	|du r,tj}
n|	tt|| |  }
d}|D ]@}t||d}|}|t	| }||	k rRq=|
|k rX n&|
|k rd|d|
|  }||	  krn|krxn n||	| d }|
| q=W d   n1 sw   Y  |rt|}|dkr|d|fj}ntjd|d}|t|fS )zLoad an audio buffer using audioread.
    This loads one block at a time, and then concatenates the results.

    This function is based on librosa:
    https://github.com/librosa/librosa/blob/main/librosa/core/audio.py#L180
    r   Nc                   3   s0    t ttfr jt dV  d S V  d S )NrE   )r3   r4   r   rJ  rK  r$   rI  r   r$   r%   file_handle  s   
z#audioread_load.<locals>.file_handler@  r8   r2  )rI  r   r   r   rz   r{   r&  inf_buf_to_floatr   r=   concatenatereshaper   empty)r   rX   rY   r@  rP  yrL  	sr_native
n_channelss_starts_endnframen_prevr$   rO  r%   r     sD   &
r   r   c                 C   s8   dt dd| d >  }d|}|t| || S )a{  Convert an integer buffer to floating point values.
    This is primarily useful when loading integer-valued wav data
    into numpy arrays.

    This function is based on librosa:
    https://github.com/librosa/librosa/blob/main/librosa/util/utils.py#L1312

    Parameters
    ----------
    x : np.ndarray [dtype=int]
        The integer-valued data buffer
    n_bytes : int [1, 2, 4]
        The number of bytes per sample in ``x``
    dtype : numeric type
        The target output type (default: 32-bit float)
    Returns
    -------
    x_float : np.ndarray [dtype=float]
        The input data buffer cast to floating point
    g      ?r8      z<i{:d})r   rp   r{   
frombufferastype)xn_bytesr@  scalefmtr$   r$   r%   rS    s   
rS  rZ   c                 C   s:   t | |d\}}t|jd |jd t||jd | dS )N)rZ   r   r8   r   )	read_opusr   r  rz   )r   rZ   samplesro   r$   r$   r%   r     s   
r   c                 C   s   t | |||dS )z
    Reads OPUS files either using torchaudio or ffmpeg.
    Torchaudio is faster, but if unavailable for some reason,
    we fallback to a slower ffmpeg-based implementation.

    :return: a tuple of audio samples and the sampling rate.
    r   )r   r   r$   r$   r%   rf    s   rf  c                 C   sF   t | ||d\}}|du s||kr||fS t||d}||}||fS )z
    Reads OPUS files using torchaudio.
    This is just running ``tochaudio.load()``, but we take care of extra resampling if needed.

    :return: a tuple of audio samples and the sampling rate.
    r   N)source_sampling_ratetarget_sampling_rate)r   r   )r   rX   rY   rZ   r  ro   	resamplerresampled_audior$   r$   r%   read_opus_torchaudio-  s   
rl  c              
   C   sb  d}d}|dkr|d| 7 }|dur|d| 7 }|d|  d7 }|dur(|}|d	| 7 }|d
7 }t |dttd}|j}tj|tjd}zQt|j}	|	dkr|tjd|j	d d ftjd}
|ddd |
dddf< |ddd |
dddf< |
}n|	dkr|
dd}ntd|	 W ||fS W ||fS  ty } zt| d| d|j d}~ww )a9  
    Reads OPUS files using ffmpeg in a shell subprocess.
    Unlike audioread, correctly supports offsets and durations for reading short chunks.
    Optionally, we can force ffmpeg to resample to the true sampling rate (if we know it up-front).

    :return: a tuple of audio samples and the sampling rate.
    zffmpeg -threads 1i  r   z -ss Nz -t z -i 'r   z -ar z -f f32le -threads 1 pipe:1T)shellstdoutstderrrQ  stereor   r8   monor2  z)Unknown channel description from ffmpeg: z6
The ffmpeg command for which the program failed is: 'z', error code: )r	   r   rn  r{   r`  rF   parse_channel_from_ffmpeg_outputro  rV  r  rU  r^   
ValueErrorr   
returncode)r   rX   rY   rZ   cmdro   proc	raw_audior  channel_string	new_audior   r$   r$   r%   r   H  sH   
	r   ffmpeg_stderrc              	   C   sj   t d}|  D ]"}z| }W n	 ty   Y q	w ||}|d ur+|d  S q	tdt|  )Nz8^\s*Stream #0:0.*: Audio: pcm_f32le.+(mono|stereo).+\s*$r8   zCould not determine the number of channels for OPUS file from the following ffmpeg output (shown as bytestring due to avoid possible encoding issues):
)	recompile
splitlinesdecodeUnicodeDecodeErrormatchgrouprs  r4   )rz  patternliner  r$   r$   r%   rr    s    

rr  c                 C   sf   dd l }t| trt| } t| tr"d| v r"ddlm} | | } || }t|j	|j
|j|jdS )Nr   r=  r>  r   )r   r3   r   r4   rB  r?  rC  rJ   r   r   r   r   rY   )r   r   r?  info_r$   r$   r%   r     s   

r   c                 C   s6   t | \}}t|jd |jd t||jd | dS )Nr   r8   r   )r   r   r  rz   )r   rg  ro   r$   r$   r%   r     s   r   r   c           
   
   C   s   t | } d| d}|dur|t|| d 7 }|d|  7 }ztt|ddttdj}W n tyC } z
|jdkr>td	 d}~ww d
dl	}|
|$}|jtjd|j}}	|jdkrd|ddn|j}W d   ||	fS 1 stw   Y  ||	fS )z
    Reads SPH files using sph2pipe in a shell subprocess.
    Unlike audioread, correctly supports offsets and durations for reading short chunks.

    :return: a tuple of audio samples and the sampling rate.
    zsph2pipe -f wav -p -t :N    T)rm  checkrn  ro     zXIt seems that 'sph2pipe' binary is not installed; did you run 'lhotse install-sph2pipe'?r   rQ  r8   r2  )r   r&  r   r	   r   rn  r   rt  rs  r   rD  rE  r{   rF  r   r   rU  r   )
r   rX   rY   ru  rv  r   r   rG  r  ro   r$   r$   r%   r     s2   

	
r   rm   rn   ro   rp   rq   c                 C   s   t  j| ||||dS )Nr   )r(   rr   r   r$   r$   r%   rr     s   rr   c                 C   s   t  j| |||dS )Nr   )r(   ra   r   r$   r$   r%   ra     s   ra   Fforce_read_audioc                 C   s8   |rt | ttfsJ dt j| dS t j| |dS )Nz:force_read_audio=True does not work with file-like objectsr   )rW   rZ   )r3   r4   r   rC   rJ   r(   )r   rZ   r  r$   r$   r%   rJ     s   rJ   )r   r   )r   N)r   NNr\   rs   )rV   Nrt   rd   )_
contextlibrI   r.   r{  sysr   r   	functoolsr   r:  r   r   pathlibr   
subprocessr   r   r	   typingr
   r   r   r   r   r   r   r   r{   r~   lhotse.audio.utilsr   r   r   r   lhotse.augmentationr   lhotse.utilsr   r   r   r   r   r}   r   r   r   r4   r&   r,   r(   r)   r2   rK   rL   ry   r   r>   r?   r   rB   rA   r@   rC   rD   r   r   r   r   r   r   r   r|   rz   r   r   r   r   r   rK  rF  r   rS  r   rf  rl  r   bytesrr  r   r   r   r   rr   ra   rJ   r$   r$   r$   r%   <module>   s  
 $

!@%$)CHN .

G
i
 

$
+



H!

#

7
/



