o
    i%                     @   sF  d dl Z d dlZd dlZd dlmZ d dlmZ d dlZ	d dl
Z
d dlZd dlZd dlZd dlZd dlmZ zd dlmZ W n   ed Y d dlZd dlZd dlmZmZ zd dlmZ W n   Y dd	 Zd
Ze rpdZned 				ddedefddZdd Z	ddefddZd de fddZ!dde defddZ"dS )!    N)BytesIO)pad_sequence)download_from_urlzHurllib is not installed, if you infer from url, please install it first.)CalledProcessErrorrun)AudioSegmentc               	   C   s@   zt jddgt jd} d| dv W S  t jtfy   Y dS w )Nffmpegz-version)stderrzffmpeg versionzutf-8F)
subprocesscheck_outputSTDOUTdecoder   FileNotFoundError)output r   K/home/ubuntu/.local/lib/python3.10/site-packages/funasr/utils/load_utils.pyis_ffmpeg_installed   s   r   FTzNotice: ffmpeg is not installed. torchaudio is used to load audio
If you want to use ffmpeg backend to load audio, please install it by:
	sudo apt install ffmpeg # ubuntu
	# brew install ffmpeg # mac>  soundfsaudio_fsc              	      s  t | ttfr^d urRt ttfrRgt|  }dd D }tt|| D ](\}\}	}
tt|	|
D ]\}\}}t|f ||d}|| | q4q'|S  fdd| D S t | trl| 	drlt
| } t | trwtj| s|t| drd u sdkrt| drt| dr| d	 zt| \}  d
dr| d	} W nC   t| d} t|  } Y n1dkr|d urt| d}||  } W d    n1 sw   Y  n
dkrndkr	 dv rdd d< dd d< nht | trdkr|d ur|| } nRt | tjrt| } nEt | trcdkrct| }t |tr:|\ }n|}|j dksH|j dkrR|!tj"}|d }|j#dkr`|d d d	f }|} n	  krdkrtj$% }|| d d d f d	d d f } | S )Nc                 S   s   g | ]}g qS r   r   ).0dr   r   r   
<listcomp>;   s    z/load_audio_text_image_video.<locals>.<listcomp>)r   r   	data_type	tokenizerc                    s$   g | ]}t |f d qS ))r   r   r   )load_audio_text_image_video)r   audior   r   r   kwargsr   r   r   N   s    )zhttp://zhttps://readr   seekr   reduce_channelsT)srtextrimagevideocacheis_finalFis_streaming_input	kaldi_arkint16int32i      )&
isinstancelisttuplelen	enumeratezipr   appendstr
startswithr   ospathexistshasattrr!   
torchaudioloadgetmean_load_audio_ffmpegtorch
from_numpysqueezeopenencoder    stripnpndarraykaldiioload_matdtypeastypefloat64ndim
transformsResample)data_or_path_or_listr   r   r   r   r   
data_typesdata_or_path_or_list_retidata_type_idata_or_path_or_list_ijdata_type_jdata_or_path_or_list_jfdata_matmat	resamplerr   r   r   r   0   s    

 

 r   c                 C   s   zt | } W n   Y tj| tjd}t|}|jjdvr"tdtd}|jdkr0tdt|j}d|j	d  }|j
| }tj||| | tjd}|S )	N)rK   iuz*'middle_data' must be an array of integersfloat32rZ   z%'dtype' must be a floating point typer.      )validate_frame_raterG   
frombufferr,   asarrayrK   kind	TypeErroriinfobitsminrL   r_   )inputmiddle_datarK   rT   abs_maxoffsetarrayr   r   r   
load_bytes   s    



rn   c                 C   sd   t | }zt|}W n   td|j|kr0||}t  }|j|dd |d | } | S )NzSYou are decoding the pcm data, please install pydub first. via `pip install pydub`.wav)formatr   )	r   r   	from_fileRuntimeError
frame_rateset_frame_rateexportr!   r    )ri   r   	byte_datar   r   r   r   r   ra      s   


ra   r   c                 K   s8  t | tjr't| } t| jdk r| d d d f } |d u r$| jd gn|}nQt | tjrIt| jdk r<| d d d f } |d u rF| jd gn|}n/t | tt	frxg g }}| D ]}t |tjrdt|}|
| |
|jd  qWt|dd} || |fi |\} }t |tt	frt|g}| tj|tjfS )Nr.   r`   r   T)batch_first)r/   rG   rH   rA   rB   r2   shapeTensorr0   r1   r5   r   tensortor_   r-   )datadata_lenr   frontendr   	data_listdata_ir   r   r   extract_fbank   s*   



r   filer#   c              
   C   s   g }|   drdddt|ddg}ddd	d
g|d| dddddddt|d}z
t|dddj}W n tyT } ztd|j  |d}~ww t	
|t	j t	jd S )a?  
    Open an audio file and read as mono waveform, resampling as necessary

    Parameters
    ----------
    file: str
        The audio file to open

    sr: int
        The sample rate to resample the audio if necessary

    Returns
    -------
    A NumPy array containing the audio waveform, in float32 dtype.
    z.pcmz-fs16lez-arz-ac1r   z-nostdinz-threads0z-iz-acodec	pcm_s16le-T)capture_outputcheckzFailed to load audio: Ng      @)lowerendswithr6   r   stdoutr   rr   r	   r   rG   rb   r,   flattenrL   r_   )r   r#   
pcm_paramscmdouter   r   r   r@      sR   		
r@   )r   r   r   N)r   )Nr   N)#r8   rA   jsonior   torch.distributeddistributeddistnumpyrG   rI   librosar<   timeloggingtorch.nn.utils.rnnr   funasr.download.filer   printpdbr
   r   r   pydubr   r   
use_ffmpegintr   rn   ra   r6   r   r@   r   r   r   r   <module>   s\    


c
