o
    ॵi                     @   s   d dl mZmZmZ d dlZd dlZ			ddeejejf de	de
de
dee	 d	eeejejf e	f fd
dZ	ddejde	d	eej fddZ	ddejd	eej fddZdS )    )OptionalTupleUnionNFwaveformsample_ratenormalize_volumeto_monoto_sample_ratereturnc                 C   s   zddl m} W n ty   tdw g }|r|ddg |dur/||kr/|d| g |r?| jd dkr?|dd	g t|dkrgt| tj}|rRt	
| n| }||||\}	}
|rc|	 }	|	|
fS | |fS )
aj  convert a waveform:
    - to a target sample rate
    - from multi-channel to mono channel
    - volume normalization

    Args:
        waveform (numpy.ndarray or torch.Tensor): 2D original waveform
            (channels x length)
        sample_rate (int): original sample rate
        normalize_volume (bool): perform volume normalization
        to_mono (bool): convert to mono channel if having multiple channels
        to_sample_rate (Optional[int]): target sample rate
    Returns:
        waveform (numpy.ndarray): converted 2D waveform (channels x length)
        sample_rate (float): target sample rate
    r   Nz1Please install torchaudio: pip install torchaudiogainz-nrate   channels1)torchaudio.sox_effectssox_effectsImportErrorappendshapelen
isinstancenpndarraytorch
from_numpyapply_effects_tensornumpy)r   r   r   r   r	   ta_soxeffectsis_np_input	_waveform	convertedconverted_sample_rate r#   c/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/preprocessors/ofa/utils/audio_helper.pyconvert_waveform	   s,   r%   P   c                 C   s   zCddl m}m} ddlm} ddlm} ddlm} | }||_	| }	||	_
| }
||
_|	|
_||
d}|||  d }|W S  tyM   Y dS w )	z)Get mel-filter bank features via PyKaldi.r   )FbankFbankOptions)MelBanksOptions)FrameExtractionOptions)Vector)optsg      ?N)kaldi.feat.fbankr'   r(   kaldi.feat.melr)   kaldi.feat.windowr*   kaldi.matrixr+   num_bins	samp_freqmel_opts
frame_optscomputesqueezer   r   )r   r   n_binsr'   r(   r)   r*   r+   r3   r4   r,   fbankfeaturesr#   r#   r$   _get_kaldi_fbank7   s$   
r:   c                 C   sL   zddl m  m} t| } |j| ||d}| W S  ty%   Y dS w )z,Get mel-filter bank features via TorchAudio.r   N)num_mel_binssample_frequency)torchaudio.compliance.kaldi
compliancekaldir   r   r8   r   r   )r   r   r7   ta_kaldir9   r#   r#   r$   _get_torchaudio_fbankO   s   

rA   )FFN)r&   )typingr   r   r   r   r   r   r   Tensorintboolr%   r:   rA   r#   r#   r#   r$   <module>   s:   
0


