o
    c۷i/                     @   sx  d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZd dlZd dlmZmZmZmZ erDd dlmZ eeZe r_d dlZe Zedd	d
 eD ZnedddiZe rmd dlZdd ej D Z G dd dZ!de"ej#B de"ej#B fddZ$dej#dej#fddZ%dej#dej#dej#fddZ&ede'de'de"de"de'dej#fd d!Z(dS )"    N)Enum)cache)Path)TYPE_CHECKING)assert_soundfile_installedassert_soxr_installedis_soundfile_installedis_soxr_installed)RawAudioAudioFormatc                 C   s   i | ]}||qS  r   ).0format_namer   r   J/home/ubuntu/vllm_env/lib/python3.10/site-packages/mistral_common/audio.py
<dictcomp>    s    r   nonec                 C   s   g | ]}|j  qS r   )valuelower)r   vr   r   r   
<listcomp>'   s    r   c                   @   s   e Zd ZdejdededdfddZdefdd	Zd'd
dZ	e
defddZed(dededd fddZed(dededd fddZed(dededd fddZed(dededd fddZd)dededefddZed*d"d#Zd$eddfd%d&ZdS )+Audioaudio_arraysampling_rateformatreturnNc                 C   s   || _ || _|| _|   dS )a  Initialize an Audio instance with audio data, sampling rate, and format.

        Args:
            audio_array: The audio data as a numpy array.
            sampling_rate: The sampling rate of the audio in Hz.
            format: The format of the audio file.
        N)r   r   r   _check_valid)selfr   r   r   r   r   r   __init__+   s   zAudio.__init__c                 C   s*   d| j  dt| j| j  dd| jj S )NzAudio - sampling_rate=z Hz, duration=z.2fz	s, shape=)r   lenr   shaper   r   r   r   __repr__8   s   
zAudio.__repr__c                 C   sd   t | jtjsJ ttj| jjdksJ d| jjt  | jtv s0J d| jdtd S )N   zself.audio_array.ndim=zself.format= not in EXPECTED_FORMAT_VALUES=)	
isinstancer   npndarraytypendimr   r   EXPECTED_FORMAT_VALUESr    r   r   r   r   ?   s   $zAudio._check_validc                 C   s   | j jd | j }|S )z{Calculate the duration of the audio in seconds.

        Returns:
           The duration of the audio in seconds.
        r   )r   r   r   )r   durationr   r   r   r*   E   s   zAudio.durationTurlstrictc              
   C   sx   zt | }|  tj|j|dW S  t jy' } ztd|  |d}~w ty; } z	td|  d|d}~ww )zCreate an Audio instance from a URL.

        Args:
            url: The URL of the audio file.
            strict: Whether to strictly enforce mono audio.

        Returns:
            An instance of the Audio class.
        r,   z#Failed to download audio from URL: Nz*Failed to create Audio instance from URL: z .)	requestsgetraise_for_statusr   
from_bytescontentRequestException
ValueError	Exception)r+   r,   responseer   r   r   from_urlP   s   
zAudio.from_urlaudio_base64c              
   C   s`   t   td| r| dd } zt| }W n ty( } ztd|d}~ww tj	||dS )a  Create an Audio instance from a base64 encoded string.

        Args:
            audio_base64: The base64 encoded audio data.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        z^data:audio/\w+;base64,,r"   zHbase64 decoding failed. Please check the input string is a valid base64.Nr-   )
r   rematchsplitbase64	b64decoder5   r4   r   r1   )r9   r,   audio_bytesr7   r   r   r   from_base64d   s   
zAudio.from_base64filec                 C   s   t   t| tr| dr| dd } t|  s!td| dt| d}| }W d   n1 s5w   Y  t	j
||dS )zCreate an Audio instance from an audio file.

        Args:
            file: Path to the audio file.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        zfile://   Nzfile=z does not existrbr-   )r   r$   str
startswithr   existsFileNotFoundErroropenreadr   r1   )rB   r,   fr@   r   r   r   	from_file{   s   
zAudio.from_filer@   c           	   	   C   s   t | )}t|}|jdd}|j}|j}W d   n1 s"w   Y  W d   n1 s1w   Y  t|}|j	 }|j
dkrT|rNtd|j
|jdd}t|||dS )zCreate an Audio instance from bytes.

        Args:
            audio_bytes: The audio data as bytes.
            strict: Whether to strictly enforce mono audio. Defaults to True.

        Returns:
            An instance of the Audio class.
        float32)dtypeNr"   zaudio_array.ndim=axis)r   r   r   )ioBytesIOsf	SoundFilerJ   
samplerater   r   r   r   r(   r4   meanr   )	r@   r,   
audio_filerK   r   r   audio_formatformat_enumr   r   r   r   r1      s   

zAudio.from_bytesFprefixc                 C   s   t   |tv sJ d|dtt $}tj|| j| j| d |	d t
| d}W d   n1 s<w   Y  |rMd|  d| }|S )	a  Convert the audio data to a base64 encoded string.

        Args:
            format: The format to encode the audio in.
            prefix: Whether to add a data prefix to the base64 encoded string.

        Returns:
            The base64 encoded audio data.
        zformat=r#   )r   r   zutf-8Nzdata:audio/z;base64,)r   r)   rQ   rR   rS   writer   r   upperseekr>   	b64encoderJ   decoder   )r   r   rZ   rW   
base64_strr   r   r   	to_base64   s   


zAudio.to_base64audior
   c                 C   sD   t | jtrt| jS t | jtrt| jS tdt| j )zCreate an Audio instance from a RawAudio object.

        Args:
            audio: The RawAudio object containing audio data.

        Returns:
            An instance of the Audio class.
        zUnsupported audio data type: )	r$   databytesr   r1   rE   rA   r4   r'   )rb   r   r   r   from_raw_audio   s
   
zAudio.from_raw_audionew_sampling_ratec                 C   s6   | j |krdS t  tj| j| j |dd| _|| _ dS )zResample audio data to a new sampling rate.

        Args:
            new_sampling_rate: The new sampling rate to resample the audio to.
        NHQ)quality)r   r   soxrresampler   )r   rf   r   r   r   rj      s
   

zAudio.resample)r   N)T)F)rb   r
   r   r   )__name__
__module____qualname__r%   r&   intrE   r   r!   r   propertyfloatr*   staticmethodboolr8   rA   rL   rd   r1   ra   re   rj   r   r   r   r   r   *   s$    

r   freqr   c                 C   s   d}d}dt d }d|  d }t| t jr8t|t js#J t|| |k}|t | | | |  ||< |S | |krG|t | | |  }|S )zConvert frequency from hertz to mels using the "slaney" mel-scale.

    Args:
        freq: The frequency, or multiple frequencies, in hertz (Hz).

    Returns:
        The frequencies on the mel scale.
         @@      .@      ;@皙@      @      i@)r%   logr$   r&   r'   )rs   min_log_hertzmin_log_mellogstepmels
log_regionr   r   r   hertz_to_mel   s   	r   r~   c                 C   sL   d}d}t dd }d|  d }| |k}|t || | |   ||< |S )zConvert frequency from mels to hertz using the "slaney" mel-scale.

    Args:
        mels: The frequency, or multiple frequencies, in mels.

    Returns:
        The frequencies in hertz.
    rt   ru   rw   rv   ry   rx   )r%   rz   exp)r~   r{   r|   r}   rs   r   r   r   r   mel_to_hertz   s   	r   	fft_freqsfilter_freqsc                 C   s   t |}t |dt | d }|ddddf  |dd  }|ddddf |dd  }t t dt ||}|S )a@  Creates a triangular filter bank.

    Adapted from *torchaudio* and *librosa*.

    Args:
        fft_freqs: Discrete frequencies of the FFT bins in Hz.
        filter_freqs: Center frequencies of the triangular filters to create, in Hz.

    Returns:
        array of shape `(num_frequency_bins, num_mel_filters)`
    r   r"   N   )r%   diffexpand_dimsmaximumzerosminimum)r   r   filter_diffslopesdown_slopes	up_slopesfilter_bankr   r   r   _create_triangular_filter_bank  s   
" r   num_frequency_binsnum_mel_binsmin_frequencymax_frequencyr   c                 C   s   | dk rt d|  d||krt d| d| t|}t|}t|||d }t|}td|d | }	t|	|}
d|d|d  |d|   }|
t|d9 }
|
jdd	d
k rjt d| d|  d|
S )aV  Create a Mel filter bank matrix for converting frequency bins to the Mel scale.

    This function generates a filter bank matrix that can be used to transform a
    spectrum represented in frequency bins to the Mel scale. The Mel scale is a
    perceptual scale of pitches judged by listeners to be equal in distance from one another.

    Args:
        num_frequency_bins: The number of frequency bins in the input spectrum.
        num_mel_bins: The number of desired Mel bins in the output.
        min_frequency: The minimum frequency (in Hz) to consider.
        max_frequency: The maximum frequency (in Hz) to consider.
        sampling_rate: The sampling rate of the audio signal.

    Returns:
        A filter bank matrix of shape (num_mel_bins, num_frequency_bins)
        that can be used to project frequency bin energies onto Mel bins.
    r   zRequire num_frequency_bins: z >= 2zRequire min_frequency: z <= max_frequency: r   g       @NrO   g        zNAt least one mel filter has all zero values. The value for `num_mel_filters` (z?) may be set too high. Or, the value for `num_frequency_bins` (z) may be set too low.)	r4   r   r%   linspacer   r   r   maxany)r   r   r   r   r   mel_minmel_max	mel_freqsr   r   mel_filtersenormr   r   r   mel_filter_bank&  s*   
 r   ))r>   rQ   loggingr;   enumr   	functoolsr   pathlibr   typingr   numpyr%   r.   mistral_common.importsr   r   r   r	   &mistral_common.protocol.instruct.chunkr
   	getLoggerrk   logger	soundfilerS   available_formatsr   ri   __members__valuesr)   r   rp   r&   r   r   r   rn   r   r   r   r   r   <module>   sR    
 >