o
    ۷i                  
   @   s   d dl ZddlmZmZ ddlmZ z
d dlZdZdZ	W n e
y6 Z zdZde d	Z	W Y dZ[ndZ[ww d d
lmZ G dd deeZdS )    N   )ConfigMixinregister_to_config)SchedulerMixinT FzCannot import librosa because zB. Make sure to correctly install librosa to be able to install it.)Imagec                   @   s   e Zd ZdZdZe							d'd	ed
edededededefddZd	ed
efddZd(de	de
jfddZdefddZd)dede
jfddZdefd d!Zdedejfd"d#Zd$ejde
jfd%d&ZdS )*Mela+  
    Parameters:
        x_res (`int`):
            x resolution of spectrogram (time).
        y_res (`int`):
            y resolution of spectrogram (frequency bins).
        sample_rate (`int`):
            Sample rate of audio.
        n_fft (`int`):
            Number of Fast Fourier Transforms.
        hop_length (`int`):
            Hop length (a higher number is recommended if `y_res` < 256).
        top_db (`int`):
            Loudest decibel value.
        n_iter (`int`):
            Number of iterations for Griffin-Lim Mel inversion.
    zmel_config.json   "V        P       x_resy_ressample_raten_fft
hop_lengthtop_dbn_iterc                 C   s@   || _ || _|| _|| _|| _| || d | _tstt	d S )N)
r   srr   r   r   set_resolutionaudio_librosa_can_be_imported
ValueError_import_error)selfr   r   r   r   r   r   r    r   h/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/pipelines/deprecated/audio_diffusion/mel.py__init__:   s   zMel.__init__c                 C   s*   || _ || _| j| _| j | j d | _dS )zSet resolution.

        Args:
            x_res (`int`):
                x resolution of spectrogram (time).
            y_res (`int`):
                y resolution of spectrogram (frequency bins).
           N)r   r   n_melsr   
slice_size)r   r   r   r   r   r   r   P   s   	zMel.set_resolutionN
audio_file	raw_audioc                 C   sr   |durt j|d| jd\| _}n|| _t| j| j| j k r7t| jt	| j| j t| j fg| _dS dS )a  Load audio.

        Args:
            audio_file (`str`):
                An audio file that must be on disk due to [Librosa](https://librosa.org/) limitation.
            raw_audio (`np.ndarray`):
                The raw audio file as a NumPy array.
        NT)monor   )
librosaloadr   r   lenr   r   npconcatenatezeros)r   r#   r$   _r   r   r   
load_audio^   s   	0zMel.load_audioreturnc                 C   s   t | j| j S )zGet number of slices in audio.

        Returns:
            `int`:
                Number of spectograms audio can be sliced into.
        )r(   r   r"   r   r   r   r   get_number_of_slicesp   s   zMel.get_number_of_slicesr   slicec                 C   s   | j | j| | j|d   S )zGet slice of audio.

        Args:
            slice (`int`):
                Slice number of audio (out of `get_number_of_slices()`).

        Returns:
            `np.ndarray`:
                The audio slice as a NumPy array.
        r    )r   r"   )r   r1   r   r   r   get_audio_slicey   s   zMel.get_audio_slicec                 C   s   | j S )zdGet sample rate.

        Returns:
            `int`:
                Sample rate of audio.
        )r   r/   r   r   r   get_sample_rate   s   zMel.get_sample_ratec                 C   sn   t jj| || j| j| j| jd}t j|t	j
| jd}|| j d | j ddd t	j}t|}|S )a  Convert slice of audio to spectrogram.

        Args:
            slice (`int`):
                Slice number of audio to convert (out of `get_number_of_slices()`).

        Returns:
            `PIL Image`:
                A grayscale image of `x_res x y_res`.
        )yr   r   r   r!   )refr      r   g      ?)r&   featuremelspectrogramr2   r   r   r   r!   power_to_dbr)   maxr   clipastypeuint8r   	fromarray)r   r1   Slog_Sbytedataimager   r   r   audio_slice_to_image   s   (
zMel.audio_slice_to_imagerB   c                 C   sh   t j| dd|j|jf}|d| j d | j }t	|}tj
jj|| j| j| j| jd}|S )zConverts spectrogram to audio.

        Args:
            image (`PIL Image`):
                An grayscale image of `x_res x y_res`.

        Returns:
            audio (`np.ndarray`):
                The audio as a NumPy array.
        r=   )dtypefloatr6   )r   r   r   r   )r)   
frombuffertobytesreshapeheightwidthr<   r   r&   db_to_powerr7   inversemel_to_audior   r   r   r   )r   rB   rA   r@   r?   r   r   r   r   image_to_audio   s    
zMel.image_to_audio)r	   r	   r
   r   r   r   r   )NN)r   )__name__
__module____qualname____doc__config_namer   intr   r   strr)   ndarrayr-   r0   r2   r3   r   rC   rN   r   r   r   r   r   %   sB    		r   )numpyr)   configuration_utilsr   r   schedulers.scheduling_utilsr   r&   r   r   	ExceptionePILr   r   r   r   r   r   <module>   s   
