o
    %ݫi                     @   s   d Z ddlZddlZddlmZmZ ddlmZ ddlm	Z	 e	e
ZedddZeddd	Zedd
dZdd Zdd Zdd Zdd ZdS )zLibrary for Downloading and Preparing Datasets for Data Augmentation,
This library provides functions for downloading datasets from the web and
preparing the necessary CSV data manifest files for use by data augmenters.

Authors:
* Mirco Ravanelli 2023

    N)download_fileget_all_files)main_process_only)
get_loggerc                 C   sf   t j|d}t j|st| |dd nt| | t j|s1t|d| gd}t||| dS dS )a}  Downloads a dataset containing recordings (e.g., noise sequences)
    from the provided URL and prepares the necessary CSV files for use by the noise augmenter.

    Arguments
    ---------
    URL : str
        The URL of the dataset to download.
    dest_folder : str
        The local folder where the noisy dataset will be downloaded.
    ext : str
        File extensions to search for within the downloaded dataset.
    csv_file : str
        The path to store the prepared noise CSV file.
    max_length : float
        The maximum length in seconds.
        Recordings longer than this will be automatically cut into pieces.
    zdata.zipT)unpack.)	match_andN)ospathjoinisdirr   isfiler   prepare_csv)URLdest_folderextcsv_file
max_length	data_filefilelist r   S/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/augment/preparation.pyprepare_dataset_from_URL   s   
r   c              
   C   sl   z	t | || W dS  ty5 } z tjd|d tj|r*t| W Y d}~dS W Y d}~dS d}~ww )a}  Iterate a set of wavs and write the corresponding csv file.

    Arguments
    ---------
    filelist : str
        A list containing the paths of files of interest.
    csv_file : str
        The path to store the prepared noise CSV file.
    max_length : float
        The maximum length in seconds.
        Recordings longer than this will be automatically cut into pieces.
    z
Exception:)exc_infoN)	write_csv	Exceptionloggererrorr	   r
   existsremove)r   r   r   er   r   r   r   8   s   r   c                 C   s^   t |ddd}|d t| D ]\}}t|||| qW d   dS 1 s(w   Y  dS )a  
    Iterate through a list of audio files and write the corresponding CSV file.

    Arguments
    ---------
    filelist : list of str
        A list containing the paths of audio files of interest.
    csv_file : str
        The path where to store the prepared noise CSV file.
    max_length : float (optional)
        The maximum recording length in seconds.
        Recordings longer than this will be automatically cut into pieces.
    wzutf-8)encodingz$ID,duration,wav,wav_format,wav_opts
N)openwrite	enumerate_write_csv_row)r   r   r   r!   ifilenamer   r   r   r   Q   s   
"r   c           	   
   C   s   t |\}}t|||}tj|d\}}|jd | }|dur5||kr5t| ||||||||	 dS t	| ||||| dS )am  
    Write a single row to the CSV file based on the audio file information.

    Arguments
    ---------
    w : file
        The open CSV file for writing.
    filename : str
        The path to the audio file.
    index : int
        The index of the audio file in the list.
    max_length : float (optional)
        The maximum recording length in seconds.
    r      N)

torchaudioload_ensure_single_channelr	   r
   basenamesplitshape_handle_long_waveform_write_short_waveform_csv)	r!   r(   indexr   signalrateIDr   durationr   r   r   r&   f   s   r&   c                 C   s.   | j d dkr| d d} t|| | | S )ad  
    Ensure that the audio signal has only one channel.

    Arguments
    ---------
    signal : torch.Tensor
        The audio signal.
    filename : str
        The path to the audio file.
    rate : int
        The sampling frequency of the signal.

    Returns
    -------
    signal : Torch.Tensor
        The audio signal with a single channel.
    r   r)   )r/   	unsqueezer*   save)r3   r(   r4   r   r   r   r,      s   r,   c	                 C   s   t | tt|| D ]Z}	t||	 | }
tt||	d  || }|dd }|d| dt|	 d | }t	||dd|
|f | | d| d|	 t||
 | ||df}| 
d| qdS )a|  
    Handle long audio waveforms by cutting them into pieces and writing to the CSV.

    Arguments
    ---------
    w : file
        The open CSV file for writing.
    filename : str
        The path to the audio file.
    ID : str
        The unique identifier for the audio.
    ext :  str
        The audio file extension.
    signal : torch.Tensor
        The audio signal.
    rate : int
        The audio sample rate.
    duration :  float
        The duration of the audio in seconds.
    max_length :  float
        The maximum recording length in seconds.
    index : int
        The index of the audio file in the list.
    r)   r   _N
,)r	   r   rangeintminr.   replacestrr*   r8   r$   r   )r!   r(   r5   r   r3   r4   r6   r   r2   jstartstopnew_filenamecsv_rowr   r   r   r0      s   
 r0   c              	   C   s,   |  d| d| t|||df dS )a  
    Write a CSV row for a short audio waveform.

    Arguments
    ---------
    w : file
        The open CSV file for writing.
    ID : str
        The unique identifier for the audio.
    ext : str
        The audio file extension.
    duration : float
        The duration of the audio in seconds.
    filename : str
        The path to the audio file.
    index : int
        The index of the audio file in the list.
    r;   r9   r:   N)r$   r   r@   )r!   r5   r   r6   r(   r2   r   r   r   r1      s   ,r1   )N)__doc__r	   r*   speechbrain.utils.data_utilsr   r   speechbrain.utils.distributedr   speechbrain.utils.loggerr   __name__r   r   r   r   r&   r,   r0   r1   r   r   r   r   <module>   s"    	!-