o
    %ݫi                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlZddlZddlZddlmZ ddlmZmZ e  eeZi fddZd	d
 Zi fddZ	dI	dJddZdIddZdIddZdd Zdd Zdeee ej!f fddZ"deee ej!f fddZ#deee ej!f fddZ$d d! Z%d"d# Z&G d$d% d%Z'dId&d'Z(dKd(d)Z)dLd*d+Z*d,d- Z+d.d/ Z,d0d1 Z-d2d3 Z.d4d5 Z/d6d7 Z0d8d9 Z1dMd;d<Z2d=d> Z3dMd?d@Z4dNdCdDZ5dNdEdFZ6dGdH Z7dS )Ou   
Data reading and writing.

Authors
 * Mirco Ravanelli 2020
 * Aku Rouhe 2020
 * Ju-Chieh Chou 2020
 * Samuele Cornell 2020
 * Abdel HEBA 2020
 * Gaëlle Laperrière 2021
 * Sahar Ghannay 2021
 * Sylvain de Langen 2022
 * Adel Moumen 2025
    N)BytesIO)Union)
get_logger)check_torchaudio_backendvalidate_backendc                 C   sF   t | ddd}t|}W d   n1 sw   Y  t|| |S )a  Loads JSON and recursively formats string values.

    Arguments
    ---------
    json_path : str
        Path to CSV file.
    replacements : dict
        (Optional dict), e.g., {"data_folder": "/home/speechbrain/data"}.
        This is used to recursively format all string values in the data.

    Returns
    -------
    dict
        JSON data with replacements applied.

    Example
    -------
    >>> json_spec = '''{
    ...   "ex1": {"files": ["{ROOT}/mic1/ex1.wav", "{ROOT}/mic2/ex1.wav"], "id": 1},
    ...   "ex2": {"files": [{"spk1": "{ROOT}/ex2.wav"}, {"spk2": "{ROOT}/ex2.wav"}], "id": 2}
    ... }
    ... '''
    >>> tmpfile = getfixture('tmpdir') / "test.json"
    >>> with open(tmpfile, "w", encoding="utf-8") as fo:
    ...     _ = fo.write(json_spec)
    >>> data = load_data_json(tmpfile, {"ROOT": "/home"})
    >>> data["ex1"]["files"][0]
    '/home/mic1/ex1.wav'
    >>> data["ex2"]["files"][1]["spk2"]
    '/home/ex2.wav'

    rutf-8encodingN)openjsonload_recursive_format)	json_pathreplacementsfout_json r   M/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/dataio/dataio.pyload_data_json(   s
   !
r   c                 C   s   t | tr*|  D ] \}}t |tst |trt|| q	t |tr)||| |< q	t | trTt| D ]"\}}t |tsAt |trGt|| q3t |trS||| |< q3d S d S N)
isinstancedictitemslistr   str
format_map	enumerate)datar   keyitemir   r   r   r   O   s    



r   c           
         s  t | dddr}i }tj|dd}td}|D ]X}z	|d }|d= W n ty.   tdw ||v r:td	| | D ]!\}}	z| fd
d|	||< W q> ty_   td|	 dw d|v rlt	|d |d< |||< qW d   |S 1 s|w   Y  |S )a  Loads CSV and formats string values.

    Uses the SpeechBrain legacy CSV data format, where the CSV must have an
    'ID' field.
    If there is a field called duration, it is interpreted as a float.
    The rest of the fields are left as they are (legacy _format and _opts fields
    are not used to load the data in any special way).

    Bash-like string replacements with $to_replace are supported.

    Arguments
    ---------
    csv_path : str
        Path to CSV file.
    replacements : dict
        (Optional dict), e.g., {"data_folder": "/home/speechbrain/data"}
        This is used to recursively format all string values in the data.

    Returns
    -------
    dict
        CSV data with replacements applied.

    Example
    -------
    >>> csv_spec = '''ID,duration,wav_path
    ... utt1,1.45,$data_folder/utt1.wav
    ... utt2,2.0,$data_folder/utt2.wav
    ... '''
    >>> tmpfile = getfixture("tmpdir") / "test.csv"
    >>> with open(tmpfile, "w", encoding="utf-8") as fo:
    ...     _ = fo.write(csv_spec)
    >>> data = load_data_csv(tmpfile, {"data_folder": "/home"})
    >>> data["utt1"]["wav_path"]
    '/home/utt1.wav'
     r   newliner
   T)skipinitialspacez
\$([\w.]+)IDzBCSV has to have an 'ID' field, with unique ids for all data pointszDuplicate id: c                    s   t  | d  S )N   r   )matchr   r   r   <lambda>   s    zload_data_csv.<locals>.<lambda>z	The item z/ requires replacements which were not supplied.durationN)
r   csv
DictReaderrecompileKeyError
ValueErrorr   subfloat)
csv_pathr   csvfileresultreadervariable_finderrowdata_idr   valuer   r*   r   load_data_csvd   sB   &




r=   return'torchaudio.backend.common.AudioMetaDatac                 C   st   t | tj| \}}|dkrtj| d|d}ntj| |d}|jdkr8tj| d|d\}}|d|_||_	|S )	a  Retrieves audio metadata from a file path. Behaves identically to
    torchaudio.info, but attempts to fix metadata (such as frame count) that is
    otherwise broken with certain torchaudio version and codec combinations.

    Note that this may cause full file traversal in certain cases!

    Arguments
    ---------
    path : str
        Path to the audio file to examine.
    backend : str, optional
        Audio backend to use for loading the audio file. Must be one of
        'ffmpeg', 'sox', 'soundfile' or None. If None, uses torchaudio's default backend.

    Raises
    ------
    ValueError
        If the `backend` is not one of the allowed values.
        Must be one of [None, 'ffmpeg', 'sox', 'soundfile'].

    Returns
    -------
    torchaudio.backend.common.AudioMetaData
        Same value as returned by `torchaudio.info`, but may eventually have
        `num_frames` corrected if it otherwise would have been `== 0`.

    NOTE
    ----
    Some codecs, such as MP3, require full file traversal for accurate length
    information to be retrieved.
    In these cases, you may as well read the entire audio file to avoid doubling
    the processing time.
    z.mp3mp3)formatbackendrB   r   F)	normalizerB   r'   )
r   ospathsplitext
torchaudioinfo
num_framesr   sizesample_rate)rF   rB   _path_no_extpath_extrI   channels_datarL   r   r   r   read_audio_info   s   $

rP   c           	      C   s
  t | t| dr| d t| tttfr.t| tr$t| } | d tj| |d\}}nL| d }| 	dd}| 	d|}|dk rMt
d| d| d	||k r\t
d
| d| d||krp|| }tj||||d\}}n
tj|||d\}}|dd}|dS )a1  General audio loading, based on a custom notation.

    Expected use case is in conjunction with Datasets
    specified by JSON.

    The parameter may just be a path to a file:
    `read_audio("/path/to/wav1.wav")`

    Alternatively, you can specify more options in a dict, e.g.:
    ```
    # load a file from sample 8000 through 15999
    read_audio({
        "file": "/path/to/wav2.wav",
        "start": 8000,
        "stop": 16000
    })
    ```

    Which codecs are supported depends on your torchaudio backend.
    Refer to `torchaudio.load` documentation for further details.

    Arguments
    ---------
    waveforms_obj : str, dict
        Path to audio or dict with the desired configuration.

        Keys for the dict variant:
        - `"file"` (str): Path to the audio file.
        - `"start"` (int, optional): The first sample to load.
        If unspecified, load from the very first frame.
        - `"stop"` (int, optional): The last sample to load (exclusive).
        If unspecified or equal to start, load from `start` to the end.
        Will not fail if `stop` is past the sample count of the file and will
        return less frames.
    backend : str, optional
        Audio backend to use for loading the audio file. Must be one of
        'ffmpeg', 'sox', 'soundfile' or None. If None, uses torchaudio's default backend.

    Returns
    -------
    torch.Tensor
        1-channel: audio tensor with shape: `(samples, )`.
        >=2-channels: audio tensor with shape: `(samples, channels)`.

    Raises
    ------
    ValueError
        If the `backend` is not one of the allowed values.
        Must be one of [None, 'ffmpeg', 'sox', 'soundfile'].

    Example
    -------
    >>> dummywav = torch.rand(16000)
    >>> import os
    >>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
    >>> write_audio(tmpfile, dummywav, 16000)
    >>> asr_example = { "wav": tmpfile, "spk_id": "foo", "words": "foo bar"}
    >>> loaded = read_audio(asr_example["wav"])
    >>> loaded.allclose(dummywav.squeeze(0),atol=1e-4) # replace with eq with sox_io backend
    True
    seekr   rC   filestartstopz"Invalid sample range (start < 0): z..!z%Invalid sample range (stop < start): z;!
Hint: Omit "stop" if you want to read to the end of file.rJ   frame_offsetrB   )rW   rB   r'   )r   hasattrrQ   r   r   r   bytesrH   r   getr2   	transposesqueeze)	waveforms_objrB   audio_rF   rS   rT   rJ   fsr   r   r   
read_audio   s:   >





ra   c                 C   s   t | t| dr| d t| tttfr3t| tr$t| } | d tj| |d\}}|	ddS | d }t|t
s?|g}g }| dd}| d|d }|| }|D ]}	tj|	|||d\}}
|| qUt|d}|	ddS )	a  General audio loading, based on a custom notation.

    Expected use case is in conjunction with Datasets
    specified by JSON.

    The custom notation:

    The annotation can be just a path to a file:
    "/path/to/wav1.wav"

    Multiple (possibly multi-channel) files can be specified, as long as they
    have the same length:
    {"files": [
        "/path/to/wav1.wav",
        "/path/to/wav2.wav"
        ]
    }

    Or you can specify a single file more succinctly:
    {"files": "/path/to/wav2.wav"}

    Offset number samples and stop number samples also can be specified to read
    only a segment within the files.
    {"files": [
        "/path/to/wav1.wav",
        "/path/to/wav2.wav"
        ]
    "start": 8000
    "stop": 16000
    }

    Arguments
    ---------
    waveforms_obj : str, dict
        Audio reading annotation, see above for format.
    backend : str, optional
        Audio backend to use for loading the audio file. Must be one of
        'ffmpeg', 'sox', 'soundfile' or None. If None, uses torchaudio's default backend.

    Raises
    ------
    ValueError
        If the `backend` is not one of the allowed values.
        Must be one of [None, 'ffmpeg', 'sox', 'soundfile'].

    Returns
    -------
    torch.Tensor
        Audio tensor with shape: (samples, ).

    Example
    -------
    >>> dummywav = torch.rand(16000, 2)
    >>> import os
    >>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
    >>> write_audio(tmpfile, dummywav, 16000)
    >>> asr_example = { "wav": tmpfile, "spk_id": "foo", "words": "foo bar"}
    >>> loaded = read_audio(asr_example["wav"])
    >>> loaded.allclose(dummywav.squeeze(0),atol=1e-4) # replace with eq with sox_io backend
    True
    rQ   r   rC   r'   filesrS   rT   rV   )r   rX   rQ   r   r   r   rY   rH   r   r[   r   rZ   appendtorchcat)r]   rB   r^   r_   rb   	waveformsrS   rT   rJ   r   r`   outr   r   r   read_audio_multichannele  s.   >





rh   c                 C   sF   t |jdkr|dd}nt |jdkr|d}t| || dS )a  Write audio on disk. It is basically a wrapper to support saving
    audio signals in the speechbrain format (audio, channels).

    Arguments
    ---------
    filepath: path
        Path where to save the audio file.
    audio : torch.Tensor
        Audio file in the expected speechbrain format (signal, channels).
    samplerate: int
        Sample rate (e.g., 16000).


    Example
    -------
    >>> import os
    >>> tmpfile = str(getfixture('tmpdir') / "wave.wav")
    >>> dummywav = torch.rand(16000, 2)
    >>> write_audio(tmpfile, dummywav, 16000)
    >>> loaded = read_audio(tmpfile)
    >>> loaded.allclose(dummywav,atol=1e-4) # replace with eq with sox_io backend
    True
       r   r'   N)lenshaper[   	unsqueezerH   save)filepathr^   
samplerater   r   r   write_audio  s
   
rp   c                 C   s:   t | d}t|}W d   |S 1 sw   Y  |S )zUtility function for loading .pkl pickle files.

    Arguments
    ---------
    pickle_path : str
        Path to pickle file.

    Returns
    -------
    out : object
        Python object loaded from pickle.
    rbN)r   pickler   )pickle_pathr   rg   r   r   r   load_pickle  s   
rt   xc                 C   >   t | tjr
|  S t | tjrt|  S tj| tjdS )z
    Arguments
    ---------
    x : (list, tuple, np.ndarray)
        Input data to be converted to torch float.

    Returns
    -------
    tensor : torch.Tensor
        Data now in torch.tensor float datatype.
    dtype)r   rd   Tensorr4   npndarray
from_numpytensorru   r   r   r   to_floatTensor  
   r   c                 C   rv   )z
    Arguments
    ---------
    x : (list, tuple, np.ndarray)
        Input data to be converted to torch double.

    Returns
    -------
    tensor : torch.Tensor
        Data now in torch.tensor double datatype.
    rw   )r   rd   ry   doublerz   r{   r|   r}   r~   r   r   r   to_doubleTensor  r   r   c                 C   rv   )z
    Arguments
    ---------
    x : (list, tuple, np.ndarray)
        Input data to be converted to torch long.

    Returns
    -------
    tensor : torch.Tensor
        Data now in torch.tensor long datatype.
    rw   )r   rd   ry   longrz   r{   r|   r}   r~   r   r   r   to_longTensor!  r   r   c                    s    fdd| D S )a  Convert a batch of integer IDs to string labels.

    Arguments
    ---------
    batch : list
        List of lists, a batch of sequences.
    ind2lab : dict
        Mapping from integer IDs to labels.

    Returns
    -------
    list
        List of lists, same size as batch, with labels from ind2lab.

    Example
    -------
    >>> ind2lab = {1: "h", 2: "e", 3: "l", 4: "o"}
    >>> out = convert_index_to_lab([[4,1], [1,2,3,3,4]], ind2lab)
    >>> for seq in out:
    ...     print("".join(seq))
    oh
    hello
    c                    s   g | ]} fd d|D qS )c                    s   g | ]} t | qS r   )int).0indexind2labr   r   
<listcomp>M  s    z3convert_index_to_lab.<locals>.<listcomp>.<listcomp>r   )r   seqr   r   r   r   M  s    z(convert_index_to_lab.<locals>.<listcomp>r   )batchr   r   r   r   convert_index_to_lab5  s   r   c                 C   s    | j d }t|| | }|S )a  Converts SpeechBrain style relative length to the absolute duration.

    Operates on batch level.

    Arguments
    ---------
    batch : torch.Tensor
        Sequences to determine the duration for.
    relative_lens : torch.Tensor
        The relative length of each sequence in batch. The longest sequence in
        the batch needs to have relative length 1.0.
    rate : float
        The rate at which sequence elements occur in real-world time. Sample
        rate, if batch is raw wavs (recommended) or 1/frame_shift if batch is
        features. This has to have 1/s as the unit.

    Returns
    -------
    torch.Tensor
        Duration of each sequence in seconds.

    Example
    -------
    >>> batch = torch.ones(2, 16000)
    >>> relative_lens = torch.tensor([3./4., 1.0])
    >>> rate = 16000
    >>> print(relative_time_to_absolute(batch, relative_lens, rate))
    tensor([0.7500, 1.0000])
    r'   )rk   rd   round)r   relative_lensratemax_len	durationsr   r   r   relative_time_to_absoluteP  s   
r   c                   @   s@   e Zd ZdZi fddZdd Zdd Zdd	 Zed
d Z	dS )IterativeCSVWritera%  Write CSV files a line at a time.

    Arguments
    ---------
    outstream : file-object
        A writeable stream
    data_fields : list
        List of the optional keys to write. Each key will be expanded to the
        SpeechBrain format, producing three fields: key, key_format, key_opts.
    defaults : dict
        Mapping from CSV key to corresponding default value.

    Example
    -------
    >>> import io
    >>> f = io.StringIO()
    >>> writer = IterativeCSVWriter(f, ["phn"])
    >>> print(f.getvalue())
    ID,duration,phn,phn_format,phn_opts
    >>> writer.write("UTT1",2.5,"sil hh ee ll ll oo sil","string","")
    >>> print(f.getvalue())
    ID,duration,phn,phn_format,phn_opts
    UTT1,2.5,sil hh ee ll ll oo sil,string,
    >>> writer.write(ID="UTT2",phn="sil ww oo rr ll dd sil",phn_format="string")
    >>> print(f.getvalue())
    ID,duration,phn,phn_format,phn_opts
    UTT1,2.5,sil hh ee ll ll oo sil,string,
    UTT2,,sil ww oo rr ll dd sil,string,
    >>> writer.set_default('phn_format', 'string')
    >>> writer.write_batch(ID=["UTT3","UTT4"],phn=["ff oo oo", "bb aa rr"])
    >>> print(f.getvalue())
    ID,duration,phn,phn_format,phn_opts
    UTT1,2.5,sil hh ee ll ll oo sil,string,
    UTT2,,sil ww oo rr ll dd sil,string,
    UTT3,,ff oo oo,string,
    UTT4,,bb aa rr,string,
    c                 C   s8   || _ ddg| | | _|| _| j d| j d S )Nr&   r,   ,)
_outstream_expand_data_fieldsfieldsdefaultswritejoin)self	outstreamdata_fieldsr   r   r   r   __init__  s   zIterativeCSVWriter.__init__c                 C   s&   || j vrt| d|| j|< dS )zSets a default value for the given CSV field.

        Arguments
        ---------
        field : str
            A field in the CSV.
        value : str
            The default value.
        z is not a field in this CSV!N)r   r2   r   )r   fieldr<   r   r   r   set_default  s   

zIterativeCSVWriter.set_defaultc                    s   |r|rt d|rt|t| jkrt ddd |D }|r<d|vr(t d| j   |  fdd| jD }| jd | jd	| d
S )a>  Writes one data line into the CSV.

        Arguments
        ---------
        *args : tuple
            Supply every field with a value in positional form OR.
        **kwargs : dict
            Supply certain fields by key. The ID field is mandatory for all
            lines, but others can be left empty.
        ;Use either positional fields or named fields, but not both.Need consistent fieldsc                 S   s   g | ]}t |qS r   r(   )r   argr   r   r   r     s    z,IterativeCSVWriter.write.<locals>.<listcomp>r&   I'll need to see some IDc                    s   g | ]
}t  |d qS )r"   )r   rZ   )r   r   	full_valsr   r   r     s    
r   N)	r2   rj   r   r   copyupdater   r   r   )r   argskwargsto_writer   r   r   r     s    

zIterativeCSVWriter.writec                 O   s   |r|rt d|r#t|t| jkrt dt| D ]}| j|  q|rId|vr-t d| }t|  D ]}tt||}| jdi | q7dS dS )a  Writes a batch of lines into the CSV.

        Here each argument should be a list with the same length.

        Arguments
        ---------
        *args : tuple
            Supply every field with a value in positional form OR.
        **kwargs : dict
            Supply certain fields by key. The ID field is mandatory for all
            lines, but others can be left empty.
        r   r   r&   r   Nr   )r2   rj   r   zipr   keysvaluesr   )r   r   r   arg_rowr   	value_row	kwarg_rowr   r   r   write_batch  s$   zIterativeCSVWriter.write_batchc                 C   s8   g }| D ]}| | | |d  | |d  q|S )N_format_opts)rc   )r   expanded
data_fieldr   r   r   r     s   
z&IterativeCSVWriter._expand_data_fieldsN)
__name__
__module____qualname____doc__r   r   r   r   staticmethodr   r   r   r   r   r   s  s    &r   c                 C   s   ~t jt j|dd t|ddd@}t| tjr|  } t| t	j
r(|  } t| tr8| D ]}t||d q/t| trKt| |d W d   dS W d   dS 1 sVw   Y  dS )a  Write data in text format.

    Arguments
    ---------
    data : str, list, torch.Tensor, numpy.ndarray
        The data to write in the text file.
    filename : str
        Path to file where to write the data.
    sampling_rate : None
        Not used, just here for interface compatibility.

    Example
    -------
    >>> tmpdir = getfixture('tmpdir')
    >>> signal=torch.tensor([1,2,3,4])
    >>> write_txt_file(signal, tmpdir / 'example.txt')
    T)exist_okwr   r	   )rR   N)rE   makedirsrF   dirnamer   r   rd   ry   tolistrz   r{   r   printr   )r   filenamesampling_ratefoutliner   r   r   write_txt_file  s   

"r   c                 C   s^   t | tjr
|  } t | tjr|  } t | tr"| D ]}t| qt | tr-t|  dS dS )a  Write data to standard output.

    Arguments
    ---------
    data : str, list, torch.Tensor, numpy.ndarray
        The data to write in the text file.
    filename : None
        Not used, just here for compatibility.
    sampling_rate : None
        Not used, just here for compatibility.

    Example
    -------
    >>> tmpdir = getfixture('tmpdir')
    >>> signal = torch.tensor([[1,2,3,4]])
    >>> write_stdout(signal, tmpdir / 'example.txt')
    [1, 2, 3, 4]
    N)	r   rd   ry   r   rz   r{   r   r   r   )r   r   r   r   r   r   r   write_stdout  s   


r   c                 C   s   t | jdks	J |du r|    }tj|| j| jd	t | || 
dk }|du r1| j}|du r8| j}tj|||d}|S )a  Creates a binary mask for each sequence.

    Reference: https://discuss.pytorch.org/t/how-to-generate-variable-length-mask/23397/3

    Arguments
    ---------
    length : torch.LongTensor
        Containing the length of each sequence in the batch. Must be 1D.
    max_len : int
        Max length for the mask, also the size of the second dimension.
    dtype : torch.dtype, default: None
        The dtype of the generated mask.
    device: torch.device, default: None
        The device to put the mask variable.

    Returns
    -------
    mask : tensor
        The binary mask.

    Example
    -------
    >>> length=torch.Tensor([1,2,3])
    >>> mask=length_to_mask(length)
    >>> mask
    tensor([[1., 0., 0.],
            [1., 1., 0.],
            [1., 1., 1.]])
    r'   N)devicerx   )rx   r   )rj   rk   maxr   r    rd   aranger   rx   expandrl   	as_tensor)lengthr   rx   r   maskr   r   r   length_to_mask3  s   
r   c                 C   sT   zddl }W n ty   tdw dd |d|  d | d |  d	 D }|S )
a  Read labels in kaldi format.

    Uses kaldi IO.

    Arguments
    ---------
    kaldi_ali : str
        Path to directory where kaldi alignments are stored.
    kaldi_lab_opts : str
        A string that contains the options for reading the kaldi alignments.

    Returns
    -------
    lab : dict
        A dictionary containing the labels.

    Note
    ----
    This depends on kaldi-io-for-python. Install it separately.
    See: https://github.com/vesis84/kaldi-io-for-python

    Example
    -------
    This example requires kaldi files.
    ```
    lab_folder = '/home/kaldi/egs/TIMIT/s5/exp/dnn4_pretrain-dbn_dnn_ali'
    read_kaldi_lab(lab_folder, 'ali-to-pdf')
    ```
    r   Nz2Could not import kaldi_io. Install it to use this.c                 S   s   i | ]\}}||qS r   r   )r   kvr   r   r   
<dictcomp>  s    z"read_kaldi_lab.<locals>.<dictcomp>z
gunzip -c z/ali*.gz |  z/final.mdl ark:- ark:-|)kaldi_ioImportErrorread_vec_int_ark)	kaldi_alikaldi_lab_optsr   labr   r   r   read_kaldi_labc  s.   r   c                 C   sb   d}t  }t| d}	 ||}|sn|| qW d   | S 1 s(w   Y  | S )aO  Get the md5 checksum of an input file.

    Arguments
    ---------
    file : str
        Path to file for which compute the checksum.

    Returns
    -------
    md5
        Checksum for the given filepath.

    Example
    -------
    >>> get_md5('tests/samples/single-mic/example1.wav')
    'c482d0081ca35302d30d12f1136c34e5'
    i   rq   TN)hashlibmd5r   readr   	hexdigest)rR   BUF_SIZEr   r   r   r   r   r   get_md5  s   


r   c                 C   s(   i }| D ]}t |||< qt|| dS )a  Saves the md5 of a list of input files as a pickled dict into a file.

    Arguments
    ---------
    files : list
        List of input files from which we will compute the md5.
    out_file : str
        The path where to store the output pkl file.

    Example
    -------
    >>> files = ['tests/samples/single-mic/example1.wav']
    >>> tmpdir = getfixture('tmpdir')
    >>> save_md5(files, tmpdir / "md5.pkl")
    N)r   save_pkl)rb   out_filemd5_dictrR   r   r   r   save_md5  s   r   c                 C   s<   t |d}t| | W d   dS 1 sw   Y  dS )aM  Save an object in pkl format.

    Arguments
    ---------
    obj : object
        Object to save in pkl format
    file : str
        Path to the output file

    Example
    -------
    >>> tmpfile = getfixture('tmpdir') / "example.pkl"
    >>> save_pkl([1, 2, 3, 4, 5], tmpfile)
    >>> load_pkl(tmpfile)
    [1, 2, 3, 4, 5]
    wbN)r   rr   dump)objrR   r   r   r   r   r     s   "r   c              
   C   s   d}|dkrt j| d rtd |d8 }nn|dkszJt| d ddd  t| d}t|W  d	   W t j| d rKt 	| d  S S 1 sOw   Y  W t j| d rft 	| d  d	S d	S t j| d rxt 	| d  w w )
zLoads a pkl file.

    For an example, see `save_pkl`.

    Arguments
    ---------
    file : str
        Path to the input pkl file.

    Returns
    -------
    The loaded object.
    d   r   z.lockr'   r   r   r	   rq   N)
rE   rF   isfiletimesleepr   closerr   r   remove)rR   countr   r   r   r   load_pkl  s*   

r   c                 C   s>   |    }| jd }||d|}tj||gdd}|S )aI  Create labels with <bos> token at the beginning.

    Arguments
    ---------
    label : torch.IntTensor
        Containing the original labels. Must be of size: [batch_size, max_length].
    bos_index : int
        The index for <bos> token.

    Returns
    -------
    new_label : tensor
        The new label with <bos> at the beginning.

    Example
    -------
    >>> label=torch.LongTensor([[1,0,0], [2,3,0], [4,5,6]])
    >>> new_label=prepend_bos_token(label, bos_index=7)
    >>> new_label
    tensor([[7, 1, 0, 0],
            [7, 2, 3, 0],
            [7, 4, 5, 6]])
    r   r'   dim)r   clonerk   	new_zerosfill_rd   re   )label	bos_index	new_label
batch_sizebosr   r   r   prepend_bos_token  s
   
r   c                 C   sN   |    }| jd }||d}tj||gdd}||t|| f< |S )a  Create labels with <eos> token appended.

    Arguments
    ---------
    label : torch.IntTensor
        Containing the original labels. Must be of size: [batch_size, max_length]
    length : torch.LongTensor
        Containing the original length of each label sequences. Must be 1D.
    eos_index : int
        The index for <eos> token.

    Returns
    -------
    new_label : tensor
        The new label with <eos> appended.

    Example
    -------
    >>> label=torch.IntTensor([[1,0,0], [2,3,0], [4,5,6]])
    >>> length=torch.LongTensor([1,2,3])
    >>> new_label=append_eos_token(label, length, eos_index=7)
    >>> new_label
    tensor([[1, 7, 0, 0],
            [2, 3, 7, 0],
            [4, 5, 6, 7]], dtype=torch.int32)
    r   r'   r   )r   r   rk   r   rd   re   r   r   )r   r   	eos_indexr   r   padr   r   r   append_eos_token%  s   
r   r_   c                 C   s,   g }| D ]}d ||}|| q|S )a  Merge characters sequences into word sequences.

    Arguments
    ---------
    sequences : list
        Each item contains a list, and this list contains a character sequence.
    space : string
        The token represents space. Default: _

    Returns
    -------
    The list contains word sequences for each sentence.

    Example
    -------
    >>> sequences = [["a", "b", "_", "c", "_", "d", "e"], ["e", "f", "g", "_", "h", "i"]]
    >>> results = merge_char(sequences)
    >>> results
    [['ab', 'c', 'de'], ['efg', 'hi']]
    r"   )r   splitrc   )	sequencesspaceresultsr   wordsr   r   r   
merge_charI  s
   r  c           
   	   C   sV  t j| |}t j|rtd tt j| |d ddd}| }W d   n1 s/w   Y  g }|D ]B}tt j| |ddd,}t|D ]\}}	|dkre|	|krdt	d|d  dt
 d	qK||	 qKW d   n1 suw   Y  q8t|d
dd}|| |D ]}	||	 qW d   n1 sw   Y  t| d dS )a4  Merging several csv files into one file.

    Arguments
    ---------
    data_folder : string
        The folder to store csv files to be merged and after merging.
    csv_lst : list
        Filenames of csv file to be merged.
    merged_csv : string
        The filename to write the merged csv file.

    Example
    -------
    >>> tmpdir = getfixture('tmpdir')
    >>> os.symlink(os.path.realpath("tests/samples/annotation/speech.csv"), tmpdir / "speech.csv")
    >>> merge_csvs(tmpdir,
    ... ["speech.csv", "speech.csv"],
    ... "test_csv_merge.csv")
    z,Skipping merging. Completed in previous run.r   r"   r   r#   NzDifferent header for z and .r   r	   z is created.)rE   rF   r   r   loggerrI   r   readliner   r2   r-   rc   r   )
data_foldercsv_lst
merged_csv
write_pathr   headerlinescsv_filer!   r   r   r   r   
merge_csvse  sB   


r  c                 C   s*   g }| D ]}t ||}|| q|S )a  Split word sequences into character sequences.

    Arguments
    ---------
    sequences: list
        Each item contains a list, and this list contains a words sequence.
    space: string
        The token represents space. Default: _

    Returns
    -------
    The list contains word sequences for each sentence.

    Example
    -------
    >>> sequences = [['ab', 'c', 'de'], ['efg', 'hi']]
    >>> results = split_word(sequences)
    >>> results
    [['a', 'b', '_', 'c', '_', 'd', 'e'], ['e', 'f', 'g', '_', 'h', 'i']]
    )r   r   rc   )r  r  r  r   charsr   r   r   
split_word  s
   r  r'           c                 C   sT   |  |}t|| | }|dd|  d    }|d|| }|| | < dS )a  Sets the value of any padding on the specified tensor to mask_value.

    For instance, this can be used to zero out the outputs of an autoencoder
    during training past the specified length.

    This is an in-place operation

    Arguments
    ---------
    tensor: torch.Tensor
        a tensor of arbitrary dimension
    length: torch.Tensor
        a 1-D tensor of lengths
    len_dim: int
        the dimension representing the length
    mask_value: mixed
        the value to be assigned to padding positions

    Example
    -------
    >>> import torch
    >>> x = torch.arange(5).unsqueeze(0).repeat(3, 1)
    >>> x = x + torch.arange(3).unsqueeze(-1)
    >>> x
    tensor([[0, 1, 2, 3, 4],
            [1, 2, 3, 4, 5],
            [2, 3, 4, 5, 6]])
    >>> length = torch.tensor([0.4, 1.0, 0.6])
    >>> clean_padding_(x, length=length, mask_value=10.)
    >>> x
    tensor([[ 0,  1, 10, 10, 10],
            [ 1,  2,  3,  4,  5],
            [ 2,  3,  4, 10, 10]])
    >>> x = torch.arange(5)[None, :, None].repeat(3, 1, 2)
    >>> x = x + torch.arange(3)[:, None, None]
    >>> x = x * torch.arange(1, 3)[None, None, :]
    >>> x = x.transpose(1, 2)
    >>> x
    tensor([[[ 0,  1,  2,  3,  4],
             [ 0,  2,  4,  6,  8]],
    <BLANKLINE>
            [[ 1,  2,  3,  4,  5],
             [ 2,  4,  6,  8, 10]],
    <BLANKLINE>
            [[ 2,  3,  4,  5,  6],
             [ 4,  6,  8, 10, 12]]])
    >>> clean_padding_(x, length=length, mask_value=10., len_dim=2)
    >>> x
    tensor([[[ 0,  1, 10, 10, 10],
             [ 0,  2, 10, 10, 10]],
    <BLANKLINE>
            [[ 1,  2,  3,  4,  5],
             [ 2,  4,  6,  8, 10]],
    <BLANKLINE>
            [[ 2,  3,  4, 10, 10],
             [ 4,  6,  8, 10, 10]]])
    ).r   ri   r'   N)rK   r   boolr   r[   	expand_as)r}   r   len_dim
mask_valuer   r   	mask_unsqmask_tr   r   r   clean_padding_  s
   
:r  c                 C   s   |   }t|||| |S )a_  Sets the value of any padding on the specified tensor to mask_value.

    For instance, this can be used to zero out the outputs of an autoencoder
    during training past the specified length.

    This version of the operation does not modify the original tensor

    Arguments
    ---------
    tensor: torch.Tensor
        a tensor of arbitrary dimension
    length: torch.Tensor
        a 1-D tensor of lengths
    len_dim: int
        the dimension representing the length
    mask_value: mixed
        the value to be assigned to padding positions

    Returns
    -------
    result: torch.Tensor
        Tensor with updated padding.

    Example
    -------
    >>> import torch
    >>> x = torch.arange(5).unsqueeze(0).repeat(3, 1)
    >>> x = x + torch.arange(3).unsqueeze(-1)
    >>> x
    tensor([[0, 1, 2, 3, 4],
            [1, 2, 3, 4, 5],
            [2, 3, 4, 5, 6]])
    >>> length = torch.tensor([0.4, 1.0, 0.6])
    >>> x_p = clean_padding(x, length=length, mask_value=10.)
    >>> x_p
    tensor([[ 0,  1, 10, 10, 10],
            [ 1,  2,  3,  4,  5],
            [ 2,  3,  4, 10, 10]])
    >>> x = torch.arange(5)[None, :, None].repeat(3, 1, 2)
    >>> x = x + torch.arange(3)[:, None, None]
    >>> x = x * torch.arange(1, 3)[None, None, :]
    >>> x = x.transpose(1, 2)
    >>> x
    tensor([[[ 0,  1,  2,  3,  4],
             [ 0,  2,  4,  6,  8]],
    <BLANKLINE>
            [[ 1,  2,  3,  4,  5],
             [ 2,  4,  6,  8, 10]],
    <BLANKLINE>
            [[ 2,  3,  4,  5,  6],
             [ 4,  6,  8, 10, 12]]])
    >>> x_p = clean_padding(x, length=length, mask_value=10., len_dim=2)
    >>> x_p
    tensor([[[ 0,  1, 10, 10, 10],
             [ 0,  2, 10, 10, 10]],
    <BLANKLINE>
            [[ 1,  2,  3,  4,  5],
             [ 2,  4,  6,  8, 10]],
    <BLANKLINE>
            [[ 2,  3,  4, 10, 10],
             [ 4,  6,  8, 10, 10]]])
    )r   r  )r}   r   r  r  r7   r   r   r   clean_padding  s   @r  c                 C   s@  g }| D ]}d |}||}g }g }d}	d}
|D ]X}t||rJ|
r<|r<t|dkr5|	dd | 7 }	d}
||	 |}	g }d}
|sI||	 qt||rk|
rk|rkt|dkrc|	dd | 7 }	d}
||	 q|
rr|| q|
r|rt|dkr|	dd | 7 }	d}
||	 t|dkr|d || q|S )a  keep the semantic concepts and values for evaluation.

    Arguments
    ---------
    sequences: list
        Each item contains a list, and this list contains a character sequence.
    keep_values: bool
        If True, keep the values. If not don't.
    tag_in: char
        Indicates the start of the concept.
    tag_out: char
        Indicates the end of the concept.
    space: string
        The token represents space. Default: _

    Returns
    -------
    The list contains concept and value sequences for each sentence.

    Example
    -------
    >>> sequences = [['<response>','_','n','o','_','>','_','<localisation-ville>','_','L','e','_','M','a','n','s','_','>'], ['<response>','_','s','i','_','>'],['v','a','_','b','e','n','e']]
    >>> results = extract_concepts_values(sequences, True, '<', '>', '_')
    >>> results
    [['<response> no', '<localisation-ville> Le Mans'], ['<response> si'], ['']]
    r"   Fr   r   T)r   r   r/   r)   rj   rc   )r  keep_valuestag_intag_outr  r  sequenceprocessed_sequencer<   keptconcept_openwordr   r   r   extract_concepts_values7  sR   






r$  r   )r>   r?   )NN)NNN)r_   )r'   r  )8r   r-   r   r   rE   rr   r/   r   ior   typingr   numpyrz   rd   rH   speechbrain.utils.loggerr   %speechbrain.utils.torch_audio_backendr   r   r   r  r   r   r=   rP   ra   rh   rp   rt   r   tupler{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r  r$  r   r   r   r   <module>   sd    'I

H
qb #

!
 03" 
$
0

AE