o
    -ig                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ dZdZd	Zg d
ZG dd de
ZdededefddZdS )    N)Path)OptionalTupleUnion)Dataset)download_url_to_file)_extract_tar_load_waveformz6https://speech.fit.vutbr.cz/files/quesst14Database.tgzi@  @4f869e06bc066bbe9c5dde31dbd3909a0870d70291110ebbb38878dcbc2fc5e4)albanianbasqueczech	nnenglishromanianslovakc                   @   s   e Zd ZdZ		ddeeef dedee dedd	f
d
dZ	de
deee
ef fddZde
deeje
ef fddZde
fddZd	S )QUESST14a  *QUESST14* :cite:`Mir2015QUESST2014EQ` dataset.

    Args:
        root (str or Path): Root directory where the dataset's top level directory is found
        subset (str): Subset of the dataset to use. Options: [``"docs"``, ``"dev"``, ``"eval"``].
        language (str or None, optional): Language to get dataset for.
            Options: [``None``, ``albanian``, ``basque``, ``czech``, ``nnenglish``, ``romanian``, ``slovak``].
            If ``None``, dataset consists of all languages. (default: ``"nnenglish"``)
        download (bool, optional): Whether to download the dataset if it is not found at root path.
            (default: ``False``)
    r   FrootsubsetlanguagedownloadreturnNc                 C   s  |dvrt d|d ur|tvrt dtt t|}tjt}tj||}|	ddd }tj||| _
tj| j
sZtj|sU|sNtdtt|td t|| |d	krht| j
|d
| _d S |dkrvt| j
|d| _d S |dkrt| j
|d| _d S d S )N)docsdevevalz/`subset` must be one of ['docs', 'dev', 'eval']z"`language` must be None or one of .   r   z9Dataset not found. Please use `download=True` to download)hash_prefixr   zlanguage_key_utterances.lstr   zlanguage_key_dev.lstr   zlanguage_key_eval.lst)
ValueError
_LANGUAGESstrosfspathpathbasenameURLjoinrsplit_pathisdirisfileRuntimeErrorr   	_CHECKSUMr   filter_audio_pathsdata)selfr   r   r   r   r#   archive r0   U/home/ubuntu/LTX-2/.venv/lib/python3.10/site-packages/torchaudio/datasets/quesst14.py__init__&   s,   

zQUESST14.__init__nc                 C   s,   | j | }tj|| j}|t|djfS )a  Get metadata for the n-th sample from the dataset. Returns filepath instead of waveform,
        but otherwise returns the same fields as :py:func:`__getitem__`.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            str:
                Path to audio
            int:
                Sample rate
            str:
                File name
         )r-   r    r"   relpathr'   SAMPLE_RATEwith_suffixname)r.   r3   
audio_pathr5   r0   r0   r1   get_metadataJ   s   
zQUESST14.get_metadatac                 C   s2   |  |}t| j|d |d }|f|dd  S )a:  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                File name
        r      N)r:   r	   r'   )r.   r3   metadatawaveformr0   r0   r1   __getitem___   s   
zQUESST14.__getitem__c                 C   s
   t | jS )N)lenr-   )r.   r0   r0   r1   __len__s   s   
zQUESST14.__len__)r   F)__name__
__module____qualname____doc__r   r   r   r   boolr2   intr   r:   torchTensorr>   r@   r0   r0   r0   r1   r      s$    

$r   r"   r   lst_namec                 C   s   g }t | } t| d | -}|D ]!}|  \}}|dur$||kr$qtdd|}|| |  qW d   |S 1 s>w   Y  |S )z+Extract audio paths for the given language.scoringNz^.*?\/r4   )r   openstripsplitresubappend)r"   r   rI   audio_pathsfliner9   langr0   r0   r1   r,   w   s   
r,   )r    rN   pathlibr   typingr   r   r   rG   torch.utils.datar   torchaudio._internalr   torchaudio.datasets.utilsr   r	   r$   r6   r+   r   r   r   r,   r0   r0   r0   r1   <module>   s(    
^