o
    iC                     @   s~   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ dZddiZeeeeeef ZG d	d
 d
eZdS )    N)Tuple)Tensor)Dataset)download_url_to_file)_extract_zipzNhttps://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip@f96258be9fdc2cbff6559541aae7ea4f59df3fcaf5cf963aae5ca647357e359cc                	   @   s   e Zd ZdZddedfdedededefd	d
ZdefddZde	e
ef fddZdedededefddZdedefddZdefddZdS )VCTK_092a:  *VCTK 0.92* :cite:`yamagishi2019vctk` dataset

    Args:
        root (str): Root directory where the dataset's top level directory is found.
        mic_id (str, optional): Microphone ID. Either ``"mic1"`` or ``"mic2"``. (default: ``"mic2"``)
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional): The URL to download the dataset from.
            (default: ``"https://datashare.is.ed.ac.uk/bitstream/handle/10283/3443/VCTK-Corpus-0.92.zip"``)
        audio_ext (str, optional): Custom audio extension if dataset is converted to non-default audio format.

    Note:
        * All the speeches from speaker ``p315`` will be skipped due to the lack of the corresponding text files.
        * All the speeches from ``p280`` will be skipped for ``mic_id="mic2"`` due to the lack of the audio files.
        * Some of the speeches from speaker ``p362`` will be skipped due to the lack of  the audio files.
        * See Also: https://datashare.is.ed.ac.uk/handle/10283/3443
    mic2Fz.flacrootmic_iddownloadurlc              
   C   s  |dvrt d| tj|d}tj|d| _tj| jd| _tj| jd| _|| _|| _|rTtj	| jsTtj
|sNt|d }t|||d t|| j tj	| js_t dtt| j| _g | _	 | jD ]O}|d	krz|d
krzqotj| j|}	tdd t|	D D ]/}
tj|
d }tj| j|| d| | j }|dkrtj
|sq| j|d qqod S )N)mic1r	   z3`mic_id` has to be either "mic1" or "mic2". Found: zVCTK-Corpus-0.92.zipzVCTK-Corpus-0.92txtwav48_silence_trimmed)hash_prefixz=Dataset not found. Please use `download=True` to download it.p280r	   c                 s   s    | ]
}| d r|V  qdS ).txtN)endswith).0f r   N/home/ubuntu/vllm_env/lib/python3.10/site-packages/torchaudio/datasets/vctk.py	<genexpr>U   s    z$VCTK_092.__init__.<locals>.<genexpr>r   _p362)RuntimeErrorospathjoin_path_txt_dir
_audio_dir_mic_id
_audio_extisdirisfile
_CHECKSUMSgetr   r   sortedlistdir_speaker_ids_sample_idssplitextappendsplit)selfr
   r   r   r   	audio_extarchivechecksum
speaker_idutterance_dirutterance_fileutterance_idaudio_path_micr   r   r   __init__&   sF   

zVCTK_092.__init__returnc                 C   s8   t |}| d W  d    S 1 sw   Y  d S )Nr   )open	readlinesr0   	file_pathr   r   r   
_load_text`   s   

$zVCTK_092._load_textc                 C   s
   t |S N)
torchaudioloadr=   r   r   r   _load_audiod      
zVCTK_092._load_audior4   r7   c           	   
   C   sl   t j| j|| d| d}t j| j|| d| d| | j }| |}| |\}}|||||fS )Nr   r   )r   r   r   r!   r"   r$   r?   rC   )	r0   r4   r7   r   transcript_path
audio_path
transcriptwaveformsample_rater   r   r   _load_sampleg   s   
zVCTK_092._load_samplenc                 C   s   | j | \}}| ||| jS )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded

        Returns:
            Tuple of the following items;

            Tensor:
                Waveform
            int:
                Sample rate
            str:
                Transcript
            str:
                Speaker ID
            std:
                Utterance ID
        )r,   rJ   r#   )r0   rK   r4   r7   r   r   r   __getitem__w   s   zVCTK_092.__getitem__c                 C   s
   t | jS r@   )lenr,   )r0   r   r   r   __len__   rD   zVCTK_092.__len__N)__name__
__module____qualname____doc__URLstrboolr9   r?   r   r   intrC   
SampleTyperJ   rL   rN   r   r   r   r   r      s(    
:r   )r   typingr   rA   torchr   torch.utils.datar   torchaudio._internalr   torchaudio.datasets.utilsr   rS   r'   rV   rT   rW   r   r   r   r   r   <module>   s    