o
    Sia3                     @   sz  d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZ ddlmZ ddlmZmZmZmZmZ ddlmZmZ ddlmZmZmZ d	Z	
			ddede	eee
e f  de	e de	e def
ddZ	
				d dedede	eee
e f  de	e de	e defddZ				d!dedeee
e f de	e dededeeeeeeef f f fddZ e Z!dS )"a 	  
This recipe supports two corpora: LibriTTS and LibriTTS-R.

---

LibriTTS is a multi-speaker English corpus of approximately 585 hours of read English speech at 24kHz sampling rate, prepared by Heiga Zen with the assistance of Google Speech and Google Brain team members. The LibriTTS corpus is designed for TTS research. It is derived from the original materials (mp3 audio files from LibriVox and text files from Project Gutenberg) of the LibriSpeech corpus. The main differences from the LibriSpeech corpus are listed below:
The audio files are at 24kHz sampling rate.
The speech is split at sentence breaks.
Both original and normalized texts are included.
Contextual information (e.g., neighbouring sentences) can be extracted.
Utterances with significant background noise are excluded.
For more information, refer to the paper "LibriTTS: A Corpus Derived from LibriSpeech for Text-to-Speech", Heiga Zen, Viet Dang, Rob Clark, Yu Zhang, Ron J. Weiss, Ye Jia, Zhifeng Chen, and Yonghui Wu, arXiv, 2019. If you use the LibriTTS corpus in your work, please cite this paper where it was introduced.

---

LibriTTS-R [1] is a sound quality improved version of the LibriTTS corpus (http://www.openslr.org/60/) which is a multi-speaker English corpus of approximately 585 hours of read English speech at 24kHz sampling rate, published in 2019. The constituent samples of LibriTTS-R are identical to those of LibriTTS, with only the sound quality improved. To improve sound quality, a speech restoration model, Miipher proposed by Yuma Koizumi [2], was used.

For more information, refer to the paper [1]. If you use the LibriTTS-R corpus in your work, please cite the dataset paper [1] where it was introduced.

Audio samples of the ground-truth and TTS generated samples are available at the demo page: https://google.github.io/df-conformer/librittsr/

[1] Yuma Koizumi, Heiga Zen, Shigeki Karita, Yifan Ding, Kohei Yatabe, Nobuyuki Morioka, Michiel Bacchiani, Yu Zhang, Wei Han, and Ankur Bapna, "LibriTTS-R: A Restored Multi-Speaker Text-to-Speech Corpus," arXiv, 2023.
[2] Yuma Koizumi, Heiga Zen, Shigeki Karita, Yifan Ding, Kohei Yatabe, Nobuyuki Morioka, Yu Zhang, Wei Han, Ankur Bapna, and Michiel Bacchiani, "Miipher: A Robust Speech Restoration Model Integrating Self-Supervised Speech and Text Representations," arXiv, 2023.

    N)Path)DictOptionalSequenceUnion)tqdm)RecordingSetSupervisionSegmentSupervisionSetfix_manifests$validate_recordings_and_supervisions)manifests_existread_manifests_if_cached)Pathlikeresumable_downloadsafe_extract)z	dev-cleanz	dev-otherz
test-cleanz
test-otherztrain-clean-100ztrain-clean-360ztrain-other-500.allF http://www.openslr.org/resources
target_dirdataset_partsforce_downloadbase_urlreturnc                 C   s   t | |||ddS )a  
    Download and untar the LibriTTS-R dataset.

    :param target_dir: Pathlike, the path of the dir to storage the dataset.
    :param dataset_parts: "all", or a list of splits (e.g. "dev-clean") to download.
    :param force_download: Bool, if True, download the tars no matter if the tars exist.
    :param base_url: str, the url of the OpenSLR resources.
    :return: the path to downloaded and extracted directory with data.
    T)r   r   r   r   use_librittsr)download_libritts)r   r   r   r    r   K/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/recipes/libritts.pydownload_librittsr8   s   r   r   c              	   C   sh  t | } | jddd |dks|d dkrt}nt|tr |g}|r'd}d}nd}d}t|d	| d
dD ]|}|tvrGtd| dt d | d| }| d}	| |	 }
| | d|  }|d }| rut	d| d| d q5| d|	 }|r|
dd}t||
|d tj|dd t|
}t|| d W d   n1 sw   Y  |  q5| S )a	  
    Download and untar the LibriTTS dataset.

    :param target_dir: Pathlike, the path of the dir to storage the dataset.
    :param use_librittsr: Bool, if True, we'll download the LibriTTS-R dataset instead.
    :param dataset_parts: "all", or a list of splits (e.g. "dev-clean") to download.
    :param force_download: Bool, if True, download the tars no matter if the tars exist.
    :param base_url: str, the url of the OpenSLR resources.
    :return: the path to downloaded and extracted directory with data.
    Tparentsexist_okr   r   
LibriTTS_R141LibriTTS60zDownloading z partsdescz$Skipping invalid dataset part name: z (possible choices: )/z.tar.gzz
.completedz	Skipping z	 because z exists.-_)filenamer   )ignore_errors)pathN)r   mkdirLIBRITTS
isinstancestrr   loggingwarningis_fileinforeplacer   shutilrmtreetarfileopenr   touch)r   r   r   r   r   nameopenslr_corpus_idparturltar_nametar_pathpart_dircompleted_detectorfull_urltarr   r   r   r   P   sD   


r      
corpus_dir
output_dirnum_jobslink_previous_uttc                 C   s  t | } |  sJ d|  |dks|d dkrt}nt|tr*|tv s'J |g}i }|durBt |}|jddd t||dd}i }| d	  r\d
d dd | d	  	 D D }t
|ddD ]}t||ddrutd| d qb| | }tj|d|dd}	g }
t
|ddddD ]}td|jdurqdd ttj|j|jdd  	 D }dd |D }t|}|rtt|dg dg| }d}| 	 D ]Y}|d \}}}||	vrtd!| d"| d# q|d$d }|||d%}|r||d}||kr|nd}||d&< |}|
t||d'|	| jd|d(||||d)
 qqt |
}
t!|	|
\}	}
t"|	|
 |durZ|
#|d*| d+  |	#|d,| d+  |	|
d-||< qb|S ).a  
    Returns the manifests which consist of the Recordings and Supervisions.
    When all the manifests are available in the ``output_dir``, it will simply read and return them.

    :param corpus_dir: Pathlike, the path of the data dir.
    :param dataset_parts: string or sequence of strings representing dataset part names, e.g. 'train-clean-100', 'train-clean-5', 'dev-clean'.
        By default we will infer which parts are available in ``corpus_dir``.
    :param output_dir: Pathlike, the path where to write the manifests.
    :param num_jobs: the number of parallel workers parsing the data.
    :param link_previous_utt: If true adds previous utterance id to supervisions.
        Useful for reconstructing chains of utterances as they were read.
        If previous utterance was skipped from LibriTTS datasets previous_utt label is None.
    :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'audio' and 'supervisions'.
    zNo such directory: r   r   NTr   libritts)r   rI   prefixzSPEAKERS.txtc                 S   s    i | ]^}}}|  |  qS r   )strip).0spk_idgenderr+   r   r   r   
<dictcomp>   s    z$prepare_libritts.<locals>.<dictcomp>c                 s   s$    | ]}| d s|dV  qdS );|N)
startswithsplit)rO   liner   r   r   	<genexpr>   s    
z#prepare_libritts.<locals>.<genexpr>zPreparing LibriTTS partsr&   )r?   rI   rM   zLibriTTS subset: z already prepared - skipping.z*.wavz(^(\._.+|1092_134562_000013_000004\.wav)$)rJ   exclude_patternz*.trans.tsvz/Scanning transcript files (progbar per speaker)F)r'   leavez^\._.+$c                 S   s    g | ]^}}}|t |fqS r   )float)rO   rec_idr+   snrr   r   r   
<listcomp>   s    

z$prepare_libritts.<locals>.<listcomp>z
.trans.tsvz	.book.tsvc                 S   s   g | ]\}}|qS r   r   )rO   rr+   r   r   r   r^      s    	z%No recording exists for utterance id z, skipping (in r(   r+   )	orig_textr]   prev_uttg        English)
idrecording_idstartdurationchanneltextlanguagespeakerrQ   customlibritts_supervisions_z	.jsonl.gzlibritts_recordings_)
recordingssupervisions)$r   is_dirr0   r1   r2   r/   r   r5   	read_text
splitlinesr   r   r3   r6   r   from_dirrglobrematchr=   maprV   parentr7   dictzipr4   getappendr	   rg   r
   from_segmentsr   r   to_file)rH   r   rI   rJ   rK   	manifests
spk2genderr?   	part_pathro   rp   
trans_pathutt2snruttidsutt2prevuttprev_rec_idrW   r\   ra   	norm_textrP   customdrb   r   r   r   prepare_libritts   s   
		


 

r   )r   r   Fr   )r   Fr   Fr   )r   NrG   F)"__doc__r3   rv   r8   r:   pathlibr   typingr   r   r   r   r   lhotser   r	   r
   r   r   lhotse.recipes.utilsr   r   lhotse.utilsr   r   r   r0   r2   boolr   r   intr   prepare_librittsrr   r   r   r   <module>   s    

;
 