o
    Si/                     @   sz  d Z ddlZddlZddlZddlmZmZ ddlmZ ddl	m
Z
mZmZ ddlmZ ddlmZmZmZmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddddddddddd
Z ddddddddddd
Z!			d0dedee" dee# defd d!Z$		"d1d#ed$ee d%e%de
e#e
e#eeef f f fd&d'Z&d(ed#ed)e#d*e#fd+d,Z'd-ede#fd.d/Z(dS )2a8  
Hi-Fi Multi-Speaker English TTS Dataset (Hi-Fi TTS) is a multi-speaker English dataset
for training text-to-speech models.
The dataset is based on public audiobooks from LibriVox and texts from Project Gutenberg.
The Hi-Fi TTS dataset contains about 291.6 hours of speech from 10 speakers
with at least 17 hours per speaker sampled at 44.1 kHz.

For more information and the latest dataset statistics, please refer to the paper:
"Hi-Fi Multi-Speaker English TTS Dataset" Bakhturina, E., Lavrukhin, V., Ginsburg, B.
and Zhang, Y., 2021: arxiv.org/abs/2104.01497.

BibTeX entry for citations:

@article{bakhturina2021hi,
  title={{Hi-Fi Multi-Speaker English TTS Dataset}},
  author={Bakhturina, Evelina and Lavrukhin, Vitaly and Ginsburg, Boris and Zhang, Yang},
  journal={arXiv preprint arXiv:2104.01497},
  year={2021}
}
    N)ProcessPoolExecutoras_completed)Path)DictOptionalUnion)tqdm)	RecordingRecordingSetSupervisionSegmentSupervisionSet$validate_recordings_and_supervisions)fix_manifests)manifests_existread_manifests_if_cached)
load_jsonl)Pathlikeresumable_downloadsafe_extractzCori SamuelzPhil BensonzJohn Van StanzMike Peltonz
Tony OlivazMaria KasperzHelen TaylorSylviambzCeline MajorLikeManyWaters)
92609790176670667180519136116141169712787FM.F http://www.openslr.org/resources
target_dirforce_downloadbase_urlreturnc           	      C   s   t | } | jddd | d}d}| | }| d }|d }| r-td| d |S t| d	| ||d
 tj|dd t	|}t
|| d W d   n1 sVw   Y  |  |S )aZ  
    Download and untar the HiFi TTS dataset.

    :param target_dir: Pathlike, the path of the dir to store the dataset.
    :param force_download: Bool, if True, download the tars no matter if the tars exist.
    :param base_url: str, the url of the OpenSLR resources.
    :return: the path to downloaded and extracted directory with data.
    Tparentsexist_okz/109zhi_fi_tts_v0.tar.gzhi_fi_tts_v0z
.completedz%Skipping HiFiTTS preparation because z exists./)filenamer&   )ignore_errors)pathN)r   mkdiris_filelogginginfor   shutilrmtreetarfileopenr   touch)	r%   r&   r'   urltar_nametar_pathpart_dircompleted_detectortar r@   J/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/recipes/hifitts.pydownload_hifittsE   s*   

rB      
corpus_dir
output_dirnum_jobsc                 C   s~  t | } |  sJ d|  i }t| d}dd |D }|dur5t |}|jddd t||dd	}t||}g }g }|D ]2}	|	jd
\}
}}}t	|	}t
||ddratd| d q@||t|	| |
| || q@ttt||dt|dD ]+\}}| \}}|dur||d| d  ||d| d  ||d||< qW d   |S 1 sw   Y  |S )a  
    Prepare manifests for the HiFiTTS dataset.

    :param corpus_dir: Path or str, the path to the downloaded corpus main directory.
    :param output_dir: Path or str, the path where to write the manifests.
    :param num_jobs: How many concurrent workers to use for preparing each dataset partition.
    :return: a dict with manifests for all the partitions
        (example query: ``manifests['92_clean_train']['recordings']``).
    zNo such directory: z*.jsonc                 S   s   g | ]}t |qS r@   )to_partition_id).0pr@   r@   rA   
<listcomp>~   s    z#prepare_hifitts.<locals>.<listcomp>NTr)   hifitts)dataset_partsrE   prefix_)partrE   rM   zHiFiTTS subset: z already prepared - skipping.zPreparing HiFiTTS parts)desctotalhifitts_supervisions_z	.jsonl.gzhifitts_recordings_)
recordingssupervisions)r   is_dirlistglobr1   r   r   stemsplitrG   r   r3   r4   appendsubmitprepare_single_partitionr   zipr   lenresultto_file)rD   rE   rF   	manifestsjson_manifestsdataset_partitionsexfuturespartition_idsraw_manifest_path
speaker_idrN   clean_or_otherrO   partition_idfuturerT   rU   r@   r@   rA   prepare_hifittsj   sj   
	
**rm   rh   ri   rj   c                 C   s   g }g }t | D ].}t||d  }|| |t|j|jd|jd|d t| t| |d |dd	 qt	
|}t|}t||\}}t|| ||fS )Naudio_filepathr   texttext_normalized)
text_punctrZ   )	idrecording_idstartdurationchannelro   speakergendercustom)r   r	   	from_filer[   r   rr   ru   
ID2SPEAKER	ID2GENDERr
   from_recordingsr   from_segmentsr   r   )rh   rD   ri   rj   rT   rU   meta	recordingr@   r@   rA   r]      s.   



r]   r0   c                 C   s(   | j d\}}}}| d| d| S )NrN   )rY   rZ   )r0   ri   rN   rj   rO   r@   r@   rA   rG      s   rG   )r#   Fr$   )NrC   ))__doc__r3   r5   r7   concurrent.futuresr   r   pathlibr   typingr   r   r   	tqdm.autor   lhotser	   r
   r   r   r   	lhotse.qar   lhotse.recipes.utilsr   r   lhotse.serializationr   lhotse.utilsr   r   r   r{   r|   boolstrrB   intrm   r]   rG   r@   r@   r@   rA   <module>   s    
'
K
 