o
    Si                     @   s  d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
 ddlmZ ddlmZmZmZmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZmZmZ dZ			ddedee dee defddZ	ddedee deeeee
eef f f fddZ dS )a  
AISHELL-3 is a large-scale and high-fidelity multi-speaker Mandarin speech corpus
published by Beijing Shell Shell Technology Co.,Ltd.
It can be used to train multi-speaker Text-to-Speech (TTS) systems.
The corpus contains roughly 85 hours of emotion-neutral recordings spoken by
218 native Chinese mandarin speakers and total 88035 utterances.
Their auxiliary attributes such as gender, age group and native accents are
explicitly marked and provided in the corpus. Accordingly, transcripts in Chinese
character-level and pinyin-level are provided along with the recordings.
The word & tone transcription accuracy rate is above 98%, through professional
speech annotation and strict quality inspection for tone and prosody.
    N)Path)DictOptionalSequenceUnion)tqdm)RecordingSetSupervisionSegmentSupervisionSet$validate_recordings_and_supervisions)	Recordingr   )fix_manifests)manifests_existread_manifests_if_cached)Pathlikeresumable_downloadsafe_extract)testtrain.F http://www.openslr.org/resources
target_dirforce_downloadbase_urlreturnc                 C   s   t | } | jddd | d}d}| | }| d }| r,td| d| d | S t| d	| ||d
 t|}t|| d W d   n1 sNw   Y  |	  | S )aR  
    Download and untar the dataset

    :param target_dir: Pathlike, the path of the dir to storage the dataset.
    :param force_download: Bool, if True, download the tars no matter if the tars exist.
    :param base_url: str, the url of the OpenSLR resources.
    :return: the path to downloaded and extracted directory with data.
    Tparentsexist_okz/93zdata_aishell3.tgzz
.completedz	Skipping z	 because z exists./)filenamer   )pathN)
r   mkdiris_filelogginginfor   tarfileopenr   touch)r   r   r   urltar_nametar_pathcompleted_detectortar r-   K/home/ubuntu/.local/lib/python3.10/site-packages/lhotse/recipes/aishell3.pydownload_aishell3&   s"   
r/   
corpus_dir
output_dirc                 C   s  t | } |  sJ d|  t}i }|dur)t |}|jddd t||dd}i }| d }| s:J d| t|d	1}| D ]$}| }|	d
sUt
|dkrVqD|d}|d |d }}	|	||< qDW d   n1 ssw   Y  | d d }
|
 sJ d|
 t|
d	8}i }| D ])}| }|	d
st
|dkrq|d}t
|dksJ |dd ||d < qW d   n1 sw   Y  t|ddD ]}t||ddrtd| d q| | }|d }| sJ d| g }g }t|}t|D ]}| d\}}|d |dd  | }|dd }||d}|dd }ddd t| D }d d!d t| D }| s\td|  qt|}t||d"|jdd#|||d$|| |d |d d%d&
}|| || qW d   n	1 sw   Y  t|}t|}t||\}}t|| |dur||d'| d(  ||d)| d(  ||d*||< q|S )+a  
    Returns the manifests which consist of the Recordings and Supervisions.
    When all the manifests are available in the ``output_dir``, it will simply read and return them.

    :param corpus_dir: Pathlike, the path of the data dir.
    :param output_dir: Pathlike, the path where to write the manifests.
    :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'audio' and 'supervisions'.
    zNo such directory: NTr   aishell3)dataset_partsr1   prefixzspk-info.txtzNo such file: r#r   	   r   zlabel_train-set.txt|      zPreparing aishell3 parts)desc)partr1   r4   zaishell3 subset: z already prepared - skipping.zcontent.txtwav   r   )NN c                 S       g | ]\}}|d  dkr|qS )r8   r   r-   .0ixr-   r-   r.   
<listcomp>        z$prepare_aishell3.<locals>.<listcomp> c                 S   rA   )r8   r;   r-   rB   r-   r-   r.   rF      rG   g        Chinesefemale)pinyintones_pinyin
tones_text)
idrecording_idstartdurationchannellanguagespeakergendertextcustomaishell3_supervisions_z	.jsonl.gzaishell3_recordings_)
recordingssupervisions) r   is_dirr2   r!   r   r"   r&   	readlinesstrip
startswithlensplitr   r   r#   r$   getjoin	enumeratewarningr   	from_filer	   rQ   appendr   from_recordingsr
   from_segmentsr   r   to_file)r0   r1   r3   	manifestsspeaker_infospeaker_info_pathfkrT   rU   
label_pathtone_labelsr=   	part_pathscripts_pathrZ   r[   linerN   rV   
audio_pathtonesrK   	recordingsegmentrecording_setsupervision_setr-   r-   r.   prepare_aishell3F   s   

	







 


r{   )r   Fr   )N)!__doc__r#   shutilr%   pathlibr   typingr   r   r   r   r   lhotser   r	   r
   r   lhotse.audior   	lhotse.qar   lhotse.recipes.utilsr   r   lhotse.utilsr   r   r   r2   boolstrr/   r{   r-   r-   r-   r.   <module>   sB    
"