o
    2wi                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZm	Z	m
Z
 ddlmZmZ ddlmZmZ ddlmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZ 	ddede	e defddZ	ddede	e deee
eef f fddZ defddZ!dedefddZ"dS )a  
The LJ Speech Dataset is a public domain speech dataset consisting of 13,100 short audio clips of a single speaker
reading passages from 7 non-fiction books. A transcription is provided for each clip. Clips vary in length from 1 to
10 seconds and have a total length of approximately 24 hours.

The texts were published between 1884 and 1964, and are in the public domain. The audio was recorded in 2016-17 by
the LibriVox project and is also in the public domain.

See https://keithito.com/LJ-Speech-Dataset for more details.
    N)Path)DictOptionalUnion)fix_manifests$validate_recordings_and_supervisions)	RecordingRecordingSet)Fbank)TorchaudioFeatureExtractor)SupervisionSegmentSupervisionSet)Pathlikefastcopyresumable_downloadsafe_extract.F
target_dirforce_downloadreturnc                 C   s   t | } | jddd d}| | d }| | }|d }| r.td| d| d |S td	| d||d
 tj|dd t	|}t
|| d W d    n1 sVw   Y  |  |S )NTparentsexist_okzLJSpeech-1.1z.tar.bz2z
.completedz	Skipping z	 because z exists.z%http://data.keithito.com/data/speech/)filenamer   )ignore_errors)path)r   mkdiris_filelogginginfor   shutilrmtreetarfileopenr   touch)r   r   dataset_nametar_path
corpus_dircompleted_detectortar r*   T/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/recipes/ljspeech.pydownload_ljspeech   s(   
r,   r'   
output_dirc                 C   sp  t | } |  sJ d|  |durt |}|jddd | d }| s-J d| g }g }t|N}|D ]C}| d\}}}	| d | d	 }
|
 sZtd|
  q8t	
|
}t||d
|jddd|d|	 id	}|| || q8W d   n1 sw   Y  t|}t|}t||\}}t|| |dur||d  ||d  ||dS )a/  
    Returns the manifests which consist of the Recordings and Supervisions

    :param corpus_dir: Pathlike, the path of the data dir.
    :param output_dir: Pathlike, the path where to write the manifests.
    :return: The RecordingSet and SupervisionSet with the keys 'audio' and 'supervisions'.
    zNo such directory: NTr   zmetadata.csvzNo such file: |wavsz.wavg        r   Englishfemalenormalized_text)	idrecording_idstartdurationchannellanguagegendertextcustomz"ljspeech_supervisions_all.jsonl.gzz ljspeech_recordings_all.jsonl.gz)
recordingssupervisions)r   is_dirr   r   r#   stripsplitr   warningr   	from_filer   r6   appendr	   from_recordingsr   from_segmentsr   r   to_file)r'   r-   metadata_csv_pathr<   r=   fliner4   r:   
normalized
audio_path	recordingsegmentrecording_setsupervision_setr*   r*   r+   prepare_ljspeech4   sP   








rP   c                  C   s   t  } d| j_| S )zm
    Set up the feature extractor for TTS task.
    :return: A feature extractor with custom parameters.
    P   )r
   confignum_mel_bins)feature_extractorr*   r*   r+   rT   l   s   rT   rM   c                 C   sN   | j  }tdd|}tdd|}tdd|}tdd|}t| |dS )Nz[^\w !?] z^\s+z\s+$z\s+ )r:   )r:   upperresubr   )rM   r:   r*   r*   r+   text_normalizerw   s   
rZ   )r   F)N)#__doc__r   rX   r    r"   pathlibr   typingr   r   r   lhotser   r   lhotse.audior   r	   lhotse.featuresr
   lhotse.features.baser   lhotse.supervisionr   r   lhotse.utilsr   r   r   r   boolr,   strrP   rT   rZ   r*   r*   r*   r+   <module>   s>    

8