o
    Nii                     @   sv   d Z ddlmZ ddlmZ ddlmZ ddlZddlm  mZ	 ddl
mZ dZdZdZd	ZG d
d dejjZdS )LJSpeech dataset.    )absolute_import)division)print_functionNz@misc{ljspeech17,
  author       = {Keith Ito},
  title        = {The LJ Speech Dataset},
  howpublished = {\url{https://keithito.com/LJ-Speech-Dataset/}},
  year         = 2017
}
a  This is a public domain speech dataset consisting of 13,100 short audio clips of
a single speaker reading passages from 7 non-fiction books. A transcription is
provided for each clip. Clips vary in length from 1 to 10 seconds and have a
total length of approximately 24 hours.

The texts were published between 1884 and 1964, and are in the public domain.
The audio was recorded in 2016-17 by the LibriVox project and is also in the
public domain.
z'https://keithito.com/LJ-Speech-Dataset/z:https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2c                   @   s4   e Zd ZdZejdZdd Zdd Z	dd Z
d	S )
Ljspeechr   z1.1.0c              
   C   sN   t jj| tt jtjt jjddt j	 t j	 ddt
tt jjdddS )Ni"V  )sample_rateidspeechtexttext_normalized)r   r
   )builderdescriptionfeaturessupervised_keyshomepagecitationmetadata)tfdscoreDatasetInfo_DESCRIPTIONr   FeaturesDicttfstringAudioText_URL	_CITATIONMetadataDict)self r!   V/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/audio/ljspeech.py_info:   s   zLjspeech._infoc                 C   s$   | t}tjjtjjd|idgS )N	directory)name
gen_kwargs)download_and_extract_DL_URLr   r   SplitGeneratorSplitTRAIN)r    
dl_managerextracted_dirr!   r!   r"   _split_generatorsJ   s   
zLjspeech._split_generatorsc           
   	   c   s    t j|dd}tjj|1}|D ]%}| }|d\}}}t j|ddd| }||||d}	||	fV  qW d   dS 1 sDw   Y  dS )zYields examples.zLJSpeech-1.1zmetadata.csv|wavsz%s.wavr   N)	ospathjoinr   iogfileGFilestripsplit)
r    r$   metadata_pathflinekey
transcripttranscript_normalizedwav_pathexampler!   r!   r"   _generate_examplesS   s"   "zLjspeech._generate_examplesN)__name__
__module____qualname____doc__r   r   VersionVERSIONr#   r.   rA   r!   r!   r!   r"   r   5   s    	r   )rE   
__future__r   r   r   r1   tensorflow.compat.v2compatv2r   tensorflow_datasets.public_api
public_apir   r   r   r   r(   r   GeneratorBasedBuilderr   r!   r!   r!   r"   <module>   s   	