o
    Ni                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
mZ dZdZd	Zd
Zg dZg dZdZdZdZdZG dd dejjZdS )zSpeechCommands dataset.    )absolute_import)division)print_functionN)lazy_imports_liba  
@article{speechcommandsv2,
   author = {{Warden}, P.},
    title = "{Speech Commands: A Dataset for Limited-Vocabulary Speech Recognition}",
  journal = {ArXiv e-prints},
  archivePrefix = "arXiv",
  eprint = {1804.03209},
  primaryClass = "cs.CL",
  keywords = {Computer Science - Computation and Language, Computer Science - Human-Computer Interaction},
    year = 2018,
    month = apr,
    url = {https://arxiv.org/abs/1804.03209},
}
a(  
An audio dataset of spoken words designed to help train and evaluate keyword
spotting systems. Its primary goal is to provide a way to build and test small
models that detect when a single word is spoken, from a set of ten target words,
with as few false positives as possible from background noise or unrelated
speech. Note that in the train and validation set, the label "unknown" is much
more prevalent than the labels of the target words or background noise.
One difference from the release version is the handling of silent segments.
While in the test set the silence segments are regular 1 second files, in the
training they are provided as long segments under "background_noise" folder.
Here we split these background noise into 1 second clips, and also keep one of
the files for the validation set.
z@http://download.tensorflow.org/data/speech_commands_v0.02.tar.gzzIhttp://download.tensorflow.org/data/speech_commands_test_set_v0.02.tar.gz)trainvalidtest)
downgoleftnooffonrightstopupyes	_silence_	_unknown__background_noise_i>  c                   @   s<   e Zd ZdZejdZdd Zdd Z	dd Z
d	d
 ZdS )SpeechCommandsz2The Speech Commands dataset for keyword detection.z0.0.2c              
   C   sD   t jj| tt jt jjdtdt jjt	t
tg ddddtdS )Nwav)file_formatsample_rate)namesaudiolabelz https://arxiv.org/abs/1804.03209)builderdescriptionfeaturessupervised_keyshomepagecitation)tfdscoreDatasetInfo_DESCRIPTIONr    FeaturesDictAudioSAMPLE_RATE
ClassLabelWORDSSILENCEUNKNOWN	_CITATION)self r1   ]/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/audio/speech_commands.py_infoK   s   zSpeechCommands._infoc                 C   s   | ttg\}}| ||\}}tjjtjj	|||ddtjjtjj
|||ddtjjtjj||dddgS )zReturns SplitGenerators.)archive	file_list)name
gen_kwargsN)download_DOWNLOAD_PATH_TEST_DOWNLOAD_PATH__split_archiveiter_archiver$   r%   SplitGeneratorSplitTRAIN
VALIDATIONTEST)r0   
dl_managerdl_pathdl_test_pathtrain_pathsvalidation_pathsr1   r1   r2   _split_generators[   s2   z SpeechCommands._split_generatorsc              	   c   s<   |D ]\}}|dur||vrqt j|\}}t j|\}}d||}	|tv r-|}
n|tks5|tkr8t}
nt}
|tkrut	t
jjjj|dd }tdt|t td D ]}|||t  }d|	|}||
d}||fV  qYqzt	t
jjjj|dd |
d}|	|fV  W q t
jjjjy   Y qw dS )zYields examples.Nz{}_{}r   )formatr      r   )ospathsplitrH   r,   r-   BACKGROUND_NOISEr.   nparrayr   lazy_importspydubAudioSegment	from_fileget_array_of_samplesrangelenr*   
exceptionsCouldntDecodeError)r0   r4   r5   rK   file_objrelpathwavname_word
example_idr   audio_samplesstartaudio_segmentcur_idexampler1   r1   r2   _generate_examplesv   sT   

	z!SpeechCommands._generate_examplesc                 C   s   g }|D ]6\}}d|v r|    }dd |D }qd|v r0|    }dd |D }q|dr:|| q|tjtd t	|t	| t	| }||fS )Nztesting_list.txtc                 S      g | ]}| d qS asciidecode.0pr1   r1   r2   
<listcomp>       z1SpeechCommands._split_archive.<locals>.<listcomp>zvalidation_list.txtc                 S   re   rf   rh   rj   r1   r1   r2   rm      rn   z.wavzrunning_tap.wav)
readstrip
splitlinesendswithappendrJ   rK   joinrM   set)r0   train_archiverE   rK   rY   train_test_pathsrF   r1   r1   r2   r;      s"   

zSpeechCommands._split_archiveN)__name__
__module____qualname____doc__r$   r%   VersionVERSIONr3   rG   rd   r;   r1   r1   r1   r2   r   F   s    2r   )r{   
__future__r   r   r   rJ   numpyrN   tensorflow_datasets.corer   tensorflow_datasets.public_api
public_apir$   r/   r'   r9   r:   _SPLITSr,   r-   r.   rM   r*   r%   GeneratorBasedBuilderr   r1   r1   r1   r2   <module>   s&   