o
    2wi:                     @   s  d Z ddlZddlZddlZddlZddlmZ ddlmZ ddl	m
Z
mZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ dd	lmZmZ dd
lmZmZ ddlmZm Z m!Z!m"Z" dZ#dZ$dZ%dZ&g dZ'g dZ(dZ)dZ*dZ+			d,de,dede-defddZ.de,dedee/e/f fdd Z0de,d!e/dedeeef fd"d#Z1de,d$e/dedeeef fd%d&Z2de,dedeeef fd'd(Z3	d-de,ded)ee de
e,e
e,eeef f f fd*d+Z4dS ).ak  
About the speech_commands corpus
This is a set of one-second .wav audio files, each containing a single spoken English word or background noise.
These words are from a small set of commands, and are spoken by a variety of different speakers.
This data set is designed to help train simple machine learning models.
It is covered in more detail at https://arxiv.org/abs/1804.03209.

Version 0.01 of the data set (configuration "v0.01") was released on August 3rd 2017 and contains 64,727 audio files.
Version 0.02 of the data set (configuration "v0.02") was released on April 11th 2018 and contains 105,829 audio files.
    N)defaultdict)Path)DictListOptionalSequenceTupleUnion)tqdm)fix_manifests$validate_recordings_and_supervisions)	RecordingRecordingSet)manifests_existread_manifests_if_cached)SupervisionSegmentSupervisionSet)Pathlikeis_module_availableresumable_downloadsafe_extractz@http://download.tensorflow.org/data/speech_commands_v0.01.tar.gzzIhttp://download.tensorflow.org/data/speech_commands_test_set_v0.01.tar.gzz@http://download.tensorflow.org/data/speech_commands_v0.02.tar.gzzIhttp://download.tensorflow.org/data/speech_commands_test_set_v0.02.tar.gz)trainvalidtest)
downgoleftnooffonrightstopupyes	_silence_	_unknown__background_noise_2.Fspeechcommands_version
target_dirforce_downloadreturnc              	   C   s  t |}|d|   }|jddd d|  d|  f}t|d|  dD ][}td|  || }|d	 }| rHtd
| d| d q%| d}|| }	td| |	|d tj|dd t	
|	}
|
| W d   n1 sww   Y  |  q%|S )a[  
    Download and unzip Speech Commands dataset

    :param speechcommands_version: str, dataset version.
    :param target_dir: Pathlike, the path of the dir to store the dataset.
    :param force_download: bool, if True, download the archive even if it already exists.

    :return: the path to downloaded and extracted directory with data.
    SpeechCommandsTparentsexist_okspeech_commands_v0.0speech_commands_test_set_v0.0z Downloading Speech Commands v0.0desczProcessing split: z
.completedz	Skipping z	 because z exists.z.tar.gzz$http://download.tensorflow.org/data/)filenamer+   )ignore_errorsN)r   mkdirr
   logginginfois_filer   shutilrmtreetarfileopen
extractalltouch)r)   r*   r+   
corpus_dirdataset_partspartpart_dircompleted_detectortar_nametar_pathtar rI   Z/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lhotse/recipes/speechcommands.pydownload_speechcommands-   s8   



rK   rA   c              	      s  t |}|d|   }g }t|D ]b  dkr7t|  }|   }W d   n1 s1w   Y  q dkrZt|  }|   }W d   n1 sTw   Y  qtj|  rtt|  }| fdd|D 7 }q|	tj
td t|t| t| }||fS )z
    Returns the train_paths and valid_paths given a dataset part.
    :param speechcommands_version: str, dataset version.
    :param corpus_dir: Pathlike, the path of the data dir.
    :return: the train_paths and valid_paths.
    r1   ztesting_list.txtNzvalidation_list.txtc                    $   g | ]}| d rtj |qS .wavendswithospathjoin.0	file_pathrootrI   rJ   
<listcomp>v       z&_split_train_valid.<locals>.<listcomp>zrunning_tap.wav)r   rQ   listdirr>   readstrip
splitlinesrR   isdirappendrS   BACKGROUND_NOISEset)r)   rA   	part_pathtrain_pathsfile_objtrain_test_pathsvalid_paths
file_pathsrI   rW   rJ   _split_train_valid^   s.   
ri   rd   c                 C     t |}|d|   }g }g }|D ]`}|| }| }|d}|dddd}	|d d}
|d tkr<d}d}n
|
d }|d  }| sStd	|  qt	j
||	d
}|| t|	|	d|jdd||d}|| qt|}t|}t||\}}t|| ||fS )a8  
    Returns the RecodingSet and SupervisionSet given a dataset part.
    :param speechcommands_version: str, dataset version.
    :param train_paths: set, the train subset paths.
    :param corpus_dir: Pathlike, the path of the data dir.
    :return: the RecodingSet and SupervisionSet for train and valid.
    r1   /_rN       r   NNo such file: rR   recording_id        Englishidrq   startdurationchannellanguagespeakertextr   resolvesplitreplacera   r]   r:   r8   warningr   	from_filer`   r   rw   r   from_recordingsr   from_segmentsr   r   )r)   rd   rA   rc   train_recordingstrain_supervisions
train_path
audio_pathtrain_path_splitsaudio_file_nameaudio_file_name_splitsrz   r{   train_recordingtrain_segmenttrain_recording_settrain_supervision_setrI   rI   rJ   _prepare_train   sR   





r   rg   c                 C   rj   )a8  
    Returns the RecodingSet and SupervisionSet given a dataset part.
    :param speechcommands_version: str, dataset version.
    :param valid_paths: set, the valid subset paths.
    :param corpus_dir: Pathlike, the path of the data dir.
    :return: the RecodingSet and SupervisionSet for train and valid.
    r1   rk   rl   rN   rm   rn   r   Nro   rp   rr   rs   rt   r|   )r)   rg   rA   rc   valid_recordingsvalid_supervisions
valid_pathr   valid_path_splitsr   r   rz   r{   valid_recordingvalid_segmentvalid_recording_setvalid_supervision_setrI   rI   rJ   _prepare_valid   sR   





r   c                    s  t |}|d|   }g }t|D ] tj|  r.t|  }| fdd|D 7 }qt|}g }g }|D ]w}|| }| }|d}	|dddd}
|	d d}|	d	 t	v rj|d	 }|	d	 
 }n|	d	 tkrud
}d}n|	d	 tkr|d }|d	 
 }| std|  q9tj||
d}|| t|
|
d|jd	d||d}|| q9t|}t|}t||\}}t|| ||fS )a  
    Returns the RecodingSet and SupervisionSet given a dataset part.
    :param speechcommands_version: str, dataset version.
    :param corpus_dir: Pathlike, the path of the data dir.
    :return: the RecodingSet and SupervisionSet for train and valid.
    r2   c                    rL   rM   rO   rT   rW   rI   rJ   rY     rZ   z!_prepare_test.<locals>.<listcomp>rk   rl   rN   rm   rn   r   Nro   rp   rr   rs   rt   )r   rQ   r[   rR   r_   rb   r}   r~   r   WORDSr]   SILENCEUNKNOWNr:   r8   r   r   r   r`   r   rw   r   r   r   r   r   r   )r)   rA   rc   
test_pathsrh   test_recordingstest_supervisions	test_pathr   test_path_splitsr   r   rz   r{   test_recordingtest_segmenttest_recording_settest_supervision_setrI   rW   rJ   _prepare_test   sl   






r   
output_dirc              	   C   sh  t |}| sJ d| td d|  d|  f}t}|dur/t |}|jddd tt}t|dd	D ]x}td
|  d|  t	||d|  ddr^td|  d| d q9|dkrrt
| |\}}t| ||\}	}
n|dkrt| ||\}	}
n|dkrt| |\}	}
|dur|
|d|  d| d  |	|d|  d| d  |	|
d||< q9|S )a  
    Returns the manifests which consist of the Recordings and Supervisions
    :param speechcommands_version: str, dataset version.
    :param corpus_dir: Path to the Speech Commands dataset.
    :param output_dir: Pathlike, the path where to write the manifests.
    :return: a Dict whose key is the dataset part, and the value is Dicts with the keys 'recordings' and 'supervisions'.
    zNo such directory: z9Preparing Speech Commands v0.0{speechcommands_version}...r1   r2   NTr.   zDataset partsr3   zProcessing Speech Commands v0.0z	 subset: speechcommandszjsonl.gz)rC   r   prefixsuffixzSpeech Commands v0.0z already prepared - skipping.r   r   r   _supervisions_z	.jsonl.gz_recordings_)
recordingssupervisions)r   is_dirr8   r9   _SPLITSr7   r   dictr
   r   ri   r   r   r   to_file)r)   rA   r   rB   subsets	manifestsrC   rd   rg   recording_setsupervision_setrI   rI   rJ   prepare_speechcommandsK  sj   


r   )r'   r(   F)N)5__doc__r8   rQ   r;   r=   collectionsr   pathlibr   typingr   r   r   r   r   r	   	tqdm.autor
   lhotser   r   lhotse.audior   r   lhotse.recipes.utilsr   r   lhotse.supervisionr   r   lhotse.utilsr   r   r   r   _DOWNLOAD_PATH_V1_TEST_DOWNLOAD_PATH_V1_DOWNLOAD_PATH_V2_TEST_DOWNLOAD_PATH_V2r   r   r   r   ra   strboolrK   rb   ri   r   r   r   r   rI   rI   rI   rJ   <module>   s     
1

'

=

>

N