o
    Ni[                     @   sX  d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlZddlm	  m
Z ddlmZ dZejjdejjdejjd	iZg d
Zedg dfdg dfdg dfdddgfddgfdg fddgfdg fddgfdg dfddgfdg dfddgfddgfd dgfd!g d"fd#dgfd$d%dgfgZG d&d' d'ejjZG d(d) d)ejjZdS )*Mozilla Common Voice Dataset.    )absolute_import)division)print_functionNzjhttps://voice-prod-bundler-ee1969a6ce8178826482b88e843c335139bd3fb4.s3.amazonaws.com/cv-corpus-1/{}.tar.gztraintest	validated)malefemaleotheren)canadausindianphilippinesenglandscotlandr   	australiahongkongafrican
newzealandwalesirelandmalaysiabermuda	singaporesouthatlandticde)
germanyswitzerlandaustriar   liechtensteinfrancenetherlandsunited_kingdomhungarypolandfr)r"   r   belgiumalgeriar   r   r   tunisiasenegalunited_statesst_pierre_et_miquelonmonacomayottecote_d_ivoire
guadeloupe
martiniquereunionportugalr#   cyr$   r   brcvtrttkyzga-IE)r   ulaidh	connachtakabca)northwesterncentralr   	valencianbalearicnorthernzzh-TWslitnl)r#   r(   r   cnheo
internaciac                       s,   e Zd ZdZejjjd fdd	Z  Z	S )CommonVoiceConfigz4Configuration Class for Mozilla CommonVoice Dataset.Nc                    sv   |t vrtdtt  ||| _|| _|d| |dd|  |dtj	
d tt| jdi | dS )	aC  Constructs CommonVoiceConfig.

    Args:
     language: `str`, one of [ca, nl, br, de, sl, cy, en, kab, tt, zh-TW, eo,
       it, fr, ga-IE, tr, ky, cnh, cv]. Language Code of the Dataset to be used.
     accents: `list[str]`, labels for the accents of the language
     **kwargs: keywords arguments forwarded to super
    z#language must be one of {}. Not: {}namedescriptionzLanguage Code: %sversionz1.0.0N )_LANGUAGE_ACCENTS
ValueErrorformatlistkeyslanguageaccents
setdefaulttfdscoreVersionsuperrJ   __init__)selfrT   rU   kwargs	__class__rN   Y/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/audio/commonvoice.pyr[   L   s   
zCommonVoiceConfig.__init__)N)
__name__
__module____qualname____doc__rW   rX   	api_utilsdisallow_positional_argsr[   __classcell__rN   rN   r^   r`   rJ   I   s    rJ   c                   @   s:   e Zd ZdZdd e D Zdd Zdd Zdd	 Z	d
S )CommonVoicer   c                 C   s   g | ]
\}}t ||d qS ))rT   rU   )rJ   ).0larN   rN   r`   
<listcomp>d   s    
zCommonVoice.<listcomp>c                 C   sb   t jjd| t jt j tjtjt j t jjt	dt jj| j
jdt j t j dddS )NzMozilla Common Voice Dataset)names)	client_idupvotes	downvotesagegenderaccentsentencevoicez%https://voice.mozilla.org/en/datasets)rL   builderfeatureshomepage)rW   rX   DatasetInforw   FeaturesDictTexttfint32
ClassLabel_GENDER_CLASSESbuilder_configrU   Audio)r\   rN   rN   r`   _infoi   s   zCommonVoice._infoc                    s:   | t| jjtjd  fddt	 D S )Nclipsc              
      s4   g | ]\}}t jj| tjd | ddqS )z%s.tsv)
audio_path
label_path)rK   
gen_kwargs)rW   rX   SplitGeneratorospathjoin)ri   kvclip_folderdl_pathrN   r`   rl      s    z1CommonVoice._split_generators.<locals>.<listcomp>)
download_and_extract_DOWNLOAD_URLrQ   r   rT   r   r   r   _SPLITSitems)r\   
dl_managerrN   r   r`   _split_generators   s   zCommonVoice._split_generatorsc                 c   s    t jj|c}tj|dd}t|D ]N\}}tj	|d|d  }t jj
|rb||d ||d |d r<t|d nd|d	 rGt|d	 nd|d
 |d rS|d nd|d r\|d nddfV  qW d   dS 1 snw   Y  dS )zGenerate Voice samples and statements given the data paths.

    Args:
      audio_path: str, path to audio storage folder
      label_path: str, path to the label files

    Yields:
      example: The example `dict`
    	)	delimiterz%s.mp3r   rn   rt   up_votesr   
down_votesrq   rr   rs   )rn   ru   rt   ro   rp   rq   rr   rs   N)r|   iogfileGFilecsv
DictReader	enumerater   r   r   existsint)r\   r   r   file_datasetirow	file_pathrN   rN   r`   _generate_examples   s&   

"zCommonVoice._generate_examplesN)
ra   rb   rc   rd   rO   r   BUILDER_CONFIGSr   r   r   rN   rN   rN   r`   rh   b   s    rh   )rd   
__future__r   r   r   collectionsr   r   tensorflow.compat.v2compatv2r|   tensorflow_datasets.public_api
public_apirW   r   SplitTRAINTEST
VALIDATIONr   r   OrderedDictrO   rX   BuilderConfigrJ   GeneratorBasedBuilderrh   rN   rN   rN   r`   <module>   sN   






#