o
    헦if                     @   s   d dl Z d dlZd dlmZ d dlmZmZmZmZ d dl	m
Z
 d dlmZ dddZh d	Zd
ee dedee fddZG dd de
ZdS )    N)Path)IterableListTupleUnion)Dataset)download_url_to_file@209a8b4cd265013e96f4658632a9878103b0c5abf62b50d4ef3ae1be226b29e4@408ccaae803641c6d7b626b6299949320c2dbca96b2220fd3fb17887b023b027)Bhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7bJhttp://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols>8   .DOT+PLUS-DASH"QUOTE'QUOTE(PAREN)PAREN,COMMA--DASH.POINT/SLASH:COLON{BRACE(PARENS)PARENS-HYPHEN.PERIOD"UNQUOTE%PERCENT.DECIMAL
"END-QUOTE
"IN-QUOTES
#HASH-MARK
&AMPERSAND
'END-QUOTE
)END-PAREN
.FULL-STOP#POUND-SIGN#SHARP-SIGN(LEFT-PAREN)END-PARENS...ELLIPSIS;SEMI-COLON{LEFT-BRACE{OPEN-BRACE"CLOSE-QUOTE'INNER-QUOTE(PARENTHESES)CLOSE-PAREN)RIGHT-PAREN}CLOSE-BRACE}RIGHT-BRACE"DOUBLE-QUOTE"END-OF-QUOTE'SINGLE-QUOTE(BEGIN-PARENS)END-THE-PAREN;SEMI-COLON(1)?QUESTION-MARK(IN-PARENTHESES)UN-PARENTHESES'END-INNER-QUOTE)END-PARENTHESES(OPEN-PARENTHESES!EXCLAMATION-POINT)CLOSE-PARENTHESESlinesexclude_punctuationsreturnc                 C   s   t d}g }| D ]A}|r|drq	| d\}}|tv r7|r#q	|dr+d}n|dr3d}n|d }t |d|}|d}|||f q	|S )	Nz
\([0-9]+\)z;;;z  z...z--r     )recompile
startswithstripsplit_PUNCTUATIONSsubappend)rE   rF   _alt_recmudictlinewordphones rW   Y/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torchaudio/datasets/cmudict.py_parse_dictionaryJ   s$   



rY   c                   @   s   e Zd ZdZ	ddddddeeef ded	ed
ededdfddZde	de
eee f fddZde	fddZedee fddZdS )CMUDictaZ  *CMU Pronouncing Dictionary* :cite:`cmudict` (CMUDict) dataset.

    Args:
        root (str or Path): Path to the directory where the dataset is found or downloaded.
        exclude_punctuations (bool, optional):
            When enabled, exclude the pronounciation of punctuations, such as
            `!EXCLAMATION-POINT` and `#HASH-MARK`.
        download (bool, optional):
            Whether to download the dataset if it is not found at root path. (default: ``False``).
        url (str, optional):
            The URL to download the dictionary from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b"``)
        url_symbols (str, optional):
            The URL to download the list of symbols from.
            (default: ``"http://svn.code.sf.net/p/cmusphinx/code/trunk/cmudict/cmudict-0.7b.symbols"``)
    TFr   r   )downloadurlurl_symbolsrootrF   r[   r\   r]   rG   Nc          
      C   sB  || _ t|| _tj| jstd| | jtj| }| jtj| }tj|sC|s7td| t	
|d }t||| tj|s^|sRtd| t	
|d }t||| t|d}	dd |	 D | _W d    n1 sxw   Y  t|ddd}	t|	 | j d	| _W d    d S 1 sw   Y  d S )
Nz#The root directory does not exist; z`The dictionary file is not found in the following location. Set `download=True` to download it. z\The symbol file is not found in the following location. Set `download=True` to download it. rc                 S   s   g | ]}|  qS rW   )rM   ).0rT   rW   rW   rX   
<listcomp>   s    z$CMUDict.__init__.<locals>.<listcomp>zlatin-1)encoding)rF   )rF   r   
_root_pathospathisdirRuntimeErrorbasenameexists
_CHECKSUMSgetr   open	readlines_symbolsrY   _dictionary)
selfr^   rF   r[   r\   r]   	dict_filesymbol_filechecksumtextrW   rW   rX   __init__{   s<   

"zCMUDict.__init__nc                 C   s
   | j | S )a  Load the n-th sample from the dataset.

        Args:
            n (int): The index of the sample to be loaded.

        Returns:
            Tuple of a word and its phonemes

            str:
                Word
            List[str]:
                Phonemes
        )ro   )rp   rv   rW   rW   rX   __getitem__   s   
zCMUDict.__getitem__c                 C   s
   t | jS )N)lenro   rp   rW   rW   rX   __len__   s   
zCMUDict.__len__c                 C   s
   | j  S )zLlist[str]: A list of phonemes symbols, such as ``"AA"``, ``"AE"``, ``"AH"``.)rn   copyry   rW   rW   rX   symbols   s   
zCMUDict.symbols)T)__name__
__module____qualname____doc__r   strr   boolru   intr   r   rw   rz   propertyr|   rW   rW   rW   rX   rZ   i   s0    

)rZ   )rd   rJ   pathlibr   typingr   r   r   r   torch.utils.datar   torchaudio._internalr   rj   rO   r   r   rY   rZ   rW   rW   rW   rX   <module>   s    <