o
    Xεi(                  	   @   sL  d Z ddlZddlZddlZddlZddlZddlZddlm  m	Z ddl
mZ ddlmZ ddlZddlmZ ddlmZmZmZmZmZmZmZmZmZ ddlmZ edZe d	Z!d
e"de"fddZ#	dAd
e"dej$ej%ej&e"ef   dej$e fddZ'de"dej(e"e"f fddZ)dd Z*dAddZ+dBddZ,e dZ-de"de"fddZ.	dAdej/d e"d!ej0dej0fd"d#Z1dCd%d&Z2	dAd'e"d(ed)ej$ej3 fd*d+Z4e d,Z5d-e"de"fd.d/Z6d-e"dej7e" fd0d1Z8d2d3e9fd4ed5ej&eef d6e"d7e:fd8d9Z;d4ed5efd:d;Z<d4ed<ede=fd=d>Z>d4ed<ede=fd?d@Z?dS )DzUtility methods for gruut    NPath)urlopen)IPA)		DATA_PROPLANG_ALIASES	NODE_TYPE
EndElement	GraphTypeInlineLexiconLexemeNodeWordRole)_DIRzgruut.utilsz[-_]langreturnc                 C   s   |   dd} t| | S )z
    Try to resolve language using aliases.

    Args:
        lang: Language name or alias

    Returns:
        Resolved language name
    _-)lowerreplacer   get)r    r   ?/home/ubuntu/.local/lib/python3.10/site-packages/gruut/utils.pyresolve_lang%   s   
r   search_dirsc           	      C   s  t | d  }d| }zt|}td| | | W S  ty.   td| | Y nw t	tj
t dd |p;g D }tjd}|rQ|t|d  n|t d	 d  |tjd
  td| | |D ]}||  }|d }| rtd| | |  S qmdS )aX  
    Search for a language's model directory by name.

    Tries to find a directory by:

    #. Importing a module name ``gruut_lang_<short_lang>`` where short_lang is "en" for "en-us", etc.
    #. Looking for ``<lang>/lexicon.db`` in each directory in order:

       * ``search_dirs``
       * ``$XDG_CONFIG_HOME/gruut``
       * A "data" directory next to the gruut module

    Args:
        lang: Full language name (e.g., en-us)
        search_dirs: Optional iterable of directory paths to search first

    Returns:
        Path to the language model directory or None if it can't be found
    r   gruut_lang_z(%s) successfully imported %sz(%s) couldn't import module %sc                 S   s   g | ]}t |qS r   r   ).0pr   r   r   
<listcomp>X   s    z!find_lang_dir.<locals>.<listcomp>XDG_CONFIG_HOMEgruutz.configdataz&(%s) searching %s for language file(s)z
lexicon.dbz!(%s) found language file(s) in %sN)LANG_SPLIT_PATTERNsplitr   
__import___LOGGERdebugget_lang_dirImportErrortypingcastListr   osenvironr   appendhomer   parentis_file)	r   r   	base_langlang_module_namelang_modulemaybe_config_home	check_dirlang_dirlexicon_pathr   r   r   find_lang_dir4   s2   

 r9   
locale_strc                    sj   i }zddl  ddl  | } fdd|jD }W |S  ty&   Y |S  ty4   td Y |S w )z
    Try to get currency names and symbols for a Babel locale.

    Returns:
        Dictionary whose keys are currency symbols (like "$") and whose values are currency names (like "USD")
    r   Nc                    s   i | ]	} j ||qS r   )numbersget_currency_symbol)r   cnbabelr   r   
<dictcomp>   s    z&get_currency_names.<locals>.<dictcomp>get_currency_names)r?   babel.numbersLocalecurrency_symbolsr(   	Exceptionr%   warning)r:   currency_nameslocaler   r>   r   rA   u   s    

	rA   c                 C   s"   t | \}}t|d t||S )z$s -> (s0,s1), (s1,s2), (s2, s3), ...N)	itertoolsteenextzip)iterableabr   r   r   pairwise   s   

rP   c                 C   s   t | g| }tj|d|iS )z/Collect data into fixed-length chunks or blocks	fillvalue)iterrI   zip_longest)rM   nrQ   argsr   r   r   grouper   s   rV      c                 C   sD   t | |}t|t  D ]\}}t|D ]}t|d qqt| S )z3Returns a sliding window of size n over an iterableN)rI   rJ   rL   countrangerK   )rM   rT   	iterableswin_iternum_skippedr   r   r   r   sliding_window   s   r]   z^{[^}]+}tagc                 C      t d| S )zRemove namespace from XML tag )NO_NAMESPACE_PATTERNsub)r^   r   r   r   tag_no_namespace      rc   elementnamedefaultc                 C   s4   | j  D ]\}}td|}||kr|  S q|S )z1Search for an attribute by key without namespacesr`   )attribitemsra   rb   )re   rf   rg   keyvalue	key_no_nsr   r   r   attrib_no_namespace   s   rm   Fc           	      c   s    d}|r	ddi}| |fV  | j dur| j nd}| r|V  t| }t|d }t|D ]\}}||k}t||dE dH  q-t| V  | jdurL| jnd}| rW|V  dS dS )z9Yields element, text, sub-elements, end element, and tailNis_lastTr`      )rn   )textstriplistlen	enumeratetext_and_elementsr	   tail)	re   rn   element_metadatarp   childrenlast_child_idx	child_idxchildrv   r   r   r   ru      s$   


ru   urilexiconssl_contextc                 C   s>  |du rt  }t| |d}t|}| D ]t}t|jdkr"qt }t	|d}|r5t
|  |_|D ]$}t|j}	|	dkrL|jrK|j |_q7|	dkr[|jr[t|j |_q7|jr|jr|j|j}
|
du rui }
|
|j|j< |
dus{J |jptjg}|D ]}|j|
|< qqW d   dS 1 sw   Y  dS )z(Loads a pronunciation lexicon from a URIN)contextlexemerolegraphemephoneme)sslcreate_default_contextr   etreeparsegetrootrc   r^   r   rm   setrq   r#   rolesrp   r   maybe_split_ipaphonemeswordsr   r   DEFAULT)r|   r}   r~   responsetreelexeme_elemr   role_strlexeme_child	child_tagrole_phonemesr   r   r   r   r   load_lexicon   sB   


"r   z\Wsc                 C   r_   )z)Removes non-word characters from a stringr`   )NON_WORDS_PATTERNrb   r   r   r   r   remove_non_word_chars  rd   r   c                 C   s   d| v r|   S t| S )zWSplit on whitespace if a space is present, otherwise return string as list of graphemes )r#   r   	graphemesr   r   r   r   r     s   
r   z--ro   graphnodeindentlevelc                 C   sj   t |tr|}|j}n|}tt| j| t }||| || | |D ]}t| |||d |d q%dS )zPrints a graph to the consolero   )r   r   
print_funcN)	
isinstancer   r   r)   r*   nodesr   
successorsprint_graph)r   r   r   r   r   n_data
graph_node	succ_noder   r   r   r   -  s   
r   c                 c   s:    t | |jD ]}| |dksq| j| t V  qdS )z:Iterate through the leaves of a graph in depth-first orderr   N)nxdfs_preorder_nodesr   
out_degreer   r   )r   r   dfs_noder   r   r   leavesC  s   r   parent_nodec                 C   sj   d}t t||D ])}| ||D ]!\}}|ddt|i|}|j|j|d ||j|j d}qq	|S )z5Splits leaf nodes of tree into zero or more sub-nodesFr   )r!   TNr   )rr   r   rs   add_noder   add_edge)
split_funcr   r   was_changed	leaf_node
node_classnode_kwargsnew_noder   r   r   pipeline_splitL  s   r   c                 C   s*   d}t t||D ]	}| ||rd}q	|S )z0Transforms leaves of tree with a custom functionFT)rr   r   )transform_funcr   r   r   r   r   r   r   pipeline_transformZ  s   
r   )N)rW   )F)@__doc__rI   loggingr,   rer   r)   xml.etree.ElementTreer   ElementTreepathlibr   urllib.requestr   networkxr   	gruut_ipar   gruut.constr   r   r   r	   r
   r   r   r   r   gruut.resourcesr   	getLoggerr%   compiler"   strr   OptionalIterableUnionr9   DictrA   rP   rV   r]   ra   rc   ElementAnyrm   ru   
SSLContextr   r   r   r+   r   printintr   r   boolr   r   r   r   r   r   <module>   s    ,


A




$


0
	