o
    Xεia                     @   s   d Z ddlZddlZddlZddlZddlm  mZ ddlm	Z	m
Z
 ddlmZ ddlmZ ddlmZ ddlZddlZddlZi dddd	d
ddddddddddddddddddddddddddd d!d!d"d#iZddhZeeee ZzejZejZejeejf ZW n e y   ejZejZejeejf ZY nw ej!e Z"e#Z$d$Z%G d%d& d&Z&e'd'Z(e'd(Z)e'd)Z*e'd*Z+e	G d+d, d,Z,G d-d. d.eeZ-G d/d0 d0eeZ.G d1d2 d2eeZ/G d3d4 d4eeZ0G d5d6 d6e#eZ1e	G d7d8 d8Z2e	G d9d: d:Z3e	G d;d< d<Z4e	G d=d> d>e4Z5e	G d?d@ d@e4Z6e	G dAdB dBe4Z7e	G dCdD dDe4Z8e	G dEdF dFe4Z9e	G dGdH dHe4Z:e	G dIdJ dJe4Z;e	G dKdL dLe4Z<e	G dMdN dNe4Z=e	G dOdP dPZ>e	G dQdR dRZ?G dSdT dTZ@G dUdV dVZAG dWdX dXZBG dYdZ dZZCe	G d[d\ d\ZDd]ed^eEfd_d`ZFe'daZGd]ed^ejHe fdbdcZId]ed^ejJeef fdddeZKd]ed^efdfdgZLdhejeef d^efdidjZMe	G dkdl dlZNdS )mz Shared classes, types, and enums    N)	dataclassfield)datetime)Decimal)Enumarcszcs-czdezde-deenzen-uszen-gbeszes-eszes-mxfafrzfr-fritzit-itzlb-lblbnlznl-nlzpt-brptruzru-rusvzsv-seswzhzzh-cndatac                   @   s  e Zd ZU dZejeejejejf f ed< 	 defddZ	dedefdd	Z
ded
efddZded
eje fddZded
eje fddZded
ejejeef  fddZdejejeef  fddZdejejeef  fddZd
efddZdS )	GraphTypezType wrapper for networkx graphnodesnodec                 K      dS )zAdd a new node to the graphN )selfr   kwargsr   r   ?/home/ubuntu/.local/lib/python3.10/site-packages/gruut/const.pyadd_nodeH      zGraphType.add_nodesrcdstc                 C   r   )zAdd a new edge to the graphNr   )r   r!   r"   r   r   r   add_edgeL   r    zGraphType.add_edgereturnc                 C   r   )z(Get number of outgoing edges from a nodeNr   r   r   r   r   r   
out_degreeP   r    zGraphType.out_degreec                 C   r   )zYield nodes on outgoing edgesNr   r%   r   r   r   
successorsT   r    zGraphType.successorsc                 C   r   )zYield nodes from incoming edgesNr   r%   r   r   r   predecessorsX   r    zGraphType.predecessorsc                 C   r   )z Yield outgoing edges from a nodeNr   r%   r   r   r   	out_edges\      zGraphType.out_edgesedgesc                 C   r   )zAdd edges from iterableNr   r   r+   r   r   r   add_edges_fromb   r*   zGraphType.add_edges_fromc                 C   r   )zRemove edges from iterableNr   r,   r   r   r   remove_edges_fromh   r*   zGraphType.remove_edges_fromc                 C   r   )z Get number of nodes in the graphNr   r   r   r   r   __len__n   r    zGraphType.__len__N)__name__
__module____qualname____doc__typingDict	NODE_TYPEAny__annotations__r   r#   intr&   Iterabler'   r(   Tupler)   r-   r.   r0   r   r   r   r   r   B   s*   
  


r   z(\s+)z\s+z^(\s*)\S+(\s*)$z[0-9]c                   @   s8   e Zd ZU dZeed< dZeed< dZej	e
 ed< dS )TimezParsed time from texthoursr   minutesNperiod)r1   r2   r3   r4   r:   r9   r?   r@   r5   Optionalstrr   r   r   r   r=   |   s   
 r=   c                   @   s2   e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZ	 dZ	dS )	InterpretAsz8Supported options for interpret-as attribute of <say-as>z	spell-outdatenumbercurrencytimewordN)
r1   r2   r3   r4   	SPELL_OUTDATENUMBERCURRENCYTIMEWORDr   r   r   r   rC      s    rC   c                   @   sX   e Zd ZdZdZ	 dZ	 dZ	 dZ	 dZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdS )InterpretAsFormatz2Supported options for format attribute of <say-as>cardinalordinaldigitsyeardmymdyymdomymoyymoymmymdmoomyN)r1   r2   r3   r4   NUMBER_CARDINALNUMBER_ORDINALNUMBER_DIGITSNUMBER_YEARDATE_DMYDATE_MDYDATE_YMDDATE_DMY_ORDINALDATE_MDY_ORDINALDATE_YMD_ORDINALDATE_YMDATE_MYDATE_MDDATE_MD_ORDINALDATE_DM_ORDINALDATE_Yr   r   r   r   rO      s,    rO   c                   @      e Zd ZdZdZ	 dZdS )	BreakTypezTypes of sentence breaksminormajorN)r1   r2   r3   r4   MINORMAJORr   r   r   r   rq          rq   c                   @   rp   )WordRolez4Role of a word. Used to disambiguate pronunciations. zgruut:letterN)r1   r2   r3   r4   DEFAULTLETTERr   r   r   r   rw      rv   rw   c                   @   s*   e Zd ZdZdZdZ	 dZ	 dZ	 dZdS )SSMLParsingStatezCurrent state of SSML parsingr               N)	r1   r2   r3   r4   ry   IN_WORD
IN_LEXICONIN_LEXICON_GRAPHEMEIN_LEXICON_PHONEMEr   r   r   r   r{      s    r{   c                   @   sL   e Zd ZU dZeed< dZeed< eedZ	e
jee
jeef f ed< dS )InlineLexiconz5SSML lexicon defined inline (not standards compliant)
lexicon_idrx   alphabetdefault_factorywordsN)r1   r2   r3   r4   rB   r9   r   r   dictr   r5   r6   PHONEMES_TYPEr   r   r   r   r      s   
 &r   c                   @   sH   e Zd ZU dZdZeed< dZej	e
 ed< dZej	eje  ed< dS )LexemezEntry of an inline lexiconrx   graphemeNphonemesroles)r1   r2   r3   r4   r   rB   r9   r   r5   rA   r   r   Setr   r   r   r   r      s
   
 r   c                   @   sR   e Zd ZU dZeed< dZeje	j
 ed< dZeed< dZeed< dZeed	< dS )
Nodez-Base class of all text processing graph nodesr   Nelementrx   voicelangFimplicit)r1   r2   r3   r4   r7   r9   r   r5   rA   etreeElementr   rB   r   r   boolr   r   r   r   r      s   
 r   c                   @      e Zd ZdZdS )
IgnoreNodezNode should be ignoredNr1   r2   r3   r4   r   r   r   r   r   	      r   c                   @   s.   e Zd ZU dZdZeed< 	 defddZdS )	BreakNodez!Represents a user-specified breakrx   rG   r$   c                 C   sH   | j drt| j dd S | j dr"tt| j dd d S dS )z/Get number of milliseconds from the time stringmsNsi  r   )rG   endswithr:   floatr/   r   r   r   get_milliseconds  s
   zBreakNode.get_millisecondsN)	r1   r2   r3   r4   rG   rB   r9   r:   r   r   r   r   r   r     s
   
 r   c                   @   s   e Zd ZU dZdZeed< dS )MarkNodez Represents a user-specified markrx   nameN)r1   r2   r3   r4   r   rB   r9   r   r   r   r   r   "  s   
 r   c                   @   sX  e Zd ZU dZdZeed< dZeed< dZe	j
eef ed< dZe	j
eef ed< dZe	je ed< dZe	je ed	< dZe	je ed
< dZe	je ed< dZe	je ed< ejZe	j
eef ed< dZe	je ed< dZe	je	je  ed< dZe	je ed< dZe	je	je  ed< dZ eed< dZ!eed< dZ"eed< dZ#eed< dZ$eed< dS )WordNodezRepresents a single wordrx   texttext_with_wsinterpret_asformatNrE   rD   currency_symbolcurrency_namerG   roleposr   
in_lexiconlexicon_idsTis_maybe_numberis_maybe_dateis_maybe_currencyis_maybe_timeFis_from_broken_word)%r1   r2   r3   r4   r   rB   r9   r   r   r5   UnionrC   r   rO   rE   rA   r   rD   r   r   r   rG   r=   rw   ry   r   r   r   Sequencer   r   r   r   r   r   r   r   r   r   r   r   r   *  s*   
 r   c                   @   s@   e Zd ZU dZdZejeef e	d< dZ
ee	d< dZee	d< dS )BreakWordNodez*Represents a major/minor break in the textrx   
break_typer   r   N)r1   r2   r3   r4   r   r5   r   rB   rq   r9   r   r   r   r   r   r   r   I  s
   
 r   c                   @   s*   e Zd ZU dZdZeed< dZeed< dS )PunctuationWordNodez+Represents a punctuation marker in the textrx   r   r   N)r1   r2   r3   r4   r   rB   r9   r   r   r   r   r   r   R  s   
 r   c                   @   r   )SentenceNodez-Represents a sentence with WordNodes under itNr   r   r   r   r   r   Z  r   r   c                   @   r   )ParagraphNodez2Represents a paragraph with SentenceNodes under itNr   r   r   r   r   r   a  r   r   c                   @   r   )	SpeakNodezTop-level node for SSMLNr   r   r   r   r   r   h  r   r   c                   @   s\  e Zd ZU dZeed< 	 eed< 	 eed< 	 dZeed< 	 dZeed< 	 dZ	eed	< 	 dZ
eed
< 	 dZeed< 	 dZeed< 	 dZeje ed< 	 dZejeje  ed< 	 dZeed< 	 dZeed< 	 dZeed< 	 dZeje ed< 	 dZeje ed< 	 dZeed< 	 dZeed< 	 dZejeje  ed< 	 dZejeje  ed< 	 dd ZdS )WordzProcessed word from a Sentenceidxr   r   rx   
leading_wstrailing_wsr   sent_idxpar_idxr   r   Nr   r   Fis_major_breakis_minor_breakis_punctuationis_break	is_spokenpause_before_mspause_after_msmarks_beforemarks_afterc                 C   sH   | j d u r| jp
| j| _ | jd u r| jp| j  | _t| j\| _| _d S N)	r   r   r   r   r   default_get_whitespacer   r   r   r/   r   r   r   __post_init__  s
   

zWord.__post_init__)r1   r2   r3   r4   r:   r9   rB   r   r   r   r   r   r   r   r5   rA   r   r   r   r   r   r   r   r   r   r   r   Listr   r   r   r   r   r   r   r  sV   
 r   c                   @   s   e Zd ZU dZeed< 	 eed< 	 eed< 	 eed< 	 dZeed< 	 dZeed	< 	 dZ	eed
< 	 e
edZeje ed< 	 dZeed< 	 dZeed< 	 dZejeje  ed< 	 dZejeje  ed< 	 dd Zdd Zdd ZdS )Sentencez"Processed sentence from a documentr   r   r   text_spokenr   r   rx   r   r   r   r   r   r   Nr   r   c                 C   
   t | jS )zIterates over words)iterr   r/   r   r   r   __iter__     
zSentence.__iter__c                 C   r   )zNumber of words)lenr   r/   r   r   r   r0     r   zSentence.__len__c                 C   s
   | j | S )zGets word by index)r   )r   keyr   r   r   __getitem__  r   zSentence.__getitem__)r1   r2   r3   r4   r:   r9   rB   r   r   r   r   listr   r5   r   r   r   r   r   rA   r   r   r0   r   r   r   r   r   r     s:   
 r   c                
   @   s:   e Zd ZdZ	d
dedeje dedeje fdd	Z	dS )LookupPhonemesz+Look up phonemes for word/role in a lexiconNTrH   r   do_transformsr$   c                 C      d S r   r   )r   rH   r   r   r   r   r   __call__     zLookupPhonemes.__call__)NT)
r1   r2   r3   r4   rB   r5   rA   r   r   r   r   r   r   r   r     s    r   c                   @   s6   e Zd ZdZ	ddedeje deje fddZdS )	GuessPhonemeszGuess phonemes for word/roleNrH   r   r$   c                 C   r   r   r   )r   rH   r   r   r   r   r      r   zGuessPhonemes.__call__r   )	r1   r2   r3   r4   rB   r5   rA   r   r   r   r   r   r   r     s    r   c                   @   s.   e Zd ZdZdeje deje fddZdS )GetPartsOfSpeechz!Get part of speech tags for wordsr   r$   c                 C   r   r   r   )r   r   r   r   r   r   	  s   zGetPartsOfSpeech.__call__N)r1   r2   r3   r4   r5   r   rB   r   r   r   r   r   r     s    "r   c                   @   s(   e Zd ZdZdededejfddZdS )PostProcessSentencez@Post-process each sentence node after tokenization/phonemizationgraphsentence_nodesettingsc                 C   r   r   r   )r   r   r   r   r   r   r   r     r   zPostProcessSentence.__call__N)	r1   r2   r3   r4   r   r   r5   r8   r   r   r   r   r   r     s    r   c                   @   s   e Zd ZU dZejed< dS )
EndElementz9Wrapper for end of an XML element (used in TextProcessor)r   N)r1   r2   r3   r4   r   r   r9   r   r   r   r   r     s   
 r   r   r$   c                 C   s   t | duS )z*True if string contains at least one digitN)HAS_DIGIT_PATTERNsearchr   r   r   r   	has_digit   s   r   z(\s*\S+(?:\s+|$))c                 c   s    t dt| E dH  dS )zSplit text on whitespaceN)filterDEFAULT_WORD_PATTERNfindallr   r   r   r   default_split_words(  s   r   c                 C   s.   d\}}t | }|dur| \}}||fS )z3Returns leading and trailing whitespace of a string)rx   rx   N)SURROUNDING_WHITESPACE_PATTERNmatchgroups)r   r   r   r   r   r   r   r   -  s
   
r   c                 C   s   t d|  S )z)Replace multiple spaces with single space )NORMALIZE_WHITESPACE_PATTERNsubstripr   r   r   r   default_normalize_whitespace7  s   r   str_or_patternc                 C   s&   t | tr| S t | tsJ t| S )z&Compile regex pattern if it's a string)
isinstanceREGEX_PATTERNrB   recompile)r   r   r   r   maybe_compile_regex<  s   

r   c                   @   sF  e Zd ZU dZeed< 	 eZej	egej
e f ed< 	 dZeed< 	 dZeed< 	 dZejej	egef  ed	< 	 eZej	egejeef f ed
< 	 eZej	egef ed< 	 dZejeje  ed< 	 dZeje ed< 	 dZejeje  ed< 	 dZeje ed< 	 eedZejejeef  ed< 	 eedZ ej!eef ed< 	 eedZ"ej!eef ed< 	 ee#dZ$eje ed< 	 dZ%eje ed< 	 ee#dZ&eje ed< 	 dZ'eje ed< 	 ee#dZ(eje ed< dZ)eje ed< 	 e*Z+ejej	egef  ed< 	 dZ,ejej	egeje- f  ed< 	 dZ.eje ed< 	 dZ/eje ed< 	 dZ0eed< 	 eedZ1ej2eef ed < 	 eedZ3eje ed!< 	 e*Z4ejej	egef  ed"< 	 dZ5eje ed#< 	 e*Z6ejej	egef  ed$< 	 e7j8Z9ej:ee7f ed%< 	 e*Z;ejej	egef  ed&< 	 dZ<ejej	egeje= f  ed'< 	 dZ>ejej	e=gej
e f  ed(< 	 dZ?eje@ ed)< 	 dZAejej	egef  ed*< 	 dZBejej	egeje f  ed+< 	 dZCejeD ed,< 	 dZEejeF ed-< 	 dZGejej	egef  ed.< 	 dZHejeI ed/< 	 d0d1 ZJdS )2TextProcessorSettingsz.Language specific settings for text processingr   split_wordsr   join_strTkeep_whitespaceNis_non_wordget_whitespacenormalize_whitespacebegin_punctuationsbegin_punctuations_patternend_punctuationsend_punctuations_patternr   replacementsabbreviationsspell_out_wordsmajor_breaksmajor_breaks_patternminor_breaksminor_breaks_patternword_breaksword_breaks_patternr   get_ordinalbabel_localenum2words_langUSDdefault_currency
currenciescurrency_symbolsr   dateparser_langr   default_date_formatr   
parse_timeverbalize_timeget_parts_of_speechis_initialismsplit_initialismlookup_phonemesguess_phonemespre_process_textpost_process_sentencec                 C   s  | j d u r'd| jv r#| jjddd}d|d  |d  g| _ n| j| _ | jd u r0| j | _| jd u r>| j dd | _dd | jD | _i }| j	
 D ]1\}}t|tr{|dsv| jrvd	d
d | jD }| d| d}|d7 }t|}|||< qN|| _	| jd u r| jrd	dd | jD }d| d| _| jd urt| j| _| jd u r| jrd	dd | jD }d| d| _| jd urt| j| _| jd u r| jrd	dd | jD }d| d| _| jd urt| j| _| jd u r| jrd	dd | jD }d| d| _| jd urt| j| _| jd u r6| jr6d	dd | jD }d| d| _| jd urBt| j| _| jsbzt| j }dd |jD | _W n
 tya   Y nw | jsrt | jt!j"dd| _d S d S ) N-r|   )maxsplit_r   c                 S   s   g | ]
\}}t ||fqS r   )r   ).0patterntemplater   r   r   
<listcomp>  s    
z7TextProcessorSettings.__post_init__.<locals>.<listcomp>$|c                 s       | ]}t |V  qd S r   r   escaper'  br   r   r   	<genexpr>  s    

z6TextProcessorSettings.__post_init__.<locals>.<genexpr>z
(?P<break>z)?(?P<whitespace>\s*)$z\g<break>\g<whitespace>c                 s   r-  r   r.  r0  r   r   r   r2        z^()c                 s   r-  r   r.  r0  r   r   r   r2    r3  (z)$c                 s   r-  r   r.  r0  r   r   r   r2  #  r3  z((?:z)+(?:\s+|$))c                 s   r-  r   r.  r0  r   r   r   r2  .  r3  z)(?:\s+|$))c                 s   r-  r   r.  r0  r   r   r   r2  8  r3  z(?:c                 S   s   i | ]	}t j||qS r   )babelnumbersget_currency_symbol)r'  cnr   r   r   
<dictcomp>E  s    z7TextProcessorSettings.__post_init__.<locals>.<dictcomp>T)r   reverse)#r  r   splitjoinlowerupperr  r  r	  r
  itemsr   rB   r   r  r   r   r  r  r   r  r  r  r  r  r  r  r  r6  Localer  	Exceptionsortedoperatorlength_hint)r   
lang_partscompiled_abbreviationsr(  r)  break_pattern_strpattern_str
locale_objr   r   r   r     s   











z#TextProcessorSettings.__post_init__)Kr1   r2   r3   r4   rB   r9   r   r   r5   Callabler;   r   r  r   r  rA   r   r  r<   r   r  r  r   r  
REGEX_TYPEr  r  r   r   r	  r   r   r
  r6   r  setr  r  r  r  r  r  r   r   r  r:   r  r  r  r  MutableMappingr  r   r  r   rO   rh   r  r   r   r  r=   r  r  r   r  r  r   r   r!  r   r"  r#  r   r   r   r   r   r   r   K  s   
 $$r   )Or4   	itertoolsrD  r   r5   xml.etree.ElementTreer   ElementTreedataclassesr   r   r   decimalr   enumr   r6  
babel.corebabel.numbersLANG_ALIASESENGLISH_LANGSrM  chainvaluesKNOWN_LANGSPatternr   MatchREGEX_MATCHr   rB   rL  AttributeErrorr   r   r:   r7   	DATA_PROPr   r   DEFAULT_SPLIT_PATTERNr   r   r   r=   rC   rO   rq   rw   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r;   r   r<   r   r   r   r   r   r   r   r   <module>   s    	


3


"


	I7				


