o
    Pεi[                     @   s
  U d Z ddlZddlZddlmZ ddlmZ ddlmZ ee	j
Zed Zddd	d
ddddddddddZG dd deeZG dd deeZG dd deeZG dd deeZG dd deeZG dd  d eeZG d!d" d"eeZeG d#d$ d$Zg ed%ejejd&ed'ejejd(ed)ejejd&ed*ejejd&d)d+ed,ejejd(ed-ejejd&ed.ejejd(ed/ejejd&ed0ejejd(ed1ejejd(ed2ejejd&ed3ejejd&d(d4ed5ejejd(ed6ejejd&ed7ejejd(ed8ejejd&ed9ejejd(ed:ejejd&ed;ej ejd&ed<ej ejd(ed=ej ejd&ed>ej ejd(ed?ej ejd&ed@ej ejd(edAej ejd(d(d4edBej!ejd&edCej!ejd&edDej"ejd&edEej"ejd&d(d4edFej"ejd(edGej"ejd&edHej"ejd(Z#dIdJ e#D Z$eG dKdL dLZ%eG dMdN dNZ&e&d:d&e&dOd(e&dPd(dOd+gZ'dQdJ e'D Z(G dRdS dSeeZ)G dTdU dUeeZ*G dVdW dWeeZ+eG dXdY dYZ,g e,dZe)j-e*j.d(e,d[e)j-e*j/d(e,d\e)j-e*j0d(e,d]e)j-e*j1d(e,d^e)j-e*j2d(e,d_e)j-e*j3d(e,d`e)j-e*j4d(e,dae)j5e*j.d&e,dbe)j5e*j.d(e,dce)j5e*j0d&e,dde)j5e*j0d(e,dee)j5e*j1d&e,dfe)j5e*j1d(e,dge)j5e*j2d&e,dhe)j5e*j2d(e,die)j5e*j3d&e,dje)j5e*j3d(e+j6dke,dle)j5e*j3d(e+j6djdme,dne)j5e*j4d&e+j6dke,doe)j5e*j4d(e+j6dke,dpe)j5e*j7d&e,dqe)j5e*j8d&e,dre)j9e*j/d&e,dse)j9e*j/d(e,dte)j9e*j:d&e,dse)j9e*j:d(e,due)j9e*j0d&e,dve)j9e*j0d(e,dwe)j9e*j;d&e,dxe)j9e*j;d(e,dye)j9e*j1d&e,dze)j9e*j1d(e,d{e)j9e*j2d&e,d|e)j9e*j2d(e,d}e)j9e*j3d&e,d~e)j<e*j.d&e,de)j<e*j.d(e,de)j<e*j/d&e,de)j<e*j/d(e,de)j<e*j:d&e,de)j<e*j:d(e,de)j<e*j0d&e,de)j<e*j0d(e,de)j<e*j;d&e,de)j<e*j;d(e,de)j<e*j1d&e,de)j<e*j1d(e,de)j<e*j2d&e,de)j<e*j2d&dd+e,de)j<e*j2d(e,de)j<e*j3d&e,de)j<e*j3d(e,de)j<e*j4d&e,de)j<e*j4d(e+j=dke,de)j<e*j7d&e,de)j<e*j8d&e,de)j<e*j8d(e,de)j>e*j.d(e,de)j>e*j/d(e,de)j>e*j0d(e+j=dke,de)j>e*j1d(e+j=dke,de)j>e*j2d(e,de)j>e*j3d(e,de)j?e*j/d(e,de)j?e*j0d(e+j=dke,de)j?e*j1d(e+j=dke,de)j@e*j.d(e,de)j@e*j0d(e+j=dke,de)j@e*j4d(e+j=dke,de)jAe*j0d(e+jBdke,de)jAe*j0d(d(e+jBde,de)jAe*j1d(e+jBdke,de)jAe*j2d(e,de)jAe*j3d(e+jBdkZCddJ eCD ZDeG dd dZEG dd dZFdZGddgeGdddgeGgdd eD  eGddgeGgdd eD  eGgdd eD  eGddgeGgdd eD  eGddgeGgdd e)D  eGgdd e*D  eGdddldgeGgdd eD  dZHejIeejJe f eKd< h dZLejMe eKd< dejNeejOePeQf f fddńZReR ZSdS )z+Enums, vowels, and consonants for gruut-ipa    N)	dataclass)Enum)Pathdataarzcs-czzde-dezen-uszes-esfazfr-frzit-itnlptzru-ruzsv-sesw)r   csdeenesr   fritr   zpt-brrusvr
   c                   @   s  e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZdZ dZ!dZ"d Z#d!Z$d"Z%d#Z&d$Z'd%Z(d&Z)d&Z*d'Z+d(Z,d)Z-d*Z.e/d+e0d,e1fd-d.Z2e/d+e0d,e1fd/d0Z3e/d+e0d,e1fd1d2Z4e/d+e0d,e1fd3d4Z5e/d+e0d,e1fd5d6Z6e/d+e0d,e1fd7d8Z7e/d+e0d,e1fd9d:Z8e/d+e0d,e1fd;d<Z9e/d+e0d,e1fd=d>Z:e/d+e0d,e1fd?d@Z;e/dAe0d,e<j=e0 fdBdCZ>e/dIdAe0dEe1d,e0fdFdGZ?dHS )JIPAz*International phonetic alphabet charactersu   ˈu   ˌ'   ²u   ːu   ˑu   ̆u   ̃u   ̝u   ͡u   ͜u   ̩u   ̯.|u   ‖#u   ↗u   ↘   ¹   ³u   ⁴u   ⁵u   ⁶u   ⁷u   ⁸u   ⁹u   ˥u   ˦u   ˧u   ˨u   ˩u   ˀ   ʔ[]/{}()	codepointreturnc                 C   
   | t jkS )zTrue if elongated symbol)r   LONGr#    r(   G/home/ubuntu/.local/lib/python3.10/site-packages/gruut_ipa/constants.pyis_longU      
zIPA.is_longc                 C   r%   )zTrue if nasalated diacritic)r   NASALr'   r(   r(   r)   is_nasalZ   r+   zIPA.is_nasalc                 C   r%   )zTrue if rased diacritic)r   RAISEDr'   r(   r(   r)   	is_raised_   r+   zIPA.is_raisedc                 C      | t jt jfv S )z'True if primary/secondary stress symbol)r   STRESS_PRIMARYSTRESS_SECONDARYr'   r(   r(   r)   	is_stressd      zIPA.is_stressc                 C      | t jt jhv S )zTrue if accent symbol)r   ACCENT_ACUTEACCENT_GRAVEr'   r(   r(   r)   	is_accenti   r4   zIPA.is_accentc                 C   r0   )zTrue if above/below tie symbol)r   	TIE_ABOVE	TIE_BELOWr'   r(   r(   r)   is_tien   r4   z
IPA.is_tiec              	   C   s(   | t jt jt jt jt jt jt jt jhv S )zTrue if any IPA bracket symbol)	r   BRACKET_PHONETIC_LEFTBRACKET_PHONETIC_RIGHTBRACKET_PHONEMIC_LEFTBRACKET_PHONEMIC_RIGHTBRACKET_PROSODIC_LEFTBRACKET_PROSODIC_RIGHTBRACKET_OPTIONAL_LEFTBRACKET_OPTIONAL_RIGHTr'   r(   r(   r)   
is_brackets   s   zIPA.is_bracketc                 C   s   | t jt jt jt jhv S )zTrue if any IPA break symbol)r   BREAK_SYLLABLEBREAK_MINORBREAK_MAJOR
BREAK_WORDr'   r(   r(   r)   is_break   s   zIPA.is_breakc                 C   r5   )z1True if a rising or falling IPA intonation symbol)r   INTONATION_RISINGINTONATION_FALLINGr'   r(   r(   r)   is_intonation   r4   zIPA.is_intonationc                 C   s@   | t jt jt jt jt jt jt jt jt j	t j
t jt jt jt jhv S )zTrue if any IPA tone symbol)r   TONE_1TONE_2TONE_3TONE_4TONE_5TONE_6TONE_7TONE_8TONE_9TONE_EXTRA_HIGH	TONE_HIGHTONE_MIDTONE_LOWTONE_EXTRA_LOWr'   r(   r(   r)   is_tone   s    zIPA.is_tone
codepointsc                 C   sp   t d| } g }d}| D ]}t |dkr||7 }q|r(|t d| |}q|}q|r6|t d| |S )zSplit a string into graphemesNFD r   NFC)unicodedata	normalize	combiningappend)r\   	graphemesgraphemecr(   r(   r)   rd      s   
zIPA.graphemesTdrop_accentc                    s   d  fdd| D S )z.Return string without primary/secondary stressr^   c                 3   s,    | ]}t |s rt |s|V  qd S N)r   r3   r8   .0rf   rg   r(   r)   	<genexpr>   s    
z%IPA.without_stress.<locals>.<genexpr>)join)r\   rg   r(   rk   r)   without_stress   s   zIPA.without_stressN)T)@__name__
__module____qualname____doc__r1   r2   r6   r7   r&   	HALF_LONGEXTRA_SHORTr,   r.   r9   r:   SYLLABICNON_SYLLABICrE   rF   rG   rH   rJ   rK   rM   rN   rO   rP   rQ   rR   rS   rT   rU   rV   rW   rX   rY   rZ   TONE_GLOTTALIZED
TONE_SHORTr<   r=   r>   r?   r@   rA   rB   rC   staticmethodstrboolr*   r-   r/   r3   r8   r;   rD   rI   rL   r[   typingListrd   rn   r(   r(   r(   r)   r      s    	r   c                   @      e Zd ZdZdZdZdS )StresszApplied stress	secondaryprimaryN)ro   rp   rq   rr   	SECONDARYPRIMARYr(   r(   r(   r)   r          r   c                   @   r~   )AccentzApplied accentacutegraveN)ro   rp   rq   rr   ACUTEGRAVEr(   r(   r(   r)   r      r   r   c                   @      e Zd ZdZdZdZdZdS )	BreakTypezType of breakwordminormajorN)ro   rp   rq   rr   WORDMINORMAJORr(   r(   r(   r)   r      
    r   c                   @   r   )PhonemeLengthzSpoken length of a phonemeshortnormallongN)ro   rp   rq   rr   SHORTNORMALr&   r(   r(   r(   r)   r      r   r   c                   @   s,   e Zd ZdZdZdZdZdZdZdZ	dZ
d	S )
VowelHeightzHeight of a vowelclosez
near-closez	close-midmidzopen-midz	near-openopenN)ro   rp   rq   rr   CLOSE
NEAR_CLOSE	CLOSE_MIDMIDOPEN_MID	NEAR_OPENOPENr(   r(   r(   r)   r      s    r   c                   @   s$   e Zd ZdZdZdZdZdZdZdS )VowelPlacementzFront/back placement of a vowelfrontz
near-frontcentralz	near-backbackN)	ro   rp   rq   rr   FRONT
NEAR_FRONTCENTRAL	NEAR_BACKBACKr(   r(   r(   r)   r      s    r   c                   @   sp   e Zd ZU dZeed< eed< eed< eed< dZ	eed< dZ
eje ed	< ejZeed
< dZeje ed< dS )Vowelz!Necessary information for a vowelipaheight	placementroundedF	nasalatedNstresslengthalias_of)ro   rp   rq   rr   rz   __annotations__r   r   r{   r   r   r|   Optionalr   r   r   r   r   r(   r(   r(   r)   r      s   
 r   iFyTu   ɨu   ᵻ)r   u   ʉu   ɯuu   ɪu   ʏu   ʊeu   ẽ)r      øu   ɘu   ɵu   ɤou   əu   ɛu   œu   ɜu   ɞu   ʌu   ɔu   ɔ̃   æu   ɐau   ãu   ɶu   ɑu   ɒc                 C      i | ]}|j |qS r(   r   rj   vr(   r(   r)   
<dictcomp>C      r   c                   @   s"   e Zd ZU dZeed< eed< dS )DipthongzCombination of two vowelsvowel1vowel2N)ro   rp   rq   rr   r   r   r(   r(   r(   r)   r   H  s   
 r   c                   @   sB   e Zd ZU dZeed< eed< ejZ	eed< dZ
eje ed< dS )SchwazVowel-like soundr   
r_colouredr   Nr   )ro   rp   rq   rr   rz   r   r{   r   r   r   r   r|   r   r(   r(   r(   r)   r   S  s   
 r   u   ɚu   ɝc                 C   r   r(   r   )rj   sr(   r(   r)   r   _  r   c                   @   s0   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
S )ConsonantTypezType of a consonantnasalplosive	affricate	fricativeapproximantflaptrillzlateral-approximantN)ro   rp   rq   rr   r,   PLOSIVE	AFFRICATE	FRICATIVEAPPROXIMANTFLAPTRILLLATERAL_APPROXIMANTr(   r(   r(   r)   r   d  s    r   c                   @   s<   e Zd ZdZdZdZdZdZdZdZ	dZ
d	Zd
ZdZdZdS )ConsonantPlacezPlace of articulationbilabialzlabio-dentaldentalalveolarzpost-alveolar	retroflexpalatalvelaruvular
pharyngealglottalN)ro   rp   rq   rr   BILABIALLABIO_DENTALDENTALALVEOLARPOST_ALVEOLAR	RETROFLEXPALATALVELARUVULAR
PHARYNGEALGLOTTALr(   r(   r(   r)   r   q  s    r   c                   @   s    e Zd ZdZdZdZdZdZdS )ConsonantSoundsLikez,Class of sounds this consonant is similar tor^   rglN)ro   rp   rq   rr   NONERGLr(   r(   r(   r)   r     s    r   c                   @   sl   e Zd ZU dZeed< eed< eed< eed< dZ	eed< e
jZe
ed< ejZeed	< d
Zeje ed< d
S )	Consonantz%Necessary information for a consonantr   typeplacevoicedF	velarizedsounds_liker   Nr   )ro   rp   rq   rr   rz   r   r   r   r{   r   r   r   r   r   r   r   r   r|   r   r(   r(   r(   r)   r     s   
 r   mu   ɱnu   ɳu   ɲu   ŋu   ɴpbtdu   ʈu   ɖrf   u   ɟku   ɡ)r   r   )r   r   qu   ɢu   ʡr   u   p͡fu   b͡vu   t̪͡su   t͡su   d͡zu   t͡ʃu   d͡ʒu   ʈ͡ʂu   ɖ͡ʐu   t͡ɕu   d͡ʑu   k͡xu   ɸu   βfr   u   θ   ðr   zu   ʃu   ʒu   ʂu   ʐ   çu   ʝxu   ɣu   χu   ʁu   ħhu   ɦwu   ʋu   ɹu   ɻju   ɰu   ⱱu   ɾu   ɽu   ʙr   u   ʀr   u   ɫ)r   r   u   ɭu   ʎu   ʟc                 C   r   r(   r   ri   r(   r(   r)   r   W  r   c                   @   sD   e Zd ZU dZeed< dZeed< dd Ze	dedd fd	d
Z
dS )BreakzIPA break/boundaryr   r^   textc                 C   sV   | j tjkrtj| _d S | j tjkrtj| _d S | j tjkr$tj	| _d S t
dt  )NUnrecognized break type: )r   r   r   r   rF   r  r   rG   r   rH   
ValueErrorselfr(   r(   r)   __post_init__c  s   zBreak.__post_init__	break_strr$   c                 C   sV   | t jkrtj}t	|S | t jkrtj}t	|S | t jkr$tj}t	|S td|  )zParse break from stringr  )
r   rF   r   r   rG   r   rH   r   r  r  )r  
break_typer(   r(   r)   from_stringm  s   


zBreak.from_stringN)ro   rp   rq   rr   r   r   r  rz   r  ry   r  r(   r(   r(   r)   r  \  s   
 
r  c                   @   sB   e Zd ZdZdefddZdefddZededd fd	d
Z	dS )
IntonationzIPA rising/falling intonationrisingc                 C   s$   || _ | j rtj| _d S tj| _d S rh   )r  r   rJ   r  rK   )r  r  r(   r(   r)   __init__  s   zIntonation.__init__r$   c                 C   s   | j S rh   )r  r  r(   r(   r)   __repr__  s   zIntonation.__repr__intonation_strc                 C   s:   | t jkrd}t|S | t jkrd}t|S td|  )zParse intonation from stringTFzUnrecognized intonation type: )r   rJ   rK   r  r  )r  r  r(   r(   r)   r    s   

zIntonation.from_stringN)
ro   rp   rq   rr   r{   r  rz   r  ry   r  r(   r(   r(   r)   r  |  s    r  r   phonemebreakvowel	consonantschwac                 C      g | ]}|j qS r(   valuer   r(   r(   r)   
<listcomp>      r%  r   r   c                 C   r"  r(   r#  r   r(   r(   r)   r%    r&  c                 C   r"  r(   r#  r   r(   r(   r)   r%    r&  r   	unroundedc                 C   r"  r(   r#  r   r(   r(   r)   r%    r&  r   unvoicedc                 C   r"  r(   r#  r   r(   r(   r)   r%    r&  c                 C   r"  r(   r#  r   r(   r(   r)   r%    r&  r^   c                 C   r"  r(   r#  r   r(   r(   r)   r%    r&  )symbol_typephoneme_typer  	diacriticvowel_heightvowel_placevowel_roundedvowel_stressconsonant_voicedconsonant_typeconsonant_placeconsonant_sounds_likephoneme_lengthFEATURE_COLUMNS>   r  r-  r,  r/  r1  r4  r2  FEATURE_ORDINAL_COLUMNSr$   c                  C   sZ   i } d}t  D ]"\}}|tv r|| |< |d7 }qt||t| | |< |t|7 }q| S )zTCreate mapping from feature column name to vector index (ordinal) or slice (one-hot)r      )r5  itemsr6  slicelen)feature_keysoffsetfeature_colfeature_valuesr(   r(   r)   _make_feature_keys  s   
r?  )Trr   r|   r`   dataclassesr   enumr   pathlibr   __file__parent_DIR	_DATA_DIRLANG_ALIASESrz   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   _VOWELSVOWELSr   r   _SCHWASSCHWASr   r   r   r   r,   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   _CONSONANTS
CONSONANTSr  r  FEATURE_EMPTYr5  Dictr}   r   r6  SetMappingUnionintr9  r?  FEATURE_KEYSr(   r(   r(   r)   <module>   s   
 +
	
 !#$%&'*
 			
!(/023456789:;<=>@ABCDEFGHIJKLMPQRST[\]_`ahoprsz           "  )  *  3
"
