o
    i&                     @   s*  d Z ddlZddlmZ ddlmZ ddlZddlZej	ej
eZdZdZdZdZeejed	d
dZeeZW d   n1 sGw   Y  dd e D Zeejed	ddZeeZW d   n1 spw   Y  dd e D Zdd eddD Zdd eddD Zdd eddD Zdd eddD Zdd eddD Zdd eddD Z G dd  d e!Z"d!d" Z#dId#d$Z$d%d& Z%d'd( Z&d)d* Z'd+d, Z(d-d. Z)d/d0 Z*d1d2 Z+d3d4 Z,d5d6 Z-d7d8 Z.dJd:d;Z/dJd<d=Z0d>d? Z1d@dA Z2dKdCdDZ3dIdEdFZ4dGdH Z5dS )La$  Syllable and jamo analysis for Korean. Default internal exchange form is
Hangul characters, not codepoints. Jamo exchange form is U+11xx characters,
not U+3xxx Hangul Compatibility Jamo (HCJ) characters or codepoints.

For more information, see:
http://python-jamo.readthedocs.org/ko/latest/
    N)stderr)chain   i  `  i  datazU+11xx.jsonrc                 C      i | ]\}}||qS  r	   .0charnamer	   r	   =/home/ubuntu/.local/lib/python3.10/site-packages/jamo/jamo.py
<dictcomp>       r   zU+31xx.jsonc                 C   r   r	   r	   r
   r	   r	   r   r      r   c                 C      g | ]}t |qS r	   chrr   _r	   r	   r   
<listcomp>       r      _  c                 C   r   r	   r   r   r	   r	   r   r       r   i  c                 C   r   r	   r   r   r	   r	   r   r   !   r   a    c                 C   r   r	   r   r   r	   r	   r   r   "   r   iv  c                 C   r   r	   r   r   r	   r	   r   r   #   r   i   c                 C   r   r	   r   r   r	   r	   r   r   $   r   i  c                       s    e Zd ZdZ fddZ  ZS )InvalidJamoErrorzjamo is a U+11xx codepoint.c                    s@   t t| | tt|| _tdj| jdd  dtd d S )NzCould not parse jamo: U+{code}   )code)file)	superr   __init__hexordjamoprintformatr   )selfmessager$   	__class__r	   r   r!   )   s
   
zInvalidJamoError.__init__)__name__
__module____qualname____doc__r!   __classcell__r	   r	   r)   r   r   '   s    r   c                 C   s~   t | r=t| t }|d }d|| d d  }d|d  }|r1t|t t|t t|t fS t|t t|t fS | S )zpReturn a 3-tuple of lead, vowel, and tail jamo characters.
    Note: Non-Hangul characters are echoed back.
          L  )is_hangul_charr#   _JAMO_OFFSETr   _JAMO_LEAD_OFFSET_JAMO_VOWEL_OFFSET_JAMO_TAIL_OFFSET)syllableremtailvowelleadr	   r	   r   _hangul_char_to_jamo0   s   




r=   c                 C   sP   t | t } t |t }|rt |t nd}t||d d  | d d  t S )z?Return the Hangul character for the given jamo characters.
    r   r1   r0   r2   )r#   r5   r6   r7   r   r4   r<   r;   r:   r	   r	   r   _jamo_to_hangul_charD   s   $r?   c                 C   s2   t | rtddt| }|t v rt| S | S )N(?<=HANGUL )(\w+)LETTER)is_jamoresub_get_unicode_name_HCJ_REVERSE_LOOKUPkeys)r   hcj_namer	   r	   r   _jamo_char_to_hcjM   s   rI   c                 C   s:   | t  vr| t vrtd| t| rt|  S t |  S )z0Fetch the unicode name for jamo characters.
    z#Not jamo or nameless jamo character)_JAMO_TO_NAMErG   _HCJ_TO_NAMEr   is_hcj)r   r	   r	   r   rE   W   s
   
rE   c                 C   sp   t | }d|  kodkn  p7d|  kodkn  p7d|  ko%dkn  p7d|  ko1dkn  p7t| S )	zTest if a single character is a jamo character.
    Valid jamo includes all modern and archaic jamo, as well as all HCJ.
    Non-assigned code points are invalid.
    r   i  i`  i|  i  i  i  i  )r#   rL   	characterr   r	   r	   r   rB   b   s   rB   c                 C   sX   t | }d|  kodkn  p+d|  kodkn  p+d|  ko%dkn  p+t| S )a*  Test if a single character is a modern jamo character.
    Modern jamo includes all U+11xx jamo in addition to HCJ in modern usage,
    as defined in Unicode 7.0.
    WARNING: U+1160 is NOT considered a modern jamo character, but it is listed
    under 'Medial Vowels' in the Unicode 7.0 spec.
    r   i  r   iu  r   i  )r#   is_hcj_modernrM   r	   r	   r   is_jamo_modernn   s   rP   c                 C   s(   dt |   kodkn  ot | dkS )zTest if a single character is a HCJ character.
    HCJ is defined as the U+313x to U+318x block, sans two non-assigned code
    points.
    11  i1  id1  r#   rN   r	   r	   r   rL   |   s   (rL   c                 C   s8   t | }d|  kodkn  pd|  kodkS   S )zTest if a single character is a modern HCJ character.
    Modern HCJ is defined as HCJ that corresponds to a U+11xx jamo character
    in modern usage.
    rQ   iN1  O1  c1  rR   rM   r	   r	   r   rO      s   rO   c                 C   s   dt |   kodkS   S )zfTest if a single character is in the U+AC00 to U+D7A3 code block,
    excluding unassigned codes.
    r   i  rR   rS   r	   r	   r   r3      s   r3   c                 C   sd   | t v s
| tdkrdS | tv s%| tdks%dt|   kr#dkr'dS  ndS | tv r-dS td| )	a  Determine if a jamo character is a lead, vowel, or tail.
    Integers and U+11xx characters are valid arguments. HCJ consonants are not
    valid here.

    get_jamo_class should return the class ["lead" | "vowel" | "tail"] of a
    given character or integer.

    Note: jamo class directly corresponds to the Unicode 7.0 specification,
    thus includes filler characters as having a class.
    r   r<   r   rT   rU   r;   r:   z#Invalid or classless jamo argument.)
JAMO_LEADSr   JAMO_VOWELSr#   
JAMO_TAILSr   r$   r	   r	   r   get_jamo_class   s   
rZ   c                 C   s   dd | D S )aY  Convert jamo to HCJ.
    Arguments may be iterables or single characters.

    jamo_to_hcj should convert every jamo character into HCJ in a given input,
    if possible. Anything else is unchanged.

    jamo_to_hcj is the generator version of j2hcj, the string version. Passing
    a character to jamo_to_hcj will still return a generator.
    c                 s       | ]}t |V  qd S N)rI   r   r	   r	   r   	<genexpr>       zjamo_to_hcj.<locals>.<genexpr>r	   )r   r	   r	   r   jamo_to_hcj      
r_   c                 C      d t| S )a  Convert jamo into HCJ.
    Arguments may be iterables or single characters.

    j2hcj should convert every jamo character into HCJ in a given input, if
    possible. Anything else is unchanged.

    j2hcj is the string version of jamo_to_hcj, the generator version.
     )joinr_   rY   r	   r	   r   j2hcj   s   	rd   r;   c                 C   s^   |dkrd}n|dkrd}n|dkrd}nt d| td|t| }|t v r-t| S | S )	zConvert a HCJ character to a jamo character.
    Arguments may be single characters along with the desired jamo class
    (lead, vowel, tail). Non-mappable input will raise an InvalidJamoError.
    r<   CHOSEONGr;   	JUNGSEONGr:   	JONGSEONGzNo mapping from input to jamo.r@   )r   rC   rD   rE   _JAMO_REVERSE_LOOKUPrG   )hcj_charposition
jamo_class	jamo_namer	   r	   r   hcj_to_jamo   s   
rm   c                 C   s
   t | |S )zOConvert a HCJ character to a jamo character.
    Identical to hcj_to_jamo.
    )rm   )ri   rj   r	   r	   r   hcj2j   s   
rn   c                 C   s   dd t dd | D D S )a4  Convert a string of Hangul to jamo.
    Arguments may be iterables of characters.

    hangul_to_jamo should split every Hangul character into U+11xx jamo
    characters for any given string. Non-hangul characters are not changed.

    hangul_to_jamo is the generator version of h2j, the string version.
    c                 s   s    | ]}|V  qd S r\   r	   r   r	   r	   r   r]      s    z!hangul_to_jamo.<locals>.<genexpr>c                 s   r[   r\   )r=   r   r	   r	   r   r]      r^   )r   from_iterablehangul_stringr	   r	   r   hangul_to_jamo   s
   

rr   c                 C   ra   )a  Convert a string of Hangul to jamo.
    Arguments may be iterables of characters.

    h2j should split every Hangul character into U+11xx jamo for any given
    string. Non-hangul characters are not touched.

    h2j is the string version of hangul_to_jamo, the generator version.
    rb   )rc   rr   rp   r	   r	   r   h2j   r`   rs   rb   c                 C   s   t | d} t |d}|rt|dkrd}n	t|rt |d}t| rJt| dkrJt|rJt|dkrJ|r>t|rJt|dkrJt| ||}t|rJ|S tdd)zReturn the Hangul character for the given jamo input.
    Integers corresponding to U+11xx jamo codepoints, U+11xx jamo characters,
    or HCJ are valid inputs.

    Outputs a one-character Hangul string.

    This function is identical to j2h.
    r<   r;   r   Nr:   z*Could not synthesize characters to Hangul. )rm   r#   rL   rB   rZ   r?   r3   r   )r<   r;   r:   resultr	   r	   r   jamo_to_hangul   s*   


rv   c                 C   s   t | ||S )zArguments may be integers corresponding to the U+11xx codepoints, the
    actual U+11xx jamo characters, or HCJ.

    Outputs a one-character Hangul string.

    This function is defined solely for naming conisistency with
    jamo_to_hangul.
    )rv   r>   r	   r	   r   j2h  s   
rw   c                 C   s   t )zAConvert jamo characters in a string into hcj as much as possible.)NotImplementedErrorrc   )stringr	   r	   r   synth_hangul&  s   rz   )r   )r;   )rb   )6r.   ossysr   	itertoolsr   jsonrC   pathabspathdirname__file___ROOTr4   r5   r6   r7   openrc   namedataloadrJ   itemsrh   rK   rF   rangerV   JAMO_LEADS_MODERNrW   JAMO_VOWELS_MODERNrX   JAMO_TAILS_MODERN	Exceptionr   r=   r?   rI   rE   rB   rP   rL   rO   r3   rZ   r_   rd   rm   rn   rr   rs   rv   rw   rz   r	   r	   r	   r   <module>   sZ   	
	





