o
    vOiG                     @   s  d Z ddlmZ ddlmZ dZdZdZdZdZ	d	Z
ee e e e	 e
 Ze Zd
Ze Zee Zee Zee Zee ZdZdZdZee Zee edd  e e Zeeeee	e
dZdddddddZd ddZdd Zdd Zeedd Z ZZ eedd Z! Z"Z#eedd Z$Z%eee Z& Z'Z(ee!e Z) Z*Z+ee$eZ,ee& Z- Z.Z/ee) Z0 Z1Z2ee, Z3Z4dS )!z(Constants for processing Pinyin strings.    )
whitespace)escapeu	   aāàáǎu	   eēéěèu	   iīíǐìu	   oōóǒòu	   uūúǔùu   vüǖǘǚǜbpmfdtnlgkhjqxzcsrwyu   ·012345:-'z"#$%&'()*+,-/\:;<=>@[]^_`{|}~z.!?Naeiouvr   r   r	   r
   r   u   vüFc              	   C   s   dj ttttttd}|  }| D ]\}}t	|dkr$d |||< qd|r*dnd ||d |d	 |d
 |d |d |d d S )a  Builds a Pinyin syllable re pattern.

    Syllables can be preceded by a middle dot (tone mark). Syllables that end
    in a consonant are only valid if they aren't followed directly by a vowel
    with no apostrophe in between.

    The rough approach used to validate a Pinyin syllable is:
        1. Get the longest valid syllable.
        2. If it ends in a consonant make sure it's not followed directly by a
            vowel (hyphens and apostrophes don't count).
        3. If the above didn't match, repeat for the next longest valid match.

    Lookahead assertions are used to ensure that hyphens and apostrophes are
    only considered valid if used correctly. This helps to weed out non-Pinyin
    strings.

    z(?![{a}{e}{i}{o}{u}{v}]|u:)r      z[{}]u  (?:·|‧)?(?:(?:(?:[zcs]h|[gkh])u%(a)sng%(consonant_end)s)|(?:[jqx]i%(o)sng%(consonant_end)s)|(?:[nljqx]i%(a)sng%(consonant_end)s)|(?:(?:[zcs]h?|[dtnlgkhrjqxy])u%(a)sn%(consonant_end)s)|(?:(?:[zcs]h|[gkh])u%(a)si)|(?:(?:[zc]h?|[rdtnlgkhsy])%(o)sng%(consonant_end)s)|(?:(?:[zcs]h?|[rbpmfdtnlgkhw])?%(e)sng%(consonant_end)s)|(?:(?:[zcs]h?|[rbpmfdtnlgkhwy])?%(a)sng%(consonant_end)s)|(?:[bpmdtnljqxy]%(i)sng%(consonant_end)s)|(?:[bpmdtnljqx]i%(a)sn%(consonant_end)s)|(?:[bpmdtnljqx]i%(a)so)|(?:[nl](?:v|u:|ü)%(e)s)|(?:[nl](?:%(v)s|u:))|(?:[jqxy]u%(e)s)|(?:[bpmnljqxy]%(i)sn%(consonant_end)s)|(?:[mdnljqx]i%(u)s)|(?:[bpmdtnljqx]i%(e)s)|(?:[dljqx]i%(a)s)|(?:(?:[zcs]h?|[rdtnlgkhxqjy])%(u)sn%(consonant_end)s)|(?:(?:[zcs]h?|[rdtgkh])u%(i)s)|(?:(?:[zcs]h?|[rdtnlgkh])u%(o)s)|(?:(?:[zcs]h|[rgkh])u%(a)s)|(?:(?:[zcs]h?|[rbpmfdngkhw])?%(e)sn%(consonant_end)s)|(?:(?:[zcs]h?|[rbpmfdtnlgkhwy])?%(a)sn%(consonant_end)s)|(?:(?:[zcs]h?|[rpmfdtnlgkhy])?%(o)su)|(?:(?:[zcs]h?|[rbpmdtnlgkhy])?%(a)so)|(?:(?:[zs]h|[bpmfdtnlgkhwz])?%(e)si)|(?:(?:[zcs]h?|[bpmdtnlgkhw])?%(a)si)|(?:(?:[zcs]h?|[rjqxybpmdtnl])%(i)s)|(?:(?:[zcs]h?|[rwbpmfdtnlgkhjqxwy])%(u)s)|(?:%(e)s(?:r%(consonant_end)s)?)|(?:(?:[zcs]h?|[rmdtnlgkhy])%(e)s)|(?:[bpmfwyl]?%(o)s)|(?:(?:[zcs]h|[bpmfdtnlgkhzcswy])?%(a)s)|(?:r%(consonant_end)s))z[0-5]? r   r   r	   r
   r   r   )consonant_endr   r   r	   r
   r   r   )
format_a_e_i_o_u_vcopyitemslen)vowelstone_numbersr   _vowelsr   s r   ?/home/ubuntu/.local/lib/python3.10/site-packages/zhon/pinyin.py
_build_syl8   s(   
%'r    c                 C   s   dj | |d |d |d dS )a  Builds a Pinyin word re pattern from a Pinyin syllable re pattern.

    A word is defined as a series of consecutive valid Pinyin syllables
    with optional hyphens and apostrophes interspersed. Hyphens must be
    followed immediately by another valid Pinyin syllable. Apostrophes must be
    followed by another valid Pinyin syllable that starts with an 'a', 'e', or
    'o'.

    z3(?:{syl}(?:-(?={syl})|'(?=[{a}{e}{o}])(?={syl}))?)+r   r   r
   )sylr   r   r
   )r   )r!   r   r   r   r   _build_word   s   
r"   c                 C   s   dj | ttttdS )a!  Builds a Pinyin sentence re pattern from a Pinyin word re pattern.

    A sentence is defined as a series of valid Pinyin words, punctuation
    (non-stops), and spaces followed by a single stop and zero or more
    container-closing punctuation marks (e.g. apostrophe and brackets).

    z?(?:{word}|[{non_stops}]|(?<![{stops} ]) )+[{stops}]['\"\]}}\)]*)word	non_stopsstops)r   r   r$   r%   )r#   r   r   r   _build_sentence   s   r&   )r   T)F)5__doc__stringr   rer   r   r   r   r   r   r   _lowercase_vowelsupper_uppercase_vowels_lowercase_consonants_uppercase_consonantsr   
consonants	lowercase	uppercasemarksr$   r%   punctuation	printable	_a_vowels	_n_vowelsr    r"   r&   a_sylacc_sylaccented_syllablen_sylnum_sylnumbered_syllabler!   syllablea_wordacc_wordaccented_wordn_wordnum_wordnumbered_wordr#   a_sentacc_sentaccented_sentencen_sentnum_sentnumbered_sentencesentsentencer   r   r   r   <module>   sH   
L
