o
    ޥia0                     @   sr   d dl mZmZmZmZ d dlZd dlZd dlZd dlZdZ	dZ
G dd dZG dd deZG d	d
 d
eZdS )    )unicode_literalsdivisionabsolute_importprint_functionNconfig
dictionaryc                   @   sJ   e Zd ZdddZdd Zg fddZdd	 Zd
d Zdd Zdd Z	dS )OpenCCNc                 C   sP   d| _ || _d| _t | _t | _t | _t	d| _
| jdur&|   dS dS )a  
        init OpenCC
        :param conversion: the conversion of usage, options are
         'hk2s', 's2hk', 's2t', 's2tw', 's2twp', 't2hk', 't2s', 't2tw', 'tw2s', 'tw2sp', etc
         check the json file names in config directory
        :return: None
         Fu  (\s+|-|,|\.|\?|!|\*|　|，|。|、|；|：|？|！|…|“|”|‘|’|『|』|「|」|﹁|﹂|—|－|（|）|《|》|〈|〉|～|．|／|＼|︒|︑|︔|︓|︿|﹀|︹|︺|︙|︐|［|﹇|］|﹈|︕|︖|︰|︳|︴|︽|︾|︵|︶|｛|︷|｝|︸|﹃|﹄|【|︻|】|︼)N)conversion_name
conversion_dict_init_donelist_dict_chain_dict_chain_datadict
dict_cacherecompilesplit_chars_re
_init_dictselfr    r   A/home/ubuntu/.local/lib/python3.10/site-packages/opencc/opencc.py__init__$   s   
zOpenCC.__init__c                 C   sv   | j s
|   d| _ g }| j|}tdt|D ]}|d dkr.|| || | j q|||  qd	|S )z]
        Convert string from Simplified Chinese to Traditional Chinese or vice versa
        Tr      r	   )
r   r   r   splitrangelenappend_convertr   join)r   stringresultsplit_string_listir   r   r   convert9   s   
zOpenCC.convertc                 C   s<   t |}|D ]}|| t d| }qd| S )av  
        Convert string from Simplified Chinese to Traditional Chinese or vice versa
        If a dictionary is part of a group of dictionaries, stop conversion on a word
        after the first match is found.
        :param string: the input string
        :param dictionary: list of dictionaries to be applied against the string
        :return: converted string
        r	   )
StringTreecreate_parse_treer!   inorder)r   r"   r   treec_dictr   r   r   r    P   s
   	
zOpenCC._convertc                 C   s   | j du r	tdg | _| j d }tjtjtt|}t	|}t
|}W d   n1 s1w   Y  |d| _|dD ]}| | j|d qAg | _| | j| j t| jD ]\}}t|trm|g| j|< q^d| _dS )zR
        initialize the dict with chosen conversion
        :return: None
        Nzconversion is not setz.jsonnameconversion_chainr   T)r   
ValueErrorr   ospathr!   dirname__file__
CONFIG_DIRopenjsonloadgetr
   _add_dict_chainr   _add_dictionaries	enumerate
isinstancetupler   )r   r   config_filefsetting_jsonchainindexr+   r   r   r   r   _   s$   




zOpenCC._init_dictc              	   C   s   |D ]w}t |trg }| || || q|| jvrqi }d}d}tj|ddd.}|D ]#}	|	 d\}
}|||
< t	|
|krFt	|
}t	|
|k rPt	|
}q-W d    n1 s[w   Y  ||||f |||f| j|< q|| j|  qd S )N   i  rzutf-8)encoding	)
r;   r   r9   r   r   ior4   stripr   r   )r   
chain_list
chain_dataitemr@   map_dictmax_lenmin_lenr>   linekeyvaluer   r   r   r9   z   s0   

zOpenCC._add_dictionariesc                 C   s~   | ddkrg }| dD ]}| || q|| dS | ddkr=| d}tjtjtt|}|| dS dS )z
        add dict chain
        :param dict_chain: the dict chain to add to
        :param dict_dict: the dict to be added in
        :return: None
        typegroupdictstxtfileN)	r7   r8   r   r/   r0   r!   r1   r2   DICT_DIR)r   
dict_chain	dict_dictr@   	dict_itemfilename	dict_filer   r   r   r8      s   
zOpenCC._add_dict_chainc                 C   s   | j |krdS d| _|| _ dS )a	  
        set conversion
        :param conversion: the conversion of usage, options are
         'hk2s', 's2hk', 's2t', 's2tw', 's2twp', 't2hk', 't2s', 't2tw', 'tw2s', and 'tw2sp'
         check the json file names in config directory
        :return: None
        NF)r   r   r   r   r   r   set_conversion   s   

zOpenCC.set_conversionN)
__name__
__module____qualname__r   r&   r    r   r9   r8   r\   r   r   r   r   r   #   s    
r   c                   @   s>   e Zd ZdZdZdddZdd Zdd	 Zd
d Zdd Z	dS )TreeNoder   rB   Nc                 C   s    d d g| _ || _d| _|| _d S )NF)branchrP   matchedlength_hint)r   rP   hintr   r   r   r      s   

zTreeNode.__init__c                 C   
   || _ d S r]   )rc   )r   rc   r   r   r   set_matched      
zTreeNode.set_matchedc                 C   rf   r]   )rP   )r   rP   r   r   r   	set_value   rh   zTreeNode.set_valuec                 C   s   || j |< d S r]   )rb   )r   rb   noder   r   r   
set_branch      zTreeNode.set_branchc                 C   rf   r]   )rd   )r   re   r   r   r   set_hint   rh   zTreeNode.set_hintr]   )
r^   r_   r`   LEFTRIGHTr   rg   ri   rk   rm   r   r   r   r   ra      s    
ra   c                   @   s.   e Zd Zdd Zdd Zdd Zd
dd	ZdS )r'   c                 C   s   t || _d S r]   )ra   root)r   r"   r   r   r   r      rl   zStringTree.__init__c                 C   s   | j g}g }|D ]]}|r_| }| |j||j\}}}}	|rU|| |d |d |rAt||	}
|	|
 |
tj|
 |rTt||	}
|	|
 |
tj|
 n|	| d|_|s|}|}|}qdS )a  
        Compare smaller and smaller sub-strings going from left to
        rightin root node value against a test_dict_list entry. If match is found,
        create tree nodes for remaining left and right string portions and place
        these nodes on a stack for processing.

        :param test_dict_list: a list of tuples of the max key length and dict
                        currently being applied against the string
        NT)rp   pop_StringTree__findMatchrP   rd   ri   rm   rg   ra   r   rk   rn   ro   )r   test_dict_listworking_stackunmatched_stack	test_dictcurrrP   lstringrstringtest_lenrj   tempr   r   r   r(      s4   







zStringTree.create_parse_treec                 C   s\   g }g }| j }	 |r|| |jtj }|s
|r*| }||j |jtj }n	 |S q)zd
        Do a non-recursive inorder traversal of the tree.
        :return: list of strings
        )rp   r   rb   ra   rn   rq   rP   ro   )r   
return_valstackrw   r   r   r   r)      s   
zStringTree.inorderNc                 C   s   t |}d}d}t||d }|rt||}|d }||krztd|| d D ]J}	||	|	|  |d v rq|	dkr?|d|	 }|	| |k rM||	| d }|d ||	|	|   }
t |
ddkri|
dd }
|
|||f  S q'|d8 }||ksdS )a8  
        Compare smaller and smaller sub-strings going from left to
        right against test_dict. If an entry is found, return it as well
        as the remaining string(s) and the test length.

        :param cstring:  the string to find a match
        :param test_dict: a tuple of the max key length and dict currently being
                          applied against the string
        :return: the new matched value, old string to left of the match, old string to right
                of the match (may be all None if no match found), last test length
        Nr   rB   r    )NNNN)r   minr   r   )r   r"   rv   re   
string_lenrx   ry   rz   rM   r%   rP   r   r   r   __findMatch  s,   
zStringTree.__findMatchr]   )r^   r_   r`   r   r(   r)   rr   r   r   r   r   r'      s
    'r'   )
__future__r   r   r   r   rF   r/   r5   r   r3   rV   r   objectra   r'   r   r   r   r   <module>   s    