o
    Ti]                     @   s`   d dl mZ d dlmZ d dlmZmZ d dlmZm	Z	 d dl
mZ dd Zdd	 Zd
d ZdS )    )unicode_literals)chain)	text_type
bytes_type)RE_HANSPHRASES_DICT)mmsegc                 C   sV   t | } g }| D ] }t|s|| qtr#|ttj	| q|| q|S )N)

simple_segr   matchappendr   extendlistr   segcut)hansretx r   J/home/ubuntu/.local/lib/python3.10/site-packages/pypinyin/seg/simpleseg.pyr      s   
r   c                 C   sZ   t | tr	J dt | trt| S t| } t| dkr"t| d S ttdd | D  S )u0   将传入的字符串按是否是汉字来分割z-must be unicode string or [unicode, ...] list   r   c                 S   s   g | ]}t |qS r   )r	   ).0r   r   r   r   
<listcomp>#   s    zsimple_seg.<locals>.<listcomp>)
isinstancer   r   _segr   lenr	   r   )r   r   r   r   r	      s   
r	   c                 C   s   d}g }d}t | D ]:\}}t|r,|dkrd}|dkr"||7 }q
|| d}|}q
|dkr2d}|dkr;||7 }q
|| d}|}q
|| |S )u   按是否是汉字进行分词 r   r   )	enumerater   r
   r   )charssr   flagncr   r   r   r   &   s*   





r   N)
__future__r   	itertoolsr   pypinyin.compatr   r   pypinyin.constantsr   r   pypinyin.segr   r   r	   r   r   r   r   r   <module>   s   