o
    
i}                     @   sp   d dl Z ddlmZ defddZdd Zdd
ededefddZdd
ededefddZ	dedefddZ
dS )    N   )punctuationscc                 C   s   |   S )N)isalnum)r    r   T/home/ubuntu/.local/lib/python3.10/site-packages/misaki/vi_cleaner/sentence_utils.py
isTextOnly   s   r   c                 C   s$   dd t t|| t|| D S )Nc                 S   s    g | ]\}}|r|  | qS r   )strip).0edr   r   r   
<listcomp>
   s     z(split_text_sentences.<locals>.<listcomp>)zipresplitfindall)textregexr   r   r   split_text_sentences	   s   $r      	sentences	maxLengthreturnc                 C   s   t | dkr| S t | d d|kr"| d gt| dd  |d S t | d | d  d|krFt| d d | d  g| dd   |dS | d gt| dd  |d S )Nr   r    r      )lenr   combine_sentences)r   r   r   r   r   r      s   *r   c                 C   sB   g }| D ]}t |d|kr|t|d q||g q|S )Nr   z	[?!.,:;-])r   r   appendr   )r   r   sub_sentencessentencer   r   r   split_long_sentences   s   r!   passagec                    sr   t t| d } fdd|D }g }|D ]!}|D ]}tdd |D }t|s5ttt|r5|| qq|S )Nz[.!?]c                    s   g | ]}t | qS r   )r   r
   ir   r   r   r   "   s
    zget_pieces.<locals>.<listcomp>c                 S   s   g | ]}|qS r   r   r#   r   r   r   r   '   s    )	r!   r   setr   
issupersetanymapr   r   )r"   r   r   combined_sub_sentences	flat_listsublistitem
item_charsr   r   r   
get_pieces    s   

r.   )r   )r   	symbol_vir   strr   r   listintr   r!   r.   r   r   r   r   <module>   s   	