o
    à¥µii  ã                   @   s\   d dl Z d dlZd dlZG dd„ deƒZedkr*eƒ ZejD ]Ze 	e¡Z
ee
ƒ qdS dS )é    Nc                   @   s$   e Zd Zdd„ Zdd„ Zdd„ ZdS )Ú	TextCleanc                 C   sv   g d¢}|  tddƒ¡ |  tddƒ¡ |  tddƒ¡ tdd	„ |D ƒƒ| _t d
tj¡| _ddddddddœ| _d S )N)é    i€  i/   i_   é 0  éÿþ  é   i   i   ij   il   r   i à  i ù  i    r   é   é¡   c                 S   s   g | ]}t |ƒ‘qS © )Úchr)Ú.0Úir	   r	   ú[/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/preprocessors/nlp/text_clean.pyÚ
<listcomp>   s    z&TextClean.__init__.<locals>.<listcomp>z\s+ú'õ   Â°ú-)ú`u   â€™õ   Â´u   â€˜õ   Âºu   â€“u   â€”)	ÚextendÚrangeÚsetÚspacesÚreÚcompileÚUNICODEÚ	space_patÚreplace_char)ÚselfÚspur	   r	   r   Ú__init__
   s   ùzTextClean.__init__c                 C   sH   t |ƒ}d|  k rdk rn n|d8 }t|ƒS |dkr"d}t|ƒS |S )Ni ÿ  i_ÿ  iàþ  r   é    )Úordr
   )r   ÚchÚnr	   r	   r   Úsbc2dbc!   s   üÿzTextClean.sbc2dbcc                 C   s¸   zQt | ¡ ƒ}t|ƒ}d}||k rA|| dk s|| | jv r#d||< n| j || || ¡||< |  || ¡||< |d7 }||k sd |¡}| j 	d|¡ ¡ }|W S  t
y[   Y dS w )Nr   ú é   Ú )ÚlistÚstripÚlenr   r   Úgetr%   Újoinr   ÚsubÚ	Exception)r   ÚsÚlineÚsizer   r	   r	   r   Úclean+   s"   
ù
ÿzTextClean.cleanN)Ú__name__Ú
__module__Ú__qualname__r    r%   r3   r	   r	   r	   r   r      s    
r   Ú__main__)Úcodecsr   ÚsysÚobjectr   r4   ÚtcÚstdinr1   r3   ÚresÚprintr	   r	   r	   r   Ú<module>   s   8


ú