o
     ¢i  ã                   @   sv   d dl Z d dlmZ ddlmZ dg d¢fgZe j de¡dd	„ ƒZd
d„ Z	e j de¡dd„ ƒZ
dd„ Zdd„ ZdS )é    N)Ú
Vietnameseé   )ÚNAUGHTY_STRINGSõt   ÄÃ¢y lÃ  má»™t vÄƒn  báº£n báº±ng tiáº¿ng Viá»‡t Sau Ä‘Ã³, Ä‘Ã¢y lÃ  má»™t vÄƒn báº£n khÃ¡c báº±ng ngÃ´n ngá»¯ nÃ y)u   ÄÃ¢yõ   lÃ õ   má»™tu   vÄƒn  báº£nõ   báº±ngu   tiáº¿ngu   Viá»‡tÚSauu   Ä‘Ã³ú,u   Ä‘Ã¢yr   r   u
   vÄƒn báº£nu   khÃ¡cr   u   ngÃ´n ngá»¯u   nÃ yztext,expected_tokensc                 C   s"   dd„ | |ƒD ƒ}||ksJ ‚d S )Nc                 S   s   g | ]}|j ‘qS © )Útext)Ú.0Útokenr   r   úV/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/lang/vi/test_tokenizer.pyÚ
<listcomp>   s    z%test_vi_tokenizer.<locals>.<listcomp>r   )Úvi_tokenizerr   Úexpected_tokensÚtokensr   r   r   Útest_vi_tokenizer   s   r   c                 C   s   | dƒ}|d j dksJ ‚d S )NzI   like cheese.é   z  )Úorth_)r   r   r   r   r   Útest_vi_tokenizer_extra_spaces   s   r   r   c                 C   s   | |ƒ}|j |ksJ ‚d S )N)Útext_with_ws)r   r   r   r   r   r   Ú!test_vi_tokenizer_naughty_strings   s   r   c                 C   sL   | dƒ}t |ƒdksJ ‚| dƒ}t |ƒdksJ ‚| dƒ}t |ƒdks$J ‚d S )NÚ r   ú r   z



 		 


)Úlen)r   Údocr   r   r   Ú test_vi_tokenizer_emptyish_texts    s   r   c                  C   sR   t  ddddiii¡} d}| |ƒ}dd„ |D ƒ| ¡ ksJ ‚|d jd	ks'J ‚d
S )z-Test for whitespace tokenization without pyviÚnlpÚ	tokenizerÚuse_pyviFr   c                 S   s   g | ]}|j s|j‘qS r   )Úis_spacer   )r   Útr   r   r   r   .   s    z-test_vi_tokenizer_no_pyvi.<locals>.<listcomp>é   r   N)r   Úfrom_configÚsplitr   )r   r   r   r   r   r   Útest_vi_tokenizer_no_pyvi)   s
   r'   )ÚpytestÚspacy.lang.vir   Útokenizer.test_naughty_stringsr   ÚTOKENIZER_TESTSÚmarkÚparametrizer   r   r   r   r'   r   r   r   r   Ú<module>   s    
ÿ

	