o
     ¢iD  ã                   @   s  d dl Z d dlmZ d dlmZ g d¢Zg d¢Zg d¢Ze j 	dg d¢¡d	d
„ ƒZ
e j 	de¡e j 	ddg¡dd„ ƒƒZe j 	de¡e j 	ddg¡dd„ ƒƒZe j 	de¡e j 	ddg¡e j 	ddg¡dd„ ƒƒƒZe j 	de¡e j 	ddg¡e j 	ddg¡dd„ ƒƒƒZe j 	de¡e j 	ddg¡dd„ ƒƒZe j 	de¡e j 	ddg¡dd„ ƒƒZe j 	ddg¡dd„ ƒZe j 	ddg¡dd „ ƒZe j 	d!e¡e j 	ddg¡d"d#„ ƒƒZe j 	d!e¡e j 	d$d%g¡e j 	ddg¡d&d'„ ƒƒƒZe j 	d(d)g¡d*d+„ ƒZd,d-„ ZdS ).é    N)ÚTOKENIZER_PREFIXES)Úcompile_prefix_regex)ú(ú[Ú{Ú*)ú)ú]Ú}r   ))r   r   )r   r	   )r   r
   )r   r   Útext)r   z((ú<c                 C   s    | |ƒ}t |ƒt |ƒksJ ‚d S ©N©Úlen©Útl_tokenizerr   Útokens© r   úR/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/lang/tl/test_punct.pyÚ$test_tl_tokenizer_handles_only_punct   s   r   ÚpunctÚMabuhayc                 C   sD   | || ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks J ‚d S ©Né   r   é   ©r   r   ©r   r   r   r   r   r   r   Ú"test_tl_tokenizer_split_open_punct   ó   r   c                 C   sD   | || ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks J ‚d S r   r   r   r   r   r   Ú$test_tl_tokenizer_splits_close_punct   r   r   Ú	punct_addú`c                 C   sZ   | || | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks"J ‚|d j|ks+J ‚d S ©Né   r   r   r   r   ©r   r   r    r   r   r   r   r   Ú,test_tl_tokenizer_splits_two_diff_open_punct#   ó
   r%   c                 C   sZ   | || | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks"J ‚|d j|ks+J ‚d S r"   r   r$   r   r   r   Ú-test_tl_tokenizer_splits_two_diff_close_punct.   r&   r'   c                 C   sL   | || | | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks$J ‚d S )Né   r   r#   r   r   r   r   r   Ú(test_tl_tokenizer_splits_same_open_punct9   ó   r)   c                 C   sL   | || | | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks$J ‚d S )Nr(   r   r   r   r   r   r   r   Ú)test_tl_tokenizer_splits_same_close_punctB   r*   r+   z'Angc                 C   s.   | |ƒ}t |ƒdksJ ‚|d jdksJ ‚d S )Nr   r   ú'r   r   r   r   r   Ú(test_tl_tokenizer_splits_open_apostropheK   s   r-   z	Mabuhay''c                 C   s4   | |ƒ}t |ƒdksJ ‚| dƒ}t |ƒdksJ ‚d S )Nr   z''r   r   )r   r   r   Útokens_punctr   r   r   Ú)test_tl_tokenizer_splits_double_end_quoteR   s   r/   zpunct_open,punct_closec                 C   sZ   | || | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks"J ‚|d j|ks+J ‚d S r"   r   )r   Ú
punct_openÚpunct_closer   r   r   r   r   Ú)test_tl_tokenizer_splits_open_close_punctZ   s
   r2   zpunct_open2,punct_close2)r!   r,   c                 C   s†   | || | | | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks&J ‚|d j|ks/J ‚|d j|ks8J ‚|d j|ksAJ ‚d S )Né   r   r   r   r#   r(   r   )r   r0   r1   Úpunct_open2Úpunct_close2r   r   r   r   r   Ú test_tl_tokenizer_two_diff_punctf   s   r6   z
text,punct)z(sa'yor   c                 C   s&   t tƒj}|| ƒ}| ¡ |ksJ ‚d S r   )r   r   ÚsearchÚgroup)r   r   Útl_search_prefixesÚmatchr   r   r   Ú(test_tl_tokenizer_splits_pre_punct_regexu   s   
r;   c                 C   s*   d}| |ƒ}|t |ƒd  jdksJ ‚d S )Nz(Dumating siya kahapon).r   Ú.r   r   r   r   r   Ú'test_tl_tokenizer_splits_bracket_period|   s   r=   )ÚpytestÚspacy.lang.punctuationr   Ú
spacy.utilr   Ú
PUNCT_OPENÚPUNCT_CLOSEÚPUNCT_PAIREDÚmarkÚparametrizer   r   r   r%   r'   r)   r+   r-   r/   r2   r6   r;   r=   r   r   r   r   Ú<module>   sT    




