o
     ¢iF  ã                   @   s  d dl Z d dlmZ d dlmZ g d¢Zg d¢Zg d¢Ze j 	dg d¢¡d	d
„ ƒZ
e j 	de¡e j 	ddg¡dd„ ƒƒZe j 	de¡e j 	ddg¡dd„ ƒƒZe j 	de¡e j 	ddg¡e j 	ddg¡dd„ ƒƒƒZe j 	de¡e j 	ddg¡e j 	ddg¡dd„ ƒƒƒZe j 	de¡e j 	ddg¡dd„ ƒƒZe j 	de¡e j 	ddg¡dd„ ƒƒZe j 	ddg¡dd„ ƒZe j 	ddg¡d d!„ ƒZe j 	d"e¡e j 	ddg¡d#d$„ ƒƒZe j 	d"e¡e j 	d%d&g¡e j 	ddg¡d'd(„ ƒƒƒZe j 	d)d*g¡d+d,„ ƒZd-d.„ ZdS )/é    N)ÚTOKENIZER_PREFIXES)Úcompile_prefix_regex)ú(ú[Ú{Ú*)ú)ú]Ú}r   ))r   r   )r   r	   )r   r
   )r   r   Útext)r   z((ú<c                 C   s    | |ƒ}t |ƒt |ƒksJ ‚d S ©N©Úlen©Úen_tokenizerr   Útokens© r   úR/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/lang/en/test_punct.pyÚ$test_en_tokenizer_handles_only_punct   s   r   ÚpunctÚHelloc                 C   sD   | || ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks J ‚d S ©Né   r   é   ©r   r   ©r   r   r   r   r   r   r   Ú#test_en_tokenizer_splits_open_punct   ó   r   c                 C   sD   | || ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks J ‚d S r   r   r   r   r   r   Ú$test_en_tokenizer_splits_close_punct   r   r   Ú	punct_addú`c                 C   sZ   | || | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks"J ‚|d j|ks+J ‚d S ©Né   r   r   r   r   ©r   r   r    r   r   r   r   r   Ú,test_en_tokenizer_splits_two_diff_open_punct#   ó
   r%   ú'c                 C   sZ   | || | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks"J ‚|d j|ks+J ‚d S r"   r   r$   r   r   r   Ú-test_en_tokenizer_splits_two_diff_close_punct.   r&   r(   c                 C   sL   | || | | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks$J ‚d S )Né   r   r#   r   r   r   r   r   Ú(test_en_tokenizer_splits_same_open_punct9   ó   r*   c                 C   sL   | || | | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks$J ‚d S )Nr)   r   r   r   r   r   r   r   Ú)test_en_tokenizer_splits_same_close_punctB   r+   r,   z'Thec                 C   s.   | |ƒ}t |ƒdksJ ‚|d jdksJ ‚d S )Nr   r   r'   r   r   r   r   r   Ú)test_en_tokenizer_splits_open_appostropheK   s   r-   zHello''c                 C   s4   | |ƒ}t |ƒdksJ ‚| dƒ}t |ƒdksJ ‚d S )Nr   z''r   r   )r   r   r   Útokens_punctr   r   r   Ú)test_en_tokenizer_splits_double_end_quoteR   s   r/   zpunct_open,punct_closec                 C   sZ   | || | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks"J ‚|d j|ks+J ‚d S r"   r   )r   Ú
punct_openÚpunct_closer   r   r   r   r   Ú)test_en_tokenizer_splits_open_close_punctZ   s
   r2   zpunct_open2,punct_close2)r!   r'   c                 C   s†   | || | | | ƒ}t |ƒdksJ ‚|d j|ksJ ‚|d j|ks&J ‚|d j|ks/J ‚|d j|ks8J ‚|d j|ksAJ ‚d S )Né   r   r   r   r#   r)   r   )r   r0   r1   Úpunct_open2Úpunct_close2r   r   r   r   r   Ú test_en_tokenizer_two_diff_punctf   s   r6   z
text,punct)z(can'tr   c                 C   s&   t tƒj}|| ƒ}| ¡ |ksJ ‚d S r   )r   r   ÚsearchÚgroup)r   r   Úen_search_prefixesÚmatchr   r   r   Ú(test_en_tokenizer_splits_pre_punct_regexu   s   
r;   c                 C   s*   d}| |ƒ}|t |ƒd  jdksJ ‚d S )Nz*(And a 6a.m. run through Washington Park).r   Ú.r   r   r   r   r   Ú'test_en_tokenizer_splits_bracket_period|   s   r=   )ÚpytestÚspacy.lang.punctuationr   Ú
spacy.utilr   Ú
PUNCT_OPENÚPUNCT_CLOSEÚPUNCT_PAIREDÚmarkÚparametrizer   r   r   r%   r(   r*   r,   r-   r/   r2   r6   r;   r=   r   r   r   r   Ú<module>   sT    




