o
    iu                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZ ddlm	Z	 e j
dd Ze j
d	d
 Zdd Zdd Zdd Zdd Ze jddd ZdS )    N)Languagemerge_subtokens)DocSpan   )clean_underscorec                 C   s(   g d}g d}g d}t | |||dS )N)Thisisasentence.r	   r
   anotherr   r   Andr   thirdr   )   r      r   r      r      r   r            r   )nsubjROOTsubtokattrpunctr   r   r   r   r   r   r   r   r   )wordsheadsdeps)r   )en_vocabr   r   r    r!   W/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/pipeline/test_functions.pydoc
   s   r#   c                 C   sT   g d}g d}g d}g d}g d}t | |||||d}t|ddd	d
g|_|S )N)IlikeNewYorkinAutumnr   )r   r   r   r   r      r   )PRPINNNPr-   r,   r-   r   )PRONVERBPROPNr0   ADPr0   PUNCT)r   prepcompoundpobjr3   r5   r   )r   r   tagsposr   r   r*   GPE)label)r   r   ents)r    r   r   r6   r7   r   r#   r!   r!   r"   doc2   s   r;   c                 C   s&   t | } dd | D g dksJ d S )Nc                 S      g | ]}|j qS r!   text.0tr!   r!   r"   
<listcomp>%       z(test_merge_subtokens.<locals>.<listcomp>)	r	   r
   z
a sentencer   r	   r
   zanother sentencer   zAnd a third .r   )r#   r!   r!   r"   test_merge_subtokens!   s   rD   c                 C   sN   t | dksJ t }|d}||  t | dksJ | d jdks%J d S )N   merge_noun_chunksr   r   New York)lenr   create_piper>   )r;   nlprF   r!   r!   r"    test_factories_merge_noun_chunks)   s   
rK   c                 C   sz   t | dksJ t t| jdksJ t }|d}||  t | dks'J t t| jdks2J | d jdks;J d S )NrE   r   merge_entitiesr   r   rG   )rH   listr:   r   rI   r>   )r;   rJ   rL   r!   r!   r"   test_factories_merge_ents2   s   
rN   c                     s   t  } ddd}| jd|d | d}dd |D g d	ks J | d
}dd |D g dks1J t fdd|D s>J d S )N      )
min_lengthsplit_lengthtoken_splitterconfigzaaaaabbbbbcccccdddd e f gc                 S   r<   r!   r=   r?   r!   r!   r"   rB   B   rC   z'test_token_splitter.<locals>.<listcomp>)aaaaabbbbbcccccddddefgz!aaaaabbbbbcccccdddddeeeeeff g h ic                 S   r<   r!   r=   r?   r!   r!   r"   rB   D   rC   )	aaaaabbbbbcccccdddddeeeeeffrY   hic                 3   s     | ]}t |j jkV  qd S )N)rH   r>   rR   r?   rS   r!   r"   	<genexpr>O   s    z&test_token_splitter.<locals>.<genexpr>)r   add_pipeall)rJ   rU   r#   r!   rb   r"   test_token_splitter=   s   
rf   r   c                  C   s   t  } | d | d}g d|_| |}|jd u sJ t  } | jdddid tt | d}W d    n1 s=w   Y  tjddd	 t  } | jdd
ddiid | d}d|j	_
| |}|j	j
dkslJ d S )Ndoc_cleanerr>   )r   r   r   silentFrT   	test_attr)defaultattrsz_.test_attrr   d   )r   rd   make_doctensorpytestwarnsUserWarningr   set_extension_ri   )rJ   r#   r!   r!   r"   test_factories_doc_cleanerR   s$   




ru   )rp   spacy.languager   spacy.pipeline.functionsr   spacy.tokensr   r   doc.test_underscorer   fixturer#   r;   rD   rK   rN   rf   markusefixturesru   r!   r!   r!   r"   <module>   s    


	
