o
    i,                     @   s2  d dl Z d dlZd dlmZmZmZmZmZmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ ejdd Zd	d
 Zdd Zejddgdd Zejddgdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Z d$d% Z!d&d' Z"d(d) Z#d*d+ Z$d,d- Z%d.d/ Z&d0d1 Z'd2d3 Z(dS )4    N)IS_ALPHAIS_DIGITIS_LOWERIS_PUNCTIS_STOPIS_TITLE)VERBDoc)Example)Vocabc                 C   s(   g d}g d}g d}t | |||dS )N)Thisisasentence.r   r   anotherr   r   Andr   thirdr   )   r      r   r      r      r   r   
      r   r   )nsubjROOTdetattrpunctr   r   r   r   r   r   r   npadvmodr   wordsheadsdepsr	   )en_vocabr"   r#   r$    r&   R/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/doc/test_token_api.pydoc   s   r(   c                 C   s   g d}g d}g d}g d}t | ||||d}|d jdks"J |d jdks+J |d jdks4J |d jd	ks=J |d jd
ksFJ |d jdksOJ |d jdksXJ |d jdksaJ |d j	dksjJ d S )N)Giveitback!Hepleadedr   )r   PRONPARTPUNCTr/   r   r1   )r   r   r   r      r2   r2   )r   dobjprtr   r   r   r   )r"   posr#   r$   r   r)   zGive giveXxxxGiver   r   )
r
   orth_texttext_with_wslower_shape_prefix_suffix_pos_dep_)r%   r"   r5   r#   r$   r(   r&   r&   r'   test_doc_token_api_strings   s   rC   c                 C   s   d}| |}|d  tsJ |d  trJ |d  ts!J |d  ts*J |d  ts3J |d  ts<J |d  trEJ d S )NGive it back! He pleaded.r   r   r      r2   )
check_flagr   r   r   r   r   r   en_tokenizerr;   tokensr&   r&   r'   test_doc_token_api_flags'   s   rJ   r;   rD   c                 C   s6   |  d }d| j| _| |}|d jdksJ d S )Nr   )splitvocabprob)rH   r;   wordrI   r&   r&   r'   ,test_doc_token_api_prob_inherited_from_vocab4   s   rP   zone twoc                 C   sH   | |}t |d |dd ksJ t |d |dd ks"J d S )Nr    r   )strrL   rG   r&   r&   r'   test_doc_token_api_str_builtin<   s   "rS   c                 C   s   t | g dd}|d jsJ |d jsJ |d jrJ |d js$J |d js+J |d jr2J |d js9J |d js@J d S )N)Hi,myemailr   ztest@me.comr"   r   r   r      r2   )	r
   is_titleis_alphais_digitis_punctis_asciilike_urlis_lower
like_email)r%   r(   r&   r&   r'    test_doc_token_api_is_propertiesC   s   rb   c                  C   s   t  } | jdd | jdtjddgddd | jd	tjdd
gddd t| g dd}|js2J |d js9J |d js@J |d jrGJ d}d
}d||  }|d |d |ks^J d S )NrE   )widthapplesg        g       @f)dtype)vectororangesg      ?)rd   rh   oovrX   r   r   )r   reset_vectors
set_vectornumpyasarrayr
   
has_vector
similarity)rM   r(   apples_normoranges_normcosiner&   r&   r'   test_doc_token_api_vectorsO   s   
rs   c                 C   s   g d}g d}dgt | }t| |||d}dd |d jD dd	gks'J d
d |d jD d	gks6J dd |d jD g ksDJ |d |d sOJ |d |d rZJ d S )N		YesterdayIsawr   dogthatbarkedloudlyr   	rE   rE   rE   rY   rE   r   rY   r   rE   depr!   c                 S      g | ]}|j qS r&   r;   .0tr&   r&   r'   
<listcomp>e       z0test_doc_token_api_ancestors.<locals>.<listcomp>r   rx   rw   c                 S   r~   r&   r   r   r&   r&   r'   r   f   r   r   c                 S   r~   r&   r   r   r&   r&   r'   r   g   r   rE      )lenr
   	ancestorsis_ancestorr%   r"   r#   r$   r(   r&   r&   r'   test_doc_token_api_ancestors_   s    r   c                 C   s  g d}g d}dgt | }t| |||d}|d jdks J |d jdks)J |d jjdks3J |d jjdks=J |d	 jdksFJ |d	 jdksOJ |d	 jjd
ksYJ |d	 jjdkscJ |d
 jdkslJ |d
 jdksuJ |d
 jjd
ksJ |d
 jjd
ksJ |d jjdksJ |d jjdksJ |d
 |d _|d jdksJ |d jdksJ |d jjdksJ |d jjdksJ |d
 jdksJ |d
 jdksJ |d
 jjd
ksJ |d
 jjdksJ |d	 jdksJ |d	 jdksJ |d	 jjd
ksJ |d	 jjdksJ |d jjdks$J |d jjdks/J |d |d _|d jjdksAJ |d jjdksLJ |d
 jjdksWJ |d	 jjdksbJ |d jjdksmJ t| ||dgt | d}t	t
 |d |d _W d    n	1 sw   Y  g d}g d}t| ||dgt | d}|d jsJ |d jsJ |d j|d ksJ |d j|d	 ksJ |d j|d ksJ |d j|d ksJ |d
 |d _|d jsJ |d jsJ |d j|d ksJ |d j|d	 ksJ |d j|d ks'J |d j|d ks3J |d |d _|d jsBJ |d jrJJ |d j|d ksVJ |d j|d ksbJ d S )Nrt   r|   r}   r!   r   r   r2   r   rY   r   r   rE   r   )
r   r   oner   r   r   r   r   r   r   )
r   r   r   r   r   r2   r2   r2   r2   r2   	   )r   r
   n_leftsn_rights	left_edgei
right_edgeheadpytestraises
ValueErroris_sent_start)r%   r"   r#   r$   r(   doc2r&   r&   r'   test_doc_token_api_head_setterl   s   r   c                 C   sP   | d}|d j d u sJ d|d _ |d j du sJ tt|jdks&J d S )N$This is a sentence. This is another.r2   TrE   )r   r   listsentsrH   r(   r&   r&   r'   test_is_sent_start   
   
r   c                 C   sP   | d}|d j d u sJ d|d _|d j du sJ tt|jdks&J d S )Nr   rY   Tr2   rE   )is_sent_endr   r   r   r   r   r&   r&   r'   test_is_sent_end   r   r   c                  C   sN   t t ddgd} d| d _| d jdksJ t| d _| d jdks%J d S )NhelloworldrX   NOUNr   r   r   )r
   r   rA   r   r5   r(   r&   r&   r'   test_set_pos   s
   

r   c                  C   sL   t t ddgd} tt d| d _W d    d S 1 sw   Y  d S )Nr   r   rX   blahr   )r
   r   r   r   r   rA   r   r&   r&   r'   test_set_invalid_pos   s   "r   c                 C   sr   t t| jdksJ | d jjdksJ | d jjdksJ | d jjjjdks+J | d jjjjdks7J dS )zTest token.sent propertyr   r   zThis is a sentence .r   zThis is another sentence .r   N)r   r   r   sentr;   rootr   r   r&   r&   r'   test_tokens_sent   s
   r   c                  C   sH   t t ddgd} | d jdu sJ | d jd u sJ | dr"J d S )Nr   r   rX   r   Tr   
SENT_START)r
   r   r   has_annotationr   r&   r&   r'   test_token0_has_sent_start_true      r   c                  C   sH   t t ddgd} | d jd u sJ | d jdu sJ | dr"J d S )Nr   r   rX   r   r   Tr   )r
   r   r   r   r   r&   r&   r'    test_tokenlast_has_sent_end_true   r   r   c                 C   s   g d}g d}g d}t | |||d}dd |d jD dd	gks$J d
d |d jD dd	gks4J dd |d jD ddgksDJ d S )N)
Theboyandthegirlr   r   manwentr   )
r   r   r   rY   r   rY   r   rY   r   r   )
r   r   ccr   conjr   r   r   r   r   r!   c                 S   r~   r&   r   r   wr&   r&   r'   r      r   z2test_token_api_conjuncts_chain.<locals>.<listcomp>r   r   r   c                 S   r~   r&   r   r   r&   r&   r'   r      r   rY   r   c                 S   r~   r&   r   r   r&   r&   r'   r      r   r   r
   	conjunctsr   r&   r&   r'   test_token_api_conjuncts_chain   s     $r   c                 C   sh   g d}g d}g d}t | |||d}dd |d jD dgks#J d	d |d
 jD dgks2J d S )N)Theycamer   r   r   )r   r   r   r   r   )r   r   r   r   r}   r!   c                 S   r~   r&   r   r   r&   r&   r'   r      r   z3test_token_api_conjuncts_simple.<locals>.<listcomp>r   r   c                 S   r~   r&   r   r   r&   r&   r'   r      r   r   r   r   r   r&   r&   r'   test_token_api_conjuncts_simple   s   "r   c                 C   sd   g d}g d}g d}t | |||d}dd |d jD g ks"J dd |d	 jD g ks0J d S )
N)r   r   r   )r   r   r   )r   r   r   r!   c                 S   r~   r&   r   r   r&   r&   r'   r     r   z0test_token_api_non_conjuncts.<locals>.<listcomp>r   c                 S   r~   r&   r   r   r&   r&   r'   r     r   r   r   r   r&   r&   r'   test_token_api_non_conjuncts   s    r   c                 C   s  g d}g d}g d}t | |||d}dd |D }dd |D }dd |D }d	d |D }d
d |D }	|g dks?J |g dksGJ |dd g dksSJ |dd g dks_J |	g dksgJ t|||d}
dd |
jD }dd |
jD }dd |
jD }dd |
jD }dd |
jD }||ksJ ||ksJ ||ksJ ||ksJ ||	ksJ |
jdd\}}|d |d ksJ |d |d ksJ |d |d ksJ |d |d ksJ dS )zWCheck that the Doc constructor and Example.from_dict parse missing information the same)r   r   r   r   rE   N) r   r3   r   r   N)rv   likeLondonr   Berlinr   r!   c                 S      g | ]}|  qS r&   has_headr   r&   r&   r'   r         z)test_missing_head_dep.<locals>.<listcomp>c                 S   r   r&   has_depr   r&   r&   r'   r     r   c                 S      g | ]}|j jqS r&   r   r   r   r&   r&   r'   r     r   c                 S   r~   r&   rB   r   r&   r&   r'   r     r   c                 S   r~   r&   r   r   r&   r&   r'   r     r   )FTTTTFr   r2   )r   r   r   rE   )r   r3   r   r   )TFFFFF)r#   r$   c                 S   r   r&   r   r   r&   r&   r'   r     r   c                 S   r   r&   r   r   r&   r&   r'   r     r   c                 S   r   r&   r   r   r&   r&   r'   r     r   c                 S   r~   r&   r   r   r&   r&   r'   r     r   c                 S   r~   r&   r   r   r&   r&   r'   r     r   T)projectivizer   N)r
   r   	from_dict	referenceget_aligned_parse)r%   r#   r$   r"   r(   pred_has_headspred_has_deps
pred_heads	pred_depspred_sent_startsexampleref_has_headsref_has_deps	ref_headsref_depsref_sent_startsaligned_headsaligned_depsr&   r&   r'   test_missing_head_dep  s<   r   c                 C   sh   | d}| d}|d |dd krJ |d |dd kr J |d |d kr*J |d |kr2J d S )Nza bzb cr   r   rE   r&   )rH   doc1r   r&   r&   r'   test_token_api_richcmp_other+  s   r   ))rl   r   spacy.attrsr   r   r   r   r   r   spacy.symbolsr   spacy.tokensr
   spacy.trainingr   spacy.vocabr   fixturer(   rC   rJ   markparametrizerP   rS   rb   rs   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r&   r&   r&   r'   <module>   s>     



K	
		#