o
    i[                     @   s  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZ d dl	m
Z
mZmZmZmZmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
l m!Z! ddl"m#Z# dd Z$ej%&ddd Z'ej%&ddd Z(ej%&ddd Z)ej%&ddd Z*ej%+dg dej%+deegej%&ddd  Z,ej%+d!g d"ej%&d#d$d% Z-ej%&d&d'd( Z.ej%&d&d)d* Z/e0d+G d,d- d-Z1ej%&d.d/d0 Z2ej%&d1d2d3 Z3ej%+dg d4gd5d6 Z4d7d8 Z5ej%+dd9d:gd;d< Z6d=d> Z7d?d@ Z8dAdB Z9dCdD Z:dEdF Z;dGdH Z<ej%+dIg dJg dKe=g dLg dMg dNg dOgfg dPg dQe=g dRg dSg dTg dUg dVg dWg dXg dYg dZg	fgd[d\ Z>d]d^ Z?d_d` Z@dadb ZAej%Bdcddde ZCdfdg ZDdhdi ZEdjdk ZFdldm ZGdndo ZHdpdq ZIdrds ZJdtdu ZKdvdw ZLdxdy ZMdzd{ ZNd|d} ZOd~d ZPdd ZQdd ZRdS )    N)assert_array_equal)NumpyOpsget_current_ops)	DEPENT_IOBENT_TYPEHEADIS_ALPHAMORPHPOS
SENT_STARTTAG)English)MultiLanguage)Language)Lexeme)DocSpan	SpanGroupToken)Vocab   )clean_underscorec                 C   s   g d}g d}t | |g dd}dd |D g dksJ t | ||dgd d	}d
d |D g dks6J t | |dgd |dgd d}dd |D g dksRJ d S )N)abcd)r   r      r   )TFTF)wordssent_startsc                 S      g | ]}|j qS  is_sent_start.0tr!   r!   P/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/doc/test_doc_api.py
<listcomp>#       z%test_doc_api_init.<locals>.<listcomp>dep   r   headsdepsc                 S   r    r!   r"   r$   r!   r!   r'   r(   '   r)   T)r   r   r-   r.   c                 S   r    r!   r"   r$   r!   r!   r'   r(   ,   r)   r   )en_vocabr   r-   docr!   r!   r'   test_doc_api_init   s   r2   i  c                  C   s   g d} t t | d}t|dd|jjd dg|_| }||dd  W d	   n1 s1w   Y  d
d |jD s@J d	S )z9Test that entity labels still match after merging tokens.)

worda.r3   wordb-	Biosphere2r7   z 
r         PRODUCTlabel      Nc                 S   r    r!   text)r%   entr!   r!   r'   r(   7   r)   z"test_issue1547.<locals>.<listcomp>)r   r   r   vocabstringsents
retokenizemerge)r   r1   retokenizerr!   r!   r'   test_issue1547/   s   
rK   i  c                  C   s   t t g dd} | d dk rJ | d dusJ | d dks!J | dd dk r+J | dd dus5J | dd dks?J | jd dusHJ | jd dk rQJ dS )z4Test comparison against None doesn't cause segfault.r   r   r   r:   r   Nr   r   )r   r   rE   )r1   r!   r!   r'   test_issue1757:   s   rM   i\	  c                 C   s   g d}g d}dgt | }tjg dg dg dg dg dg d	gtjd
}t| |||d}|d d  }| |k sAJ | |k sKJ d S )N)Shecreatedr   testforspacy)r   r      r   rS   r+   r*   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   r   r   rS   rS   rS   )r   r   rS   rS   rS   rS   )r   r   rS   rS   r+   r+   )r   r   rS   rS   r+   r@   dtyper,   )lennumpyarrayint32r   get_lca_matrixall)r0   r   r-   r.   matrixr1   spanr!   r!   r'   test_issue2396H   s"   r^   i,  c                 C   sL   ddg}ddg}ddg}t | |||d |ddgksJ |ddgks$J d S )NAsentencezB-TYPE1 TF)rE   r   rG   r   r/   )r0   r   rG   r   r!   r!   r'   test_init_args_unmodified^   s   rb   rC   )z-0.23z+123,456u   ±1lang_clsi
  c                 C   s0   | }|| }t |dksJ |d jsJ dS )z2Check that like_num handles + and - before number.r   r   N)rV   like_num)rC   rc   nlpr1   r!   r!   r'   test_issue2782m   s   rf   r`   )zThe story was to the effect that a young American student recently called on Professor Christlieb with a letter of introduction.zThe next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's #1.zThe next month Barry Siddall joined Stoke City on a free transfer, after Chris Pearce had established himself as the Vale's number onezIndeed, making the one who remains do all the work has installed him into a position of such insolent tyranny, it will take a month at least to reduce him to his proper proportions.zLIt was a missed assignment, but it shouldn't have resulted in a turnover ...i  c                 C   sD   t  }|| }d}|D ]}||j7 }q||tddks J dS )z8Test that the Doc's count_by function works consistentlyr   r   N)r   is_alphacount_byr	   get)r`   re   r1   counttokenr!   r!   r'   test_issue3869x   s   rl   iz  c                 C   s  g d}g d}g d}t | |||d}|dd }| }| }|s&J |d jjdks0J |d jd	ks9J |d jjdksCJ |d jd
ksLJ |d jjdksVJ |d jdks_J |d jjdksiJ |d jd	ksrJ tt|jdks}J |dd }| }	|	 }
|
sJ |	d jjdksJ |	d jdksJ |	d jjdksJ |	d jdksJ |	d jjdksJ |	d jd	ksJ tt|	jdksJ dS )Ensure that as_doc does not result in out-of-bound access of tokens.
    This is achieved by setting the head to itself if it would lie out of the span otherwise.
    )Hejestsatscars,thatneverfeltr   woundr5   )r   rA   r   r   rA   rA   rA   rA   	   rA   rA   )nsubjccomppreppobjpunctrx   negROOTdetdobjr|   r,   r   r@   r   ro   r*   rz   r   rp   r{   rS   r;   rw   ru   r}   r~   N)	r   as_docto_jsonheadrC   dep_rV   listsents)r0   r   r-   r.   r1   span2doc2	doc2_jsonspan3doc3	doc3_jsonr!   r!   r'   test_issue3962   s8   r   c           	      C   sr  g d}g d}g d}t | |||d}|dd }| }| }|s&J |d jjdks0J |d jd	ks9J |d jjdksCJ |d jd
ksLJ |d jjdksVJ |d jdks_J |d jjdksiJ |d jdksrJ |d jjdks|J |d jdksJ |d jjdksJ |d jdksJ t|j}t|dksJ |d jdksJ |d jdksJ dS )rm   )rn   ro   rp   rq   r5   Theyrt   ru   r   rv   r5   )r   r   r   r   r   rA   rA   rA   rw   rA   rA   )rx   r~   rz   r{   r|   rx   r}   r~   r   r   r|   r,   r   rA   r   ro   r~   rz   r   rp   r{   rS   r|   r+   r   r*   zjests at scars .z
They neverN)	r   r   r   r   rC   r   r   r   rV   )	r0   r   r-   r.   two_sent_docr   r   r   r   r!   r!   r'   test_issue3962_long   s0   
r   my_pipec                   @   s*   e Zd Zd	ddZdd Zedd ZdS )

CustomPiper   c                 C   s(   || _ tjd| jd tjdd d d S )Nmy_ext)getterdefault)namer   set_extension_get_my_extr   )selfre   r   r!   r!   r'   __init__   s   zCustomPipe.__init__c                 C   sJ   g }|j D ]}| |}|jd| || q|jdd| |S )Nr   r3   )r   r   _setappendjoin)r   r1   gathered_extsentsent_extr!   r!   r'   __call__   s   

zCustomPipe.__call__c                 C   s
   t | jS N)strendr]   r!   r!   r'   r      s   
zCustomPipe._get_my_extN)r   )__name__
__module____qualname__r   r   staticmethodr   r!   r!   r!   r'   r      s
    

r   i'  c                  C   s   t  } | d | jddd g d}tt tr=t| j|dd}|d jdks+J |d	 jd
ks4J |d jdks?J dS dS )zQEnsure that this runs correctly and doesn't hang or crash on Windows /
    macOS.sentencizerr   )after)I like bananas.Do you like them?No, I prefer wasabi.r   )	n_processr   r   r   r   r   N)r   add_pipe
isinstancer   r   r   piperC   )re   rC   docsr!   r!   r'   test_issue4903   s   
r   i  c                    s   g d}g d}g d}g d}g d}| j  |D ]} | q fdd|D } fdd|D } fd	d|D }	tttg}
tjtt|||	d
d}t	| ||d}|
|
| dd |D }t	| ||||d}dd |D }||ksxJ d S )N)Thisisr   r`   )DETVERBr   NOUN) r   r   ra   )r*   adjnnatm)DTVBZr   NNc                       g | ]}  |qS r!   add)r%   r   rF   r!   r'   r(   	      z"test_issue5048.<locals>.<listcomp>c                    r   r!   r   )r%   pr   r!   r'   r(   
  r   c                    r   r!   r   r$   r   r!   r'   r(     r   uint64rT   )r   spacesc                 S      g | ]
}|j |j|jfqS r!   rC   pos_tag_r%   rk   r!   r!   r'   r(         )r   posr.   tagsc                 S   r   r!   r   r   r!   r!   r'   r(     r   )rF   r   r   r   r   rW   rX   r   zipr   
from_array)r0   r   pos_sr   deps_stags_swr.   r   r   attrsrX   r1   v1r   v2r!   r   r'   test_issue5048   s&   
r   )onetwothreec                 C   s~   t | |d}|d }|d }|d }|\}}}||  k r"|k s%J  J ||kr+J ||ks1J ||ks7J ||ks=J d S )Nr:   r/   )r0   rC   r1   token3token2token1r!   r!   r'   'test_doc_api_compare_by_string_position  s   
r   c                 C   s  d}| |}|d j dksJ |d j dksJ tt |t|  W d    n1 s.w   Y  dd }|dd }||rCJ |dd	 }||d
ksQJ |dd	d }||d
ks`J tt |dd	d  W d    n1 sww   Y  tt |dd	d  W d    n1 sw   Y  |dd }||dksJ |d	d }||dksJ |dd }||dksJ |dd	 }|j|j  krdkrn J ||rJ |d	d }|j|j  krd	krn J ||rJ |d d  }||dksJ |d	d  }||dksJ |d d	 }||dks%J |d d }||dks4J |dd  }||dksCJ |d	d }||dksRJ |dd	 }||dksaJ |dd }|j|j  krudkr}n J ||rJ |dd }|j|j  krdkrn J ||rJ |dd	 }|d jdksJ |d d  }||d
ksJ |d d }||dksJ |dd  }||dksJ |d d }||dksJ |dd  }||dksJ |dd }||dksJ |dd }||dksJ |dd }||d
ks%J |dd }|j|j  kr9d	krAn J ||rCJ d S )NGive it back! He pleaded.r   Giver   r5   c                 S   s   d dd | D S )N/c                 s   s    | ]}|j V  qd S r   rB   r   r!   r!   r'   	<genexpr>.  s    z7test_doc_api_getitem.<locals>.to_str.<locals>.<genexpr>)r   r   r!   r!   r'   to_str-  s   z$test_doc_api_getitem.<locals>.to_strr   r+   z	it/back/!r   r;   z
He/pleadedzback/!r@   zGive/it/back/!/He/pleaded/.zHe/pleaded/.zGive/it/back/!2   ii(   rA   itzit/backr   back)	rC   pytestraises
IndexErrorrV   
ValueErrorstartr   orth_)en_tokenizerrC   tokensr   r]   subspanr!   r!   r'   test_doc_api_getitem%  s   **004r   r   z Give it back! He pleaded. c                 C   s&  | |}d|d _ d|d _|jjd ddfg|_d|d _d|d _t|j|	 }|j
|j
ks5J dd	 |D dd	 |D ksEJ d
d	 |D d
d	 |D ksUJ |d j dks^J |d jdksgJ |d jdkspJ |d jdksyJ t|jj|j	dgddgd}|j
|j
ksJ dd	 |D dd	 |D ksJ dd	 |D dd	 |D ksJ t|jj|j	dgddgd}|j
|j
ksJ dd	 |D dd	 |D ksJ dd	 |D dd	 |D ksJ dd }|	 }tt ||jd< |	 }W d    d S 1 sw   Y  d S )Nlemmar   normr=   r   	ent_kb_ident_idc                 S   r    r!   rB   r$   r!   r!   r'   r(   |  r)   z*test_doc_api_serialize.<locals>.<listcomp>c                 S   r    r!   orthr$   r!   r!   r'   r(   }  r)   tensorexcludec                 S   r    r!   rB   r$   r!   r!   r'   r(     r)   c                 S   r    r!   r   r$   r!   r!   r'   r(     r)   	sentimentc                 S   r    r!   rB   r$   r!   r!   r'   r(     r)   c                 S   r    r!   r   r$   r!   r!   r'   r(     r)   c                 S   s   dS )Nzhello!r!   )d1d2r!   r!   r'   
inner_func  s   z*test_doc_api_serialize.<locals>.inner_func
similarity)lemma_norm_rE   rF   rG   
ent_kb_id_ent_id_r   
from_bytesto_bytesrC   r   warnsUserWarning
user_hooks)r   rC   r   
new_tokensr  r   r!   r!   r'   test_doc_api_serializep  s@   



  
  
  

$r  c                 C   s   d}| |}t |jdksJ |jjd ddfg|_t t|jdks%J dd |D g d	ks2J |jd jdks<J |jd jdksFJ |jd jdksPJ d S )
Nz#I use goggle chrone to surf the webr   r=   r   r+   r   c                 S   r    r!   ent_iobr$   r!   r!   r'   r(     r)   z)test_doc_api_set_ents.<locals>.<listcomp>)r   r   rS   r   r   r   r   r   )rV   rG   rE   rF   r   label_r   r   )r   rC   r   r!   r!   r'   test_doc_api_set_ents  s   r  c                 C   s&   | d}t |j}t|dksJ d S )Nra   r   )r   r   rV   )r   r1   r   r!   r!   r'   test_doc_api_sents_empty_string  s   
r  c           	      C   s   d}g d}| |}t |jdd |D |d}g }|jD ]-}t|dkr>|d jdvr>|dd  }t|dkr>|d jdvs+t|dkrI|| q|  }|D ]}|jj|j	|jj
d	}|j||d
 qQW d    d S 1 sqw   Y  d S )Nu   67% of black households are single parent 

72% of all black babies born out of wedlock 

50% of all black kids don’t finish high school)nummodrx   rz   amodr{   r~   r  attrra   r  apposrz   r   r  r{   aclrz   rz   r{   ra   r  rx   rz   r   r  r{   auxr}   ry   r  r   c                 S   r    r!   rB   r$   r!   r!   r'   r(     r)   z.test_doc_api_runtime_error.<locals>.<listcomp>)r   r.   r   r   )advmodr  compound)tagr   ent_typer   )r   rE   noun_chunksrV   r   r   rH   rootr   rC   	ent_type_rI   )	r   rC   r.   r   r1   npsnprJ   r   r!   r!   r'   test_doc_api_runtime_error  s*   


"r)  c                 C   s|   g d}g d}dgt | }t| |||d}|d jdks J dd |d jD }|g d	ks2J |d jjd
ks<J dS )zHTest for bug occurring from Unshift action, causing incorrect right edge)Ihaveproposedtomyselfrr   rQ   thesakeofsuchasliveunderr/  
governmentr1  r/  Romansrr   r-  	translatethosebooksintor/  Greektonguer5   )r   r   r   r   rS   r      r<   r;   r<      r<   r?           rA        r@  r>  r      r>  r>     rF     r   r*   r,   r;   rQ   c                 S   r    r!   rB   )r%   r   r!   r!   r'   r(     r)   z+test_doc_api_right_edge.<locals>.<listcomp>)rQ   r/  r0  r1  r2  r3  r4  r5  r/  r6  r1  r/  r7  rr   rr   N)rV   r   rC   subtree
right_edge)r0   r   r-   r.   r1   rH  r!   r!   r'   test_doc_api_right_edge  s   rJ  c                  C   sJ   t  } | jdd | jdtjddgddd t| dgd	}|js#J d S )
Nr   )widthkitten        g       @frT   )vectorr:   )r   reset_vectors
set_vectorrW   asarrayr   
has_vector)rE   r1   r!   r!   r'   test_doc_api_has_vector  s
   rT  c                  C   s   t t dgd} | | d dksJ | | jd dksJ t | jg dd}tt | |d d dks;J | |dksDJ W d    d S 1 sOw   Y  d S )Nr   r:   r   g      ?rL   r   rM  )r   r   r  rE   r   r  r  )r1   r   r!   r!   r'   test_doc_api_similarity_match  s   "rU  zwords,heads,lca_matrix)r/  lazydogslept)r   r   rS   rS   )r   r   r   rS   )r   r   r   rS   )r   r   r   rS   )rS   rS   rS   rS   )	TherV  rW  rX  r5   rY  quickfoxjumped)	r   r   rS   rS   rS   rA   rA   r<   r<   )	r   r   r   rS   rS   r   r   r   r   )	r   r   r   rS   rS   r   r   r   r   )	r   r   r   rS   rS   r   r   r   r   )	rS   rS   rS   rS   rS   r   r   r   r   )	rS   rS   rS   rS   r+   r   r   r   r   )	r   r   r   r   r   r@   rA   rA   r<   )	r   r   r   r   r   rA   r;   rA   r<   )	r   r   r   r   r   rA   rA   rA   r<   )	r   r   r   r   r   r<   r<   r<   r<   c                 C   sf   t | ||dgt| d}| }||k sJ |d dks!J |d dks)J |d dks1J d S )Nr*   )r-   r.   )r   r   r   )r   r   r   )r   r   )r   rV   rZ   r[   )r0   r   r-   
lca_matrixr1   lcar!   r!   r'   test_lowest_common_ancestor  s   r_  c                 C   s   g d}t | |d}|drJ t|ddddg|_|ds"J tjddgddgddgd	dgd	d
ggdd}t | |dttg|}|dsKJ t | 	|
 }|ds[J d S )N)r*  r4  inNewYorkr:   r   rS   r@   GPEr>   r   i  r   r   rT   )r   has_annotationr   rG   rW   rX   r   r   r   r  r  )r0   r   r1   arrnew_docr!   r!   r'   test_doc_is_nered  s   ,rg  c                 C   sT  g d}g d}g d}t | |||d}ttg}||}t | |d}||| | }||}t  td ||| W d    n1 sLw   Y  tg}||}t | |d}||| dd |D dd |D ksuJ |	d	r|J tt
g}||}t | |d}||| d
d |D d
d |D ksJ |	d	sJ d S )N)
r*  r4  r`  ra  rb  r5   r*  likecatsr5   )
r   r   r   r   r   r   r;   r;   r;   r;   )
r~   r*   r*   r*   r*   r*   r~   r*   r*   r*   r,   r:   errorc                 S   r    r!   r"   r$   r!   r!   r'   r(   7  r)   z3test_doc_from_array_sent_starts.<locals>.<listcomp>r   c                 S   r    r!   r"   r$   r!   r!   r'   r(   >  r)   )r   r   r   to_arrayr   _get_array_attrswarningscatch_warningssimplefilterrd  r   )r0   r   r-   r.   r1   r   re  rf  r!   r!   r'   test_doc_from_array_sent_starts   s4   




 
 rp  c                 C   s   g d}g d}t | ||d}tg}||}t | |d}||| dd |D |ks.J dd |D dd |D ks>J d S )N)r*  r4  r`  ra  rb  r5   )zFeat1=AzFeat1=BzFeat1=CzFeat1=A|Feat2=DzFeat2=EzFeat3=F)r   morphsr:   c                 S      g | ]}t |jqS r!   r   morphr$   r!   r!   r'   r(   L  r   z-test_doc_from_array_morph.<locals>.<listcomp>c                 S   rr  r!   rs  r$   r!   r!   r'   r(   M  r   )r   r
   rk  r   )r0   r   rq  r1   r   re  rf  r!   r!   r'   test_doc_from_array_morphB  s   
$ru  r   c                    sp  g d}dd |D }d} fdd|D }|d dd g|d j d	< |d
 dd g|d
 j d	< |d dd g|d j d	< t|d dd j|d
 dd j|d dd jg}||}tjddd d|d d
 j_d|d
 d j_tg d u sJ |t|gusJ t	|t	t|gksJ t
t t||g  W d    n1 sw   Y  t|}t|tt|jksJ t|jt|d t|d  ksJ |jddd |D ksJ |t|d d  }	|	jdkrt|	jsJ dd |D }
t|t|
ksJ t|d d |d
 d }|d
 jjdu s+J |d j|ks5J |d jjdu s@J tdd |dd D rPJ d	|j v sXJ |tdd |j d	 D ksiJ t|d jssJ tj|dd}t|tt|jksJ t|jtdd |D ksJ |jd|ksJ |t|d d  }	|	jdkrt|	jrJ dd |D }
t|t|
ksJ t|d d |d
 d }|d j|ksJ d	|j v sJ |tdd |j d	 D ks J t|d js
J tj|g d d!}t|jt|d t|d  ks'J |jdd"d |D ks7J |t|d d  }	|	jdkrMt|	jsOJ d#d |D }
t|t|
ksaJ t|d d |d
 d }|d j|kszJ d	|j v sJ |td$d |j d	 D ksJ tj|d%gd&}d	|j vsJ tj|d'gd&}|ji ksJ t dgd( } fd)d|D }t|}d	|j vsJ |D ]}g |j d	< qt|}d	|j v sJ t|j d	 dksJ t }|D ]}|d*d |D |_qt|}t||j||jd+d |D  tj|d,gd&}|jjd-ks6J d S ).N)Merging the docs is fun.ra   zThey don't think alike. ra   zAnother doc.c                 S   s   g | ]}t |r|qS r!   rV   r$   r!   r!   r'   r(   Y  s    z*test_doc_api_from_docs.<locals>.<listcomp>zWie war die Frage?c                       g | ]} |qS r!   r!   r%   rC   r   r!   r'   r(   [      r   r   r+   groupr   is_ambiguousFr   TrS   r   c                 S      g | ]}|  qS r!   stripr$   r!   r!   r'   r(   p  r{  r5   c                 S      g | ]	}|D ]}|qqS r!   r!   r%   r1   r&   r!   r!   r'   r(   s      thinkrw   c                 S   s   g | ]}|j jqS r!   )r   r}  r$   r!   r!   r'   r(   y  r{  r<   c                 S   r    r!   rB   r%   sr!   r!   r'   r(   {  r)   r?  )ensure_whitespacec                 s   s    | ]}t |V  qd S r   rw  r$   r!   r!   r'   r     s    z)test_doc_api_from_docs.<locals>.<genexpr>ra   c                 S   r  r!   r!   r  r!   r!   r'   r(     r  c                 S   r    r!   rB   r  r!   r!   r'   r(     r)   )r   lengthr   r#  c                 S   r~  r!   r  r$   r!   r!   r'   r(     r{  c                 S   r  r!   r!   r  r!   r!   r'   r(     r  c                 S   r    r!   rB   r  r!   r!   r'   r(     r)   spansr  	user_data
   c                    rx  r!   r!   ry  rz  r!   r'   r(     r{  c                 S   s   g | ]	}t |jd gqS )rM  )rV   rC   r$   r!   r!   r'   r(     r  c                 S   s   g | ]	}t |r|jqS r!   )rV   r  )r%   r1   r!   r!   r'   r(     r  r  )r   ) r  sortedrC   r   r   r   r}  r   	from_docsr   r   r   r   rV   r   r   r   boolwhitespace_indexidxanysumr  r   rR  r  r   to_numpyxpvstackshape)r   de_tokenizeren_textsen_texts_without_emptyde_texten_docsspan_group_textsde_docm_docp_tokenen_docs_tokens	think_idxr1   opsr!   rz  r'   test_doc_api_from_docsP  s   2
& """( "



r  c                    s`   ddg} fdd|D }d|d _ t|d dddd	f|d _ t|}t|j dks.J d S )
Nrv  zThey don't think alike.c                    rx  r!   r!   r$   rz  r!   r'   r(     r{  z/test_doc_api_from_docs_ents.<locals>.<listcomp>r!   r   r   foor>   )rG   r   r   r  rV   )r   textsr   r1   r!   rz  r'   test_doc_api_from_docs_ents  s   

r  c                 C   s   t | ddgd}|jdksJ |j| jd ksJ |d jdks"J |d j| jd ks.J t }|d}|jdks<J |j| jd ksFJ |d jdksOJ |d j| jd ks[J d S )NHelloworldr:   enr   zHello world)r   lang_langrF   r   )r0   r1   re   r!   r!   r'   test_doc_lang  s   r  c                 C   sN   t | ddgdd }t|jtsJ |jj|jksJ | |j |jks%J dS )z%Test that tokens expose their lexeme.r  r  r:   r   N)r   r   lexr   rC   r   )r0   rk   r!   r!   r'   test_token_lexeme  s   r  c                 C   s@  t | ddgd}d}|D ]}||rJ |j|ddrJ qd|d _d	|d _|d d
 d|d _d|d _|d |d _|jt	|ddddgdd |D ]}||sZJ |j|ddrcJ qQd|d _d	|d _|d d d|d _d|d _t	|ddddg|_
|D ]}||sJ |j|ddsJ qd S )Nr  r  r:   )r   r   r
   LEMMAr   r   r   r   Trequire_completer_   r   XFeat=Valr   r*   r   HELLOr>   missingr   ra   r   )r   rd  r   r   	set_morphr	  r   r   set_entsr   rG   r0   r1   r   r  r!   r!   r'   test_has_annotation  s2   







r  c                 C   s   t | g dd}d}|D ]}||rJ |j|ddrJ qd|d _|D ]}||s/J |j|ddr8J q&d|d _|D ]}||sIJ |j|ddsRJ q@d S )	N)r  	beautifulr  r:   )r   IS_SENT_STARTIS_SENT_ENDTr  Fr   r   )r   rd  r#   r  r!   r!   r'   test_has_annotation_sents  s   

r  c                 C   s   | d}t   |j W d    n1 sw   Y  t   |j W d    n1 s-w   Y  t   |j W d    n1 sDw   Y  t   |j W d    d S 1 s\w   Y  d S )NrP   )r   deprecated_call	is_tagged	is_parsedis_neredis_sentencedr   r1   r!   r!   r'   test_is_flags_deprecated  s   



"r  c                 C   s  | d}| t|dddt|dddg dd |D g d	ks"J d
d |D g dks/J | d}| t|dddt|dddg |j t|dddgdd dd |D g dks^J dd |D g dkskJ | d}|j t|dddt|dddg|dd gd dd |D g dksJ dd |D g dksJ | d}|j t|dddt|dddg|dd gdd dd |D g dksJ dd |D g dksJ | d}|j g |dd |dd gdd d d |D g d!ksJ d"d |D g d#ks	J |jt ksJ t|ddd$g|_d%d |D g d&ks)J |j g |dd gdd d'd |D g d(ksDJ | d}|j t|dddg|dd g|dd g|dd gdd) d*d |D g d+ksuJ d,d |D g d-ksJ | d}tt |j g |dd d W d    n	1 sw   Y  tt |j g |dd gd.d/ W d    n	1 sw   Y  tt |j g |dd g|dd gd0 W d    d S 1 sw   Y  d S )1Nz	a b c d er   r   r  rS   r?  c                 S   r    r!   r  r$   r!   r!   r'   r(      r)   z%test_doc_set_ents.<locals>.<listcomp>)rS   rS   r   r   r   c                 S   r    r!   r"  r$   r!   r!   r'   r(   !  r)   )r  r?  r?  r   r   r   r@  
unmodifiedr   c                 S   r    r!   r  r$   r!   r!   r'   r(   '  r)   )rS   r   rS   r   r   c                 S   r    r!   r  r$   r!   r!   r'   r(   (  r)   )r@  r@  r?  r   r   r+   r@   )r  c                 S   r    r!   r  r$   r!   r!   r'   r(   -  r)   )rS   rS   r   r   r   c                 S   r    r!   r  r$   r!   r!   r'   r(   .  r)   r  )outsider   c                 S   r    r!   r  r$   r!   r!   r'   r(   7  r)   )rS   rS   r   r   r   c                 S   r    r!   r  r$   r!   r!   r'   r(   8  r)   )blockedr   c                 S   r    r!   r  r$   r!   r!   r'   r(   =  r)   )r   rS   r   rS   rS   c                 S   r    r!   r  r$   r!   r!   r'   r(   >  r)   )r   r   r   r   r   ENTc                 S   r    r!   r  r$   r!   r!   r'   r(   C  r)   )r   r   r   rS   r   c                 S   r    r!   r  r$   r!   r!   r'   r(   E  r)   )r   r   r   rS   rS   )r  r  r  r   c                 S   r    r!   r  r$   r!   r!   r'   r(   P  r)   )rS   rS   r   r   r   c                 S   r    r!   r  r$   r!   r!   r'   r(   Q  r)   )r  r   r   r   r   none)r  r   )r  r  )r  r   rG   tupler   r   r   r  r!   r!   r'   test_doc_set_ents  sd   ""0$&$r  c                  C   s   g d} t t | d}d|jjdddfg|_dd |jD d	dgks&J t }d|jdddfg}g d
}t || |d}dd |jD d	dgksMJ dS )zaTest that both strings and integers can be used to set entities in
    tuple format via doc.ents.r   r   r   r   er:   )r  r   r   WORLDrS   r@   c                 S   r    r!   r  r%   r  r!   r!   r'   r(   e  r)   z(test_doc_ents_setter.<locals>.<listcomp>r  )zB-HELLOzI-HELLOOzB-WORLDzI-WORLDr   rG   c                 S   r    r!   r  r  r!   r!   r'   r(   j  r)   N)r   r   rE   rF   r   rG   )r   r1   rE   rG   r!   r!   r'   test_doc_ents_setter_  s    r  c                 C   s   | d}| d}|d}|d j |d _ |d j jdksJ |d j jdks(J |d d |d j |d _ |d j |d j ksCJ |d j |d _ |d j |d j ksWJ |d d tt |d j |d _ W d    d S 1 sww   Y  d S )Nza bzc dr   r   r  z
Feat2=Val2)rt  keyr  r   r   r   )r   r  doc1doc1br   r!   r!   r'   test_doc_morph_setterm  s   "r  c                  C   s  g d} dgt |  }tt | |d}|jdksJ g d}tt | |d}t |jdks/J g d}tt | |d}t |jdksDJ g d	}tt | |d}t |jdksYJ g d
}tt | |d}t |jdksnJ g d}tt tt | |d}W d   n1 sw   Y  g d}tt tt | |d}W d   n1 sw   Y  g d}tt tt | |d}W d   n1 sw   Y  g d}tt tt | |d}W d   dS 1 sw   Y  dS )z2Test ents validation/normalization in Doc.__init__r  r  r  r!   )B-PERSONI-PERSONr  r  r  r   )r  r  r  r  I-GPErS   )r  r  r  Nr  )ra   r  r  r  r  )zQ-PERSONr  r  r  r  N)OPERSONr  r  r  r  )r  B-r  r  r  )r   r  r  r  r  )rV   r   r   rG   r   r   r   )r   rG   r1   r!   r!   r'   test_doc_init_iob  s@   "r  c                 C   s   | d}t |ddddt |ddddg}| }|D ]}|| qW d    n1 s-w   Y  tt ||_W d    d S 1 sFw   Y  d S )N/Some text about Colombia and the Czech RepublicrS   r+   rc  r>   r;   r<   )r   rH   rI   r   r   r   rG   )r   r1   r  rJ   r]   r!   r!   r'   test_doc_set_ents_invalid_spans  s    
"r  c                  C   sL   d} t  }|| }tt t|j}W d   dS 1 sw   Y  dS )zNTest that a language without noun_chunk iterator, throws a NotImplementedErroru   Může data vytvářet a spravovat, ale především je dokáže analyzovat, najít v nich nové vztahy a vše přehledně vizualizovat.N)r   r   r   NotImplementedErrorr   r$  )rC   re   r1   r   r!   r!   r'   $test_doc_noun_chunks_not_implemented  s   "r  c                 C   st  | d}t |ddddg|jd< d|jv sJ d|jvsJ t|jd dks)J |jd d jdks5J |jd |dd  t|jd d	ksLJ |jd d jd
ksXJ dd |jd D dd
gkshJ |jd jrpJ t |ddddt |ddddg|_|jd |j t|jd dksJ dd |jd D g dksJ |jd jsJ |jd= d|jvsJ d S )Nr  rS   r+   byer>   hir   r   r   zSome text aboutc                 S   r    r!   rB   r%   r]   r!   r!   r'   r(     r)   z$test_span_groups.<locals>.<listcomp>Colombiarc  r;   r<   c                 S   r    r!   r  r  r!   r!   r'   r(     r)   )r  ra   rc  rc  )	r   r  rV   r  r   rC   has_overlaprG   extendr  r!   r!   r'   test_span_groups  s$    " r  c                 C   s@   | d}t ||jjksJ | }t ||jjksJ d S )Nr  )weakrefrefr  doc_refcopy)r   r  r   r!   r!   r'   test_doc_spans_copy  s   r  c                 C   s   | d}|j d t|j d dksJ |j jd|dd gd t|j d dks-J |j jdt||dd |dd gd	d t|j d dksNJ d S )
Nr  key1r   key2r   r   key3r   )r  )r  
setdefaultrV   r   r  r!   r!   r'   test_doc_spans_setdefault  s   ,r  )Srm  r  rW   r   numpy.testingr   	thinc.apir   r   spacy.attrsr   r   r   r   r	   r
   r   r   r   spacy.lang.enr   spacy.lang.xxr   spacy.languager   spacy.lexemer   spacy.tokensr   r   r   r   spacy.vocabr   test_underscorer   r2   markissuerK   rM   r^   rb   parametrizerf   rl   r   r   factoryr   r   r   r   r   r  r  r  r)  rJ  rT  rU  rX   r_  rg  rp  ru  usefixturesr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r!   r!   r!   r'   <module>   s    ,















'
$




K
'
 
	"

l	C.
	