o
    i\5                     @   s  d dl Z d dlZd dlZd dlmZ d dlmZmZmZ ddlm	Z	 e 
 dd Ze 
 dd	 Ze 
 d
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Z d2d3 Z!dS )4    N)schemas)DocSpanToken   )clean_underscorec           	      C   sZ   g d}g d}g d}g d}g d}g d}g d}g d}t | ||||||||d		S )
Ncde)TTTVERBNOUNr   VBPNNr   )r   r   r   )ROOTdobjr   OzB-ORGr   Feat1=AFeat1=BFeat1=A|Feat2=D)wordsspacespostagsheadsdepsentsmorphsr   )	en_vocabr   r   r   r   r   r   r    r!    r$   \/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/doc/test_json_doc_conversion.pydoc   s&   r&   c              	   C   sB   g d}g d}g d}g d}g d}t | |||||g ddS )Nr   r   r   r   r   )TFT)r   r   r   r    r!   sent_startsr"   )r#   r   r   r   r    r!   r$   r$   r%   doc_without_deps#   s   r(   c                   C   s\   dddddgdddgddd	d
dddddd	dddddddddddddddd	dgdS )Nc d e       ORG)startendlabelr      )r-   r.   r   r   r   r   r   )idr-   r.   tagr   morphdepheadr   r   r   r      r   )textr    sentstokensr$   r$   r$   r$   r%   doc_json6   s@   
r:   c                 C   s  |   }|d dksJ t|d dksJ |d d d dks"J |d d d d	ks.J |d d d
 dks:J t|d dksDJ |d d d dksPJ |d d d dks\J |d d d dkshJ tttj|dksuJ tt||ksJ d S )Nr7   r)   r9   r+   r   r   r   r2   r   r4   r   r    r   r-   r*   r.   r/   r,   )to_jsonlenr   validateDocJSONSchemasrsly
json_loads
json_dumpsr&   json_docr$   r$   r%   test_doc_to_json_   s   rD   c                 C   s   t jddd t jddd d| j_g d| j_| jddgd}d|v s&J |d d dks0J |d d g dks<J tttj	|d	ksIJ t
t
||ksUJ d S )
N
json_test1Fdefault
json_test2hello worldr   r*   r+   
underscore_r   )r   set_extensionrM   rE   rH   r;   r<   r   r=   r>   r?   r@   rA   rB   r$   r$   r%   test_doc_to_json_underscoren   s   rO   c                 C   s  t jddd t jddd tjddd tjddd d| j_g d| j_d	| d
d j_d| d
d j_d| d
 j_d| d j_| d
d g| j	d< | j
g dd}d|v sZJ |d d dksdJ |d d g dkspJ d|v svJ d|v s|J |d d d
 d dksJ |d d d d dksJ |d d d
 d d	ksJ |d d d d dksJ tttj|d
ksJ tt||ksJ d S )NrE   FrF   rH   
token_test	span_testrI   rJ   span_attributer   r   span_attribute_2r*   u   v   
span_grouprE   rH   rP   rQ   rK   rM   underscore_tokenunderscore_spanvalue)r   rN   r   r   rM   rE   rH   rQ   rP   spansr;   r<   r   r=   r>   r?   r@   rA   rB   r$   r$   r%   +test_doc_to_json_with_token_span_attributes|   s2   r\   c                 C   s  t jddd tjddd tjddd d| j_d| dd	 j_d
| d j_| jg dd}d| j	d< d| j	d< d|v s?J |d d dksIJ d|v sOJ d|v sUJ |d d d d d
kscJ |d d d d dksqJ t
ttj|dks~J tt||ksJ d S )N	json_testFrF   rP   rQ   rI   rR   r   r   rT   )r]   rP   rQ   rK   
   user_data_test)user_data_test2TrM   rX   rY   rZ   )r   rN   r   r   rM   r]   rQ   rP   r;   	user_datar<   r   r=   r>   r?   r@   rA   rB   r$   r$   r%   &test_doc_to_json_with_custom_user_data   s"   

rb   c                 C   s  t jddd tjddd tjddd d| j_d| dd j_d| d j_| jdgd	}d
|v s4J |d
 d dks>J d|v sDJ d|v sJJ |d d d d dksXJ |d d d d dksfJ tt	tj
|dkssJ tt||ksJ d S )Nmy_extFrF   rI   rR   r   r   rT   rK   rM   rX   rY   rZ   )r   rN   r   r   rM   rc   r;   r<   r   r=   r>   r?   r@   rA   rB   r$   r$   r%   0test_doc_to_json_with_token_span_same_identifier   s   rd   c                 C   s   t jddd tjddd d| dd j_d| d j_| jdgd	}d
|v s)J |d
 d d d dks7J d|vs=J tt	tj
|dksJJ d S )NrP   FrF   rQ   rR   r   r   rT   rK   rY   rZ   rX   )r   rN   r   rM   rQ   rP   r;   r<   r   r=   r>   rB   r$   r$   r%   .test_doc_to_json_with_token_attributes_missing   s   re   c                 C   s>   t t | jdgd W d   dS 1 sw   Y  dS )z`Test that Doc.to_json() raises an error if a custom attribute doesn't
    exist in the ._ space.
json_test3rK   N)pytestraises
ValueErrorr;   r&   r$   r$   r%   &test_doc_to_json_underscore_error_attr   s   "rk   c                 C   sP   t jddd d tt | jdgd W d   dS 1 s!w   Y  dS )z`Test that Doc.to_json() raises an error if a custom attribute value
    isn't JSON-serializable.
json_test4c                 S   s   | j S Nr7   rj   r$   r$   r%   <lambda>   s    z=test_doc_to_json_underscore_error_serialize.<locals>.<lambda>)methodrK   N)r   rN   rg   rh   ri   r;   rj   r$   r$   r%   +test_doc_to_json_underscore_error_serialize   s   "rq   c                 C   s   t | dddt | dddg| jd< |  }d|v sJ t|d dks%J t|d d dks1J |d d d d dks?J tttj|dksLJ dS )z&Test that Doc.to_json() includes spansr   r*   testr   r[   r-   N)r   r[   r;   r<   r   r=   r>   rB   r$   r$   r%   test_doc_to_json_span   s   "rs   c                 C   sR  |   }tt|}t| jj|dd}|j| j  kr#dks&J  J t|t|   kr5dks8J  J |d j	| d j	ksDJ |d j
| d j
ksPJ |d j| d jks\J |d jj| d jjksjJ |d j| d jksvJ t|jdksJ |jd jdksJ |jd jdksJ |jd jdksJ |  | ksJ d S )	NTr=   r)   r+   r   r   r*   r,   )r;   r?   r@   rA   r   vocab	from_jsonr7   r<   r   r2   r4   r5   idxlemmar    r-   r.   label_to_bytesr&   rC   new_docr$   r$   r%   test_json_to_doc   s    $r}   c                 C   s>  t | jj|dd}dd |D }|j| j  krdks!J  J t|tdd | D   kr5dks8J  J |d j| d jksDJ |d j| d jksPJ |d j| d jks\J |d jj	| d jj	ksjJ |d j
| d j
ksvJ t|jd	ksJ |jd jd	ksJ |jd jd
ksJ |jd jdksJ d S )NTrt   c                 S      g | ]}|qS r$   r$   .0tokenr$   r$   r%   
<listcomp>       z+test_json_to_doc_compat.<locals>.<listcomp>r)   c                 S   r~   r$   r$   r   r$   r$   r%   r     r   r+   r   r   r*   r,   )r   ru   rv   r7   r<   r   r2   r4   r5   rw   rx   r    r-   r.   ry   )r&   r:   r|   
new_tokensr$   r$   r%   test_json_to_doc_compat   s    .r   c                    s   t jddd t jddd d| j_g d| j_| jddgd}t | jj|dd	 t fd
dt	ddD s:J  jjdksBJ  jjg dksLJ | 
  
 ksVJ d S )NrE   FrF   rH   rI   rJ   rK   Trt   c                       g | ]
}  d | qS r]   has_extensionr   ir|   r$   r%   r         z/test_json_to_doc_underscore.<locals>.<listcomp>r   r+   )r   rN   rM   rE   rH   r;   ru   rv   allrangerz   rB   r$   r   r%   test_json_to_doc_underscore  s    r   c                    s  t jddd t jddd tjddd tjddd d| j_g d| j_d	| d
d j_d| d
d j_d| d
 j_d| d j_| j	g dd}t
t
|}t | jj|dd t fddtddD slJ  jjdkstJ  jjg dks~J  d
 jjdksJ  d jjdksJ  d
d jjd	ksJ  d
d jjdksJ  j| jksJ  jdgd| jdgdksJ d S )NrE   FrF   rH   rP   rQ   rI   rJ   rR   r   r   rS   r*   rT   rU   rW   rK   Trt   c                    r   r   r   r   r   r$   r%   r   -  r   z?test_json_to_doc_with_token_span_attributes.<locals>.<listcomp>r+   ra   )exclude)r   rN   r   r   rM   rE   rH   rQ   rP   r;   r?   r@   rA   ru   rv   r   r   ra   rz   rB   r$   r   r%   +test_json_to_doc_with_token_span_attributes  s4    r   c                 C   s  t | ddddt | dddddg| jd< |  }t| jj|dd	}t|jdks+J t|jd dks6J tdD ]J}|jd | j| jd | jksNJ |jd | j	| jd | j	ks`J |jd | j
| jd | j
ksrJ |jd | j| jd | jksJ q:d
S )z1Test that Doc.from_json() includes correct.spans.r   r*   rr   )r/   r      )r/   kb_idTrt   N)r   r[   r;   r   ru   rv   r<   r   r-   r.   r/   r   )r&   rC   r|   r   r$   r$   r%   test_json_to_doc_spans:  s   
$$$&r   c                 C   sr   | |fD ]2}|  }t| jj|dd}dd |jD dd |jD ks&J dd |D dd |D ks6J qdS )z1Test that Doc.from_json() includes correct.sents.Trt   c                 S      g | ]}|j qS r$   rn   )r   sentr$   r$   r%   r   P      z*test_json_to_doc_sents.<locals>.<listcomp>c                 S   r   r$   )is_sent_startr   r$   r$   r%   r   S  r   N)r;   r   ru   rv   r8   )r&   r(   test_docrC   r|   r$   r$   r%   test_json_to_doc_sentsK  s   r   c                 C   s>   ddd}|| _ |  }t| jj|dd}|j |ksJ dS )z1Test that Doc.from_json() includes correct .cats.g333333?gffffff?)ABTrt   N)catsr;   r   ru   rv   )r&   r   rC   r|   r$   r$   r%   test_json_to_doc_catsX  s
   
r   c                  C   s>   t dd} |  }t| jj|dd}| j|jksJ dS )z5Test that Doc.from_json() preserves spaces correctly.enzThis is just brilliant.Trt   N)spacyblankr;   r   ru   rv   r7   r{   r$   r$   r%   test_json_to_doc_spacesa  s   r   c                 C   sZ   |   }|d d d tt t| j| W d   dS 1 s&w   Y  dS )zbTest that Doc.from_json() raises an exception if tokens don't all have the same set of properties.r9   r   r3   Nr;   poprg   rh   ri   r   ru   rv   r&   r:   r$   r$   r%   &test_json_to_doc_attribute_consistencyi  s
   "r   c                 C   sV   |   }|d tt t| jj|dd W d   dS 1 s$w   Y  dS )zLTest that Doc.from_json() raises an exception when validating invalid input.r9   Trt   Nr   r   r$   r$   r%   !test_json_to_doc_validation_errorq  s
   
"r   c                 C   s@   dd }t jd|d | jdgd}|d d || ksJ d S )Nc                 S   s
   t | jS rm   )r<   r7   rj   r$   r$   r%   get_text_lengthz  s   
z<test_to_json_underscore_doc_getters.<locals>.get_text_lengthtext_length)getterrK   rM   )r   rN   r;   )r&   r   r:   r$   r$   r%   #test_to_json_underscore_doc_gettersy  s   r   )"rg   r?   r   r   spacy.tokensr   r   r   test_underscorer   fixturer&   r(   r:   rD   rO   r\   rb   rd   re   rk   rq   rs   r}   r   r   r   r   r   r   r   r   r   r   r$   r$   r$   r%   <module>   s@    


(	