o
    i                     @   s   d dl Z d dlZd dlmZ d dlmZmZ d dlmZ e j	
ddd Ze j	
dd	d
 Ze j	
ddd Zdd Zdd Ze j	dg ddd ZdS )    N)English)DocDocBin)
Underscorei  c                   C   s$   t   t dgd t g dd dS )zTest that docbin init goes wellLEMMAattrs)r   ENT_IOBENT_TYPEN)r    r   r   _/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/serialize/test_serialize_docbin.pytest_issue4367	   s   r   i  c                 C   s   t | ddgd}d|jd< d|jd< tdd}|| | }tdd|}t|| d	 }|jd dks:J |jd dksCJ d
S )z6Test that user_data is correctly serialized in DocBin.helloworldwordsbarfoo)z._.r   NNTstore_user_datar   N)r   	user_datar   addto_bytes
from_byteslistget_docs)en_vocabdocdoc_bindoc_bin_bytesnew_doc_binnew_docr   r   r   test_issue4528   s   



r"   i  c                 C   sR   t ddgd}t|| g ksJ | }t  |}t|| g ks'J dS )z6Ensure an empty DocBin does not crash on serializationDEPHEADr   N)r   r   r   r   r   )r   r   r   	doc_bin_2r   r   r   test_issue5141!   s
   r&   c            	      C   sf  t g ddd} g d}ddi}t }||D ])}||_|dd }d	|_d
|_d|_|g|jd< d|d _d|d _	| 
| q|  }td}t  |} t| |j}t|D ]S\}}|j|| ksjJ |j|ksqJ t|jdkszJ |jd d jd	ksJ |jd d jd
ksJ |jd d jdksJ |d jdksJ |d j	dksJ q]d S )N)r   r	   r
   NORMENT_IDT)r   r   )z	Some textzLots of texts...z...Ag      ?r      UNUSUAL_SPAN_LABELUNUSUAL_SPAN_IDUNUSUAL_SPAN_KB_IDstartUNUSUAL_TOKEN_NORMUNUSUAL_TOKEN_ENT_IDen   )r   r   pipecatslabel_id_kb_id_spansnorm_ent_id_r   r   spacyblankr   r   r   vocab	enumeratetextlen)	r   textsr4   nlpr   span
bytes_datareloaded_docsir   r   r   test_serialize_doc_bin+   s<   


rG   c                 C   s   t | ddgd}|jsJ |jdksJ t | ddgddgd}|jr$J |jdks+J t t||gd }|| \}}|jsDJ |jdksKJ |jrPJ |jdksWJ d S )	Nthatz'sr   zthat 's F)r   spaceszthat's)docs)r   has_unknown_spacesr?   r   r   r   r   )r   doc1doc2r   re_doc1re_doc2r   r   r   %test_serialize_doc_bin_unknown_spacesM   s   



rP   z$writer_flag,reader_flag,reader_value))TTr   )TFr   )FTnothing)FFrQ   c           	      C   s~   t jddd t | ddgd}d|j_t|d}|| | }t|d|}t|	| d	 }|jj|ks:J i t
_d
S )z?Test that custom extensions are correctly serialized in DocBin.r   rQ   )defaultr   r   r   r   r   r   N)r   set_extension_r   r   r   r   r   r   r   r   doc_extensions)	r   writer_flagreader_flagreader_valuer   	doc_bin_1r   r%   doc_2r   r   r   test_serialize_custom_extension]   s   


r[   )pytestr;   spacy.lang.enr   spacy.tokensr   r   spacy.tokens.underscorer   markissuer   r"   r&   rG   rP   parametrizer[   r   r   r   r   <module>   s$    





	"	