o
    i                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ e jddd	Zd
d Zdd Zdd Zdd Zdd Zdd Zdd Zdd ZdS )    N)get_current_ops)Doc)split_bilu_label)make_tempdir)Vocabrc                 c   s     t j| d}|V  |  d S )N)mode)tempfileTemporaryFileclose)r   f r   D/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/util.pymake_tempfile   s   r   c                 C   sX   t  }g }d}td| d D ]}dd t||| D }|t||d ||7 }q|S )Nr      c                 S      g | ]}t |qS r   str.0ir   r   r   
<listcomp>       zget_batch.<locals>.<listcomp>words)r   rangeappendr   )
batch_sizevocabdocsstartsizenumbersr   r   r   	get_batch   s   
r#   c                 C   s&   t  }dd td| D }t||dS )Nc                 S   r   r   r   r   r   r   r   r   &   r   z"get_random_doc.<locals>.<listcomp>r   r   )r   r   r   )n_wordsr   r"   r   r   r   get_random_doc#   s   r%   c                 C   sl   |D ]}d|v rt |\}}| | q| |}|D ]}|| qW d   dS 1 s/w   Y  dS )zXPerform a series of pre-specified transitions, to put the parser in a
    desired state.-N)r   	add_labelstep_through
transition)parserdocsequenceaction_namemovelabelstepwiser)   r   r   r   apply_transition_sequence*   s   
"r1   c                 C   s<   t |d d }| j|d |D ]\}}| j||d q| S )zuAdd list of vector tuples to given vocab. All vectors need to have the
    same length. Format: [("text", [1, 2, 3])]r   r   )width)vector)lenreset_vectors
set_vector)r   vectorslengthwordvecr   r   r   add_vecs_to_vocab6   s
   r;   c                 C   sJ   t  }||| }|||}t||tj|tj|  S )z Get cosine for two given vectors)r   to_numpyasarraynumpydotlinalgnorm)vec1vec2OPSv1v2r   r   r   
get_cosine@   s   $rG   c                 C   s\  dd | D dd |D ksJ dd | D dd |D ks J dd | D dd |D ks0J dd | D dd |D ks@J dd | D dd |D ksPJ dd | D dd |D ks`J dd | D dd |D kspJ d	d | D d	d |D ksJ t | j|jD ]$\}}|j|jksJ |j|jksJ |j|jksJ |j|jksJ qd
S )zmCompare two Doc objects and assert that they're equal. Tests for tokens,
    tags, dependencies and entities.c                 S      g | ]}|j qS r   )orthr   tr   r   r   r   K       z%assert_docs_equal.<locals>.<listcomp>c                 S   rH   r   )posrJ   r   r   r   r   M   rL   c                 S   rH   r   )tagrJ   r   r   r   r   N   rL   c                 S   s   g | ]}|j jqS r   )headr   rJ   r   r   r   r   P   r   c                 S   rH   r   )deprJ   r   r   r   r   Q   rL   c                 S   rH   r   )is_sent_startrJ   r   r   r   r   R   rL   c                 S   rH   r   )ent_typerJ   r   r   r   r   T   rL   c                 S   rH   r   )ent_iobrJ   r   r   r   r   U   rL   N)zipentsr    endr/   kb_id)doc1doc2ent1ent2r   r   r   assert_docs_equalH   s           r\   c                 C   sx   t | }t |}t| t| ksJ tt| t| D ]\\}}\}}||ks3J ||ks9J q%dS )z2Assert that two packed msgpack messages are equal.N)srslymsgpack_loadssortedkeysrT   items)b1b2msg1msg2k1rE   k2rF   r   r   r   assert_packed_msg_equal]   s   

*rh   c                 C   s   t dd| S )Nz\s+ )resub)sr   r   r   normalize_whitespaceg   s   rm   )r   )
contextlibrj   r	   r>   r]   	thinc.apir   spacy.tokensr   spacy.trainingr   
spacy.utilr   spacy.vocabr   contextmanagerr   r#   r%   r1   r;   rG   r\   rh   rm   r   r   r   r   <module>   s(    

