o
    i|                     @   s   d dl Z d dlZd dlmZmZmZmZmZ d dlm	Z	 ej
ddd Zdd Zd	d
 Zdd Zdd Zdd Zdd Zej
dddgdgdd Zdd Zdd ZdS )    N)DEPMORPHORTHPOSSHAPE)Doci  c           	         s   g d}g d}g d} fdd|D } fdd|D }t  |d}|dtj|d	d
 |dtj|d	d
 dd |D |ksEJ dd |D |ksPJ |ddg}t |j|dddg|}dd |D |ksoJ dd |D |kszJ dS )z5Test that lemmas are set correctly in doc.from_array.)Iz'llsurvive)PRPMDVB)z-PRON-willr	   c                       g | ]} j |qS  stringsadd).0tagen_vocabr   N/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/doc/test_array.py
<listcomp>       z"test_issue2203.<locals>.<listcomp>c                    r   r   r   )r   lemmar   r   r   r      r   wordsTAGuint64)dtypeLEMMAc                 S      g | ]}|j qS r   tag_r   tr   r   r   r          c                 S   r!   r   lemma_r$   r   r   r   r      r&   c                 S   r!   r   r"   r$   r   r   r   r      r&   c                 S   r!   r   r'   r$   r   r   r   r      r&   N)r   
from_arraynumpyarrayto_arrayvocab)	r   r   tagslemmastag_ids	lemma_idsdoc	doc_arraynew_docr   r   r   test_issue2203   s   r5   c                 C   st   t | g dd}|jd }|j|jksJ |ttf}|d d |d d ks*J |d d |d d ks8J d S )NAnexamplesentencer   r8   r      r   r-   orthshaper,   r   r   r   r2   r8   feats_arrayr   r   r   test_doc_array_attr_of_token   s   
 r@   c                 C   s~   t | g dd}|jd }|j|jksJ |ttf}|d}|d d |d d ks/J |d d |d d ks=J d S )Nr6   r   r8   )r   r   r   r:   r;   )r   r2   r8   r?   feats_array_stringyr   r   r   $test_doc_stringy_array_attr_of_token&   s   

 rB   c                 C   sF   t | g dd}|jd }|j|jksJ |t}|jdks!J d S )Nr6   r   r8   )   )r   r-   r<   r=   r,   r   r>   r   r   r   test_doc_scalar_attr_of_token0   s
   

rD   c                 C   s   g d}g d}t | ||d}|d j|d j  kr+|d j  kr+|d jks.J  J |ttf}|d d |d jksBJ |d d |d jksOJ |d d |d jks\J |d d |d jksiJ d S )NAnicer9   .)DETADJNOUNPUNCT)r   posr   r:      rC   )r   rM   r,   r   r   )r   r   rM   r2   r?   r   r   r   test_doc_array_tag8   s   >rO   c                 C   s   g d}g d}t | ||d}|d t|d jksJ |d t|d jks)J |d t|d jks6J |ttf}|d d |d jjksKJ |d d |d jjksYJ |d d |d jjksgJ d S )N)Eatblueham)zFeat=VzFeat=JzFeat=N)r   morphsr   r:   rN   )r   strmorphr,   r   r   key)r   r   rU   r2   r?   r   r   r   test_doc_array_morphD   s    rW   c                 C   s   g d}g d}t | ||d}|ttf}|d d |d jks#J |d d |d jks0J |d d |d jks=J |d d |d jksJJ d S )NrE   )detamodROOTpunct)r   depsr   r:   rN   rC   )r   r,   r   r   dep)r   r   r\   r2   r?   r   r   r   test_doc_array_depR   s   r^   attrsr   r   IS_ALPHAc                 C   s2   g d}t | |d}t | |d||| dS )z|Test that both Doc.to_array and Doc.from_array accept string attrs,
    as well as single attrs and sequences of attrs.
    r6   r   N)r   r)   r,   )r   r_   r   r2   r   r   r   #test_doc_array_to_from_string_attrs]   s   ra   c                 C   sN   g d}t | |dd}|d dksJ |d dksJ |d dks%J d	S )
z7Test that Doc.to_array can retrieve token start indicesr6   r   IDXr   r:   rC   rN      N)r   r,   )r   r   offsetsr   r   r   test_doc_array_idxg   s
   re   c                 C   s   g d}t | |d}|D ]}|d |_q|dg}t | |d}|dg| |dg}tdtj|d< t | |d}t	t
 |dg| W d   n1 sUw   Y  |dg}tdtj|d< t | |d}t	t
 |dg| W d   dS 1 sw   Y  dS )zBTest that Doc.from_array doesn't set heads that are out of bounds.)Thisisar9   rH   r   r   HEADN   )r   headr,   r)   r*   int32astyper   pytestraises
ValueError)r   r   r2   tokenarrdoc_from_arrayr   r   r   #test_doc_from_array_heads_in_boundsp   s&   "ru   )r*   ro   spacy.attrsr   r   r   r   r   spacy.tokensr   markissuer5   r@   rB   rD   rO   rW   r^   parametrizera   re   ru   r   r   r   r   <module>   s     

	

		