o
    i!                     @   s   d dl mZ d dlmZ d dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ ejdd Zejd	d
 Zejdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zejddefd d!Zd"d# ZdS )$    )Random)ListN)Matcher)DocSpan	SpanGroup)filter_spansc              
   C      | d}t | jdd}|di i i i gg |di i gg |di gg ||}g }|D ]}|t||d |d | jj|d	   q/td
| t|dddi|d|j	d< |S Nz0 1 2 3 4 5 6T)validate421      r   *   SPANSkeyvaluenameattrsspans
r   vocabaddappendr   stringsr   shuffler   r   en_tokenizerdocmatchermatchesr   match r%   S/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/doc/test_span_group.pyr!      s     r!   c              
   C   r	   r
   r   r   r%   r%   r&   	other_doc"   s     r'   c              
   C   s   | d}t | jdd}|di i i i gg |di i gg |di gg ||}g }|D ]}|t||d |d | jj|d	   q/td
| t|dddi|d|j	d< d S r
   r   r   r%   r%   r&   
span_group:   s    r(   c                 C   s  | j d }| }||ksJ |j|jksJ |j|jksJ t|t|ks)J t|t|ks3J d|_d|jd< |t| ddd |j|jksMJ |j|jksUJ |jd dks^J t|t|kshJ t| j	d	d
 | D d}t
t |j|d W d    n1 sw   Y  |  }| }||dd  ||dd  W d    n1 sw   Y  t| | dd | dd gd}t||j|dD ]\}}|j|jksJ |j|jksJ qd S )Nr   new_name	new_valuer   r      LABELr   c                 S   s   g | ]}|j d  qS )x)text).0tr%   r%   r&   
<listcomp>b   s    z(test_span_group_copy.<locals>.<listcomp>)words)r!   r      r   )r   copyr   r   lenlistr   r   r   r   pytestraises
ValueError
retokenizemerger   zip
start_charend_char)r!   r(   clonedoc2doc3retokenizerspan1span2r%   r%   r&   test_span_group_copyQ   s8   


 rF   c                 C   sb  | j d }d}|| }d|_| jjd |_|| j|jksJ || j|jks)J |||< || j|jks7J || j|jksAJ || j|jksKJ || j|jksUJ || |ks]J t	t
 ||d< W d    n1 sqw   Y  t	t
 ||d< W d    n1 sw   Y  t|dd}t	t |||< W d    d S 1 sw   Y  d S )	Nr      z	NEW LABELKB_IDd   r   r   )r   label_r   r   kb_idlabelstartendr8   r9   
IndexErrorr   r:   )r!   r'   r(   indexspanr%   r%   r&   test_span_group_set_itemq   s.   



"rS   c                 C   s   | j d }|js
J d S )Nr   )r   has_overlap)r!   r(   r%   r%   r&   test_span_group_has_overlap   s   
rU   c                 C   s0  | j d }| dd | dd g}t| dddd|d}||}|j|jks)J |jd	ddks3J t|t| }t|t|ksEJ t|t| }|j|d
d}||ksZJ |j|jksbJ |jd	ddkslJ t|t|ksvJ |j d }tt || W d    d S 1 sw   Y  d S )Nr   r   rG   r+   
MORE_SPANSr*   r   new_keyr   r   T)inplace)	r   r   _concatr   r   r7   r8   r9   r:   )r!   r'   span_group_1r   span_group_2span_group_3span_list_expectedr%   r%   r&   test_span_group_concat   s.   


"r_   c                 C   s   | j d }t|}d}|| }||d  }||= t||d ks"J || |ks*J || |ks2J tt |d= W d    n1 sEw   Y  tt |d= W d    d S 1 s^w   Y  d S )Nr   rG   r   rI   rJ   )r   r6   r8   r9   rP   )r!   r(   lengthrQ   rR   	next_spanr%   r%   r&   test_span_doc_delitem   s   
"rb   c                 C   s   | j d }| dd | dd g}t| dddd|d}||}|| }t|t|ks/J |jd	ddks9J t|t|ksCJ d S )
Nr   r   rG   r+   rV   r*   rW   r   r   )r   r   rZ   r6   r   r7   )r!   r[   r   r\   span_group_3_expectedr]   r%   r%   r&   test_span_group_add   s   

rd   c                 C   s   | j d  }| dd | dd g}t| dddd|d}||}||7 }t|t|ks1J |jd	ddks;J t|t|ksEJ | j d  }||7 }t|t|ksZJ |jd
d	ikscJ t|t|ksmJ d S Nr   r   rG   r+   rV   r*   rW   r   r   r   )r   r5   r   rZ   r6   r   r7   r!   r[   r   r\   span_group_1_expectedr%   r%   r&   test_span_group_iadd   s(   

rh   c                 C   s   | j d  }| dd | dd g}t| dddd|d}||}|| t|t|ks2J |jd	ddks<J t|t|ksFJ | j d }|| t|t|ksZJ |jd
d	ikscJ t|t|ksmJ d S re   )r   r5   r   rZ   extendr6   r   r7   rf   r%   r%   r&   test_span_group_extend   s$   



rj   c                 C   s:   t t t| j W d    d S 1 sw   Y  d S )N)r8   r9   AttributeErrorprintr!   )r(   r%   r%   r&   test_span_group_dealloc  s   "rm   i.  c                 C   sT   | j d }t|}t|D ]\}}|||   kr || ks#J  J qt| dS )zXTests whether typing of `SpanGroup` as `Iterable[Span]`-like object is accepted by mypy.r   N)r   r7   	enumerater   )r!   r(   r   irR   r%   r%   r&   test_span_group_typing  s
   
&rp   c                 C   s   | d}| d}t ||dd |dd gd}tt t ||dd |dd gd}W d   dS 1 s9w   Y  dS )z5Test that all spans must come from the specified doc.za b cr   r   r   r4   N)r   r8   r9   r:   )r    doc1rA   r(   r%   r%   r&   test_span_group_init_doc  s    ""rr   )randomr   typingr   r8   spacy.matcherr   spacy.tokensr   r   r   
spacy.utilr   fixturer!   r'   r(   rF   rS   rU   r_   rb   rd   rh   rj   rm   markissuerp   rr   r%   r%   r%   r&   <module>   s0    


 
	