o
    inE                     @   s  d dl Z d dlZd dlmZ d dlmZmZmZ d dlm	Z	 d dl
mZ d dlmZmZ ejddd	 Zejd
dd Zejddd Zejddd Zejddd Zejddd Zejddd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Z d.d/ Z!d0d1 Z"d2d3 Z#d4d5 Z$d6d7 Z%d8d9 Z&d:d; Z'ejd<d=d> Z(ejd?dFdAdBZ)ejdCdDdE Z*dS )G    N)displacy)DependencyRendererEntityRendererSpanRenderer)English)Persian)DocSpani9	  c                 C   sH   d}g d}t | |dgt| d}t|}|D ]}||v s!J qdS )z#Test if < is escaped when rendering)z&lt;z&gt;z&amp;z&quot;)<>&"depwordsdepsN)r   lenr   render)de_vocabcharsr   dochtmlchar r   M/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/test_displacy.pytest_issue2361   s   
r   i
  c                 C   sp   t | g dd}t|ddddg|_tj|dd}d	|v sJ t|dd
ddg|_tj|dd}d	|v s6J dS )z9Test that displaCy ENT visualizer escapes HTML correctly.)testz	<RELEASE>r   r   r      TESTlabelentstylez&lt;RELEASE&gt;   N)r   r	   entsr   r   en_vocabr   r   r   r   r   test_issue2728   s   r)   i  c                 C   sN   g d}g d}g d}t | |||d}tjt|dfdd|_t| dS )	zTest that retokenization works correctly via displaCy when punctuation
    is merged onto the preceeding token and tensor is resized.)HelloWorld!Whenisthisbreaking?)r   r   r      r2      r2   r2   )intjROOTpunctadvmodr5   detnsubjr6   )r   headsr   `   float32)dtypeN)r   numpyzerosr   tensorr   r   )r(   r   r:   r   r   r   r   r   test_issue3288"   s   rA   i  c               
   C   s   ddddddddddddd	d
ddddgdddddddddddddddddddddddddgd} dddddgd }t j| d!d"d#}|sQJ t j|d$d"d#}|s]J d%S )&z;Test that displaCy renderer doesn't require "settings" key.ButCCONJtexttagGooglePROPNr.   VERBstartingfromADPzbehind.ADVr      ccleftstartendr!   dirr   r9   r%   auxr2   prepright   pcompr   arcs#But Google is starting from behind.
   ORGrR   rS   r!   rE   r&   r   Tr$   manualr"   Nr   r   )example_depexample_entdep_htmlent_htmlr   r   r   test_issue3531.   s*   	rh   i*  c                 C   s0   t | ddgddgd}t |jd< t| dS )z^Test that displaCy doesn't serialize the doc.user_data when making a
    copy of the Doc.
    r*   worldr   r   r   N)r   set	user_datar   
parse_deps)r(   r   r   r   r   test_issue3882L   s   rm   iG  c               	   C   s   t  } ddddddddddddg}d	d
ddddddddddddddddddd
ddddg}| ||dg | jdksEJ dS )zITest that overlapping arcs get separate levels, unless they're identical.ThisDTrD   r.   VBZaz	sentence.NNr   r   r9   rP   rQ   r%   rN   r8   overlap)rS   r!   rR   rT   attrrZ   N)r   r   highest_levelrendererr   r[   r   r   r   test_issue5447V   s   rx   i  c                  C   sN   d} t  }|| }t|ddddg|_tj|dd}|d}|d	ks%J d S )
Nz8First line
Second line, with ent
Third line
Fourth line
      r   r    r"   r#   z<br>r2   )r   r	   r&   r   r   count)sample_textnlpr   r   foundr   r   r   test_issue5838k   s   
r   c              
   C   s   t | g dd}t|dddt|dddg|jd< t|}t|ts%J |d	 d
ks-J |d ddddddddddddddddgksGJ dS )z>Test that spans on a Doc are converted into displaCy's format.WelcometotheBankofChinar   rN   r3   r^   rX   GPEscrE   Welcome to the Bank of China spans       #rR   rS   start_token	end_tokenr!   kb_idkb_url   Nr   r	   r   r   parse_spans
isinstancedictr(   r   r   r   r   r   test_displacy_parse_spansx   s.   "

r   c              
   C   s   t | g dd}t|dddddt|ddd	d
dg|jd< t|ddi}t|ts,J |d dks4J |d ddddddddddddd	d
ddgksNJ dS )zHTest that spans with kb_id on a Doc are converted into displaCy's formatr   r   rN   r3   r^   Q790068)r   rX   r   Q148r   kb_url_templatezhttps://wikidata.org/wiki/{}rE   r   r   r   r   z!https://wikidata.org/wiki/Q790068r   r   zhttps://wikidata.org/wiki/Q148Nr   r   r   r   r   ,test_displacy_parse_spans_with_kb_id_options   s6   

r   c              	   C   s   t | g dd}t|dddt|dddg|jd< t|ddd	g|jd
< tj|dd
id}t|ts4J |d dks<J |d ddddd	dddgksMJ dS )z7Test that spans in a different spans key will be parsedr   r   rN   r3   r^   rX   r   r   BANKcustom	spans_key)optionsrE   r   r   r   r   r   r   r   Nr   r   r   r   r   -test_displacy_parse_spans_different_spans_key   s    "r   c                 C   sp   t | g dd}t|dddg|jd< tjtdd t|}W d	   n1 s*w   Y  t|t	s6J d	S )
z:Test that having an unset spans key doesn't raise an errorr   r   rN   r3   r   r   W117)matchN)
r   r	   r   pytestwarnsUserWarningr   r   r   r   r   r   r   r   #test_displacy_parse_empty_spans_key   s   r   c                 C   s   t | g dd}t|dd|jjd dg|_t|}t|ts"J |d dks*J |d	 d
dddddgks9J t|dd|jjd ddg|_t|}t|tsTJ |d dks\J |d	 d
dddddgkskJ dS )zGTest that named entities on a Doc are converted into displaCy's format.rB   rG   r.   rJ   rK   behindr   r   r%   r^   r    rE   #But Google is starting from behind r&   r2   r]   r   r   rR   rS   r!   r   r   Q95r!   r   N	r   r	   vocabstringsr&   r   
parse_entsr   r   r(   r   r&   r   r   r   test_displacy_parse_ents   s   


r   c                 C   s~   t | g dd}t|dd|jjd ddg|_t|dd	i}t|ts&J |d
 dks.J |d ddddddgks=J dS )zRTest that named entities with kb_id on a Doc are converted into displaCy's format.r   r   r   r%   r^   r   r   r   z https://www.wikidata.org/wiki/{}rE   r   r&   r2   r]   z!https://www.wikidata.org/wiki/Q95r   Nr   r   r   r   r   +test_displacy_parse_ents_with_kb_id_options   s   r   c                 C   s  g d}g d}g d}g d}g d}t | |||||d}t|}t|ts*J |d d|d	 |d	 d
d|d |d d
d|d |d d
d|d |d d
gksVJ |d d	ddddddddddddddgkspJ t|dd }t|tsJ |d d|d	 |d	 d
d|d |d d
d|d |d d
d|d |d d
gksJ |d d	ddddddddddddddgksJ dS )zFTest that deps and tags on a Doc are converted into displaCy's format.)rn   r.   rq   sentence)r   r   rN   r   )DETrI   r   NOUN)ro   rp   ro   rr   )r9   r5   r8   rt   )r   r:   postagsr   r   Nr   )lemmarE   rF   r   r%   rN   r[   r9   rP   rQ   r8   rt   rW   )r   r   rl   r   r   )r(   r   r:   r   r   r   r   r   r   r   test_displacy_parse_deps  s@   



r   c                  C   sx   t  } ddddddg}dddd	d
dddd	d
g}tt | ||dg W d    d S 1 s5w   Y  d S )Nrn   r   rD   r.   rI   r   r   r9   rP   rQ   r%   r8   rZ   )r   r   raises
ValueErrorr   rv   r   r   r   test_displacy_invalid_arcs&  s   "r   c                 C   sT   t | g dd}t|dd|jjd dg|_tj|dd dd	}|d
s(J dS )z$Test that displaCy can render Spans.r   r   r   r%   r^   r    r2   r"   r#   z<divN)r   r	   r   r   r&   r   r   
startswithr'   r   r   r   test_displacy_spans1  s   r   c                 C   s:   t t td W d    d S 1 sw   Y  d S )Nzhello world)r   r   r   r   r   )r(   r   r   r   #test_displacy_raises_for_wrong_type9  s   "r   c                  C   s   g d} g d}g d}g d}t  }t|j| |||d}t|dddd	g|_tj|d
dd}d|v s5J d|v s;J d|j d|v sFJ tj|d
dd}d|v sTJ d|j d|v s_J d S )N)u   ماu
   بسیارu   کتابu   می‌خوانیم)PROrM   N_PLV_SUB)foobarr   baz)r   r   rN   r   )r   r   r:   r   r   rN   r   r    Tr   )pager$   zdirection: rtlzdirection="rtl"zlang="r   r"   )r   r   r   r	   r&   r   r   lang)r   r   r   r:   r}   r   r   r   r   r   test_displacy_rtl>  s   r   c                 C   sz   dd }t | t| g dd}t|dd|jjd dg|_t j|d	d
}|ds-J |	ds4J t dd  dS )z4Test that displaCy accepts custom rendering wrapper.c                 S   s   d|  d S )Nr   r   r   r   r   r   wrapperT  s   z-test_displacy_render_wrapper.<locals>.wrapperr   r   r   r%   r^   r    r"   r#   zTEST<divz	/div>TESTc                 S   s   | S )Nr   r   r   r   r   <lambda>^  s    z.test_displacy_render_wrapper.<locals>.<lambda>N)
r   set_render_wrapperr   r	   r   r   r&   r   r   endswith)r(   r   r   r   r   r   r   test_displacy_render_wrapperQ  s   
r   c                  C   s   ddddddddddddgd	d
ddddddddd
ddddgdd} t j| gddd}| d D ]}|d |v s?J |d |v sGJ q5dS )z3Test displacy.render with manual data for dep stylern   ro   rD   r.   rp   rq   r   rr   r   r   r9   rP   rQ   r%   rN   r8   rt   rW   Title)r   r[   titler   Tra   r   rE   rF   Nrc   )
parsed_depr   wordr   r   r   test_displacy_render_manual_depa  s    r   c                  C   sx   dddddgddddd	dgd
dg} t j| ddd}| D ]}|d d d |v s-J d|v r9|d |v s9J qdS )z3Test displacy.render with manual data for ent styler\   r2   r]   r^   r_   r`   id   COMPANYr   rE   r&   r   r"   Tra   r&   r   r!   r   Nrc   )parsed_entsr   
parsed_entr   r   r   test_displacy_render_manual_entw  s   r   c                  C   s   dddddddddgg dd	dddddddddgg dd
dg} t j| ddd}| D ]}|d d d |v s=J d|v rI|d |v sIJ q/dS )z4Test displacy.render with manual data for span stylezWelcome to the Bank of China.rN   r3   r^   r   r   r!   rX   r   r   r   r   r   r   r   .)rE   r   tokensr   )rE   r   r   r   spanTra   r   r   r!   r   Nrc   )parsed_spansr   parsed_spanr   r   r    test_displacy_render_manual_span  s*   

	

r   c                     s   ddg} ddd}t | |d}d}g d  fd	d
tt|D }|d|d d}d|d v r9d|d v s;J d|d v rGd|d v sIJ d|d v rUd|d v sWJ d|d v rcd|d v seJ d S )Nr   BARredgreen)FOOr   )r&   colorsabcd)r   r   r   r   c                    s    g | ]}||d   | dqS )r   r_   r   .0ilabelsr   r   
<listcomp>       z.test_displacy_options_case.<locals>.<listcomp>abcdez

r   r   r   r%   r   rN   )r   ranger   render_entssplit)r&   r   rw   rE   r   resultr   r   r   test_displacy_options_case  s   
 r   i)  c                  C   sL   dddddddddgd d	} t j| d
dd}|d|dk s$J d S )Nr\         SECONDr_   r2   r]   FIRSTr   r"   Tra   )r   r   find)r   r   r   r   r   $test_displacy_manual_sorted_entities  s   

	r   i2  returnc                 C   s|   t | ddgd}t|ddddg|jd< tj|dd	}d
|v s!J |jd t|dddd tj|dd	}d
|v s<J dS )zKTest that displaCy's span visualizer escapes annotated HTML tags correctly.r   z<TEST>r   r   r   r    r   r   r#   z&lt;TEST&gt;r%   N)r   r	   r   r   r   appendr'   r   r   r   test_issue12816  s   r  i 3  c                     s   ddddddddddddg} g d	}t j| |d
 t t|ks&J t fdddD s3J t fdddD s@J  d d d d dksNJ  d d d d dks\J  d d d d dksjJ  d d d d dksxJ dS )zITest whether span stacking works properly for multiple overlapping spans.r%   rX   SkillNCr   r   Skillr   rN   r   r   r   c                        g | ]}t  | d  dkqS )entitiesr   r   r   per_token_infor   r   r     r   z/test_displacy_span_stacking.<locals>.<listcomp>)r   rN   r2   c                    r  )r  r%   r  r   r	  r   r   r     r   )r   r%   r  render_slotN)r   _assemble_per_token_infor   allr  r   r	  r   test_displacy_span_stacking  s   


 r  )r   N)+r>   r   spacyr   spacy.displacy.renderr   r   r   spacy.lang.enr   spacy.lang.far   spacy.tokensr   r	   markissuer   r)   rA   rh   rm   rx   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r   r   r   r   <module>   sV    











	


#
%



