o
    iFD                     @   s(  d dl Z d dlmZmZ d dl mZ d dlmZ d dlmZm	Z	m
Z
mZmZ d dlmZmZ d dlmZ d dlmZ d	g d
g ddggZddg dg dgiggZddg diggZe jdd Ze jdd Zdd Zdd Zdd Zdd Zdd  Zd!d" Zd#d$ Z d%d& Z!d'd( Z"d)d* Z#dS )+    N)assert_almost_equalassert_array_almost_equal)approx)English)PRFScoreROCAUCScoreScorer_roc_auc_score
_roc_curve)DocSpan)Example)offsets_to_biluo_tagsz7Apple is looking at buying U.K. startup for $ 1 billion)   r   r   r            r   
   r      )nsubjauxROOTpreppcompcompounddobjr   quantmodr   pobjheadsdepsz	100 - 200entities)r   r   CARDINAL)r   	   r"   z6Apple is looking at buying U.K. startup for $1 billion))r      ORG)      GPE),   6   MONEYc                  C   s   d} g d}g d}g d}t  }|| }tt|D ]"}|| || _|| || _|| ||  |dkr=d|| _q|S )Nz1Sarah's sister flew to Silicon Valley via London.)
NNPPOSNNVBDINr,   r,   r0   r,   .)
PROPNPARTNOUNVERBADPr2   r2   r6   r2   PUNCT)
NounType=prop|Number=singzPoss=yesNumber=singzTense=past|VerbForm=fin r8   r8   r:   r8   zPunctType=perir   F)r   rangelentag_pos_	set_morphis_sent_start)texttagsposmorphsnlpdoci rH   K/home/ubuntu/.local/lib/python3.10/site-packages/spacy/tests/test_scorer.py
tagged_doc-   s   
rJ   c                  C   sJ   d} t  }|| }tt|D ]}|d dkrd|| _qd|| _q|S )Nz-One sentence. Two sentences. Three sentences.r   r   TF)r   r;   r<   r@   )rA   rE   rF   rG   rH   rH   rI   
sented_docT   s   rK   c                 C   s*  t  }ddd | D i}t| |}||g}|d dks J t }t|jg dg dd|_d	|jd
 _||g}|d dksDJ |d dksLJ |d t	dksVJ |d dks^J t  }|j|gdd}|d d dkssJ |d d dks}J |d d t	dksJ |d d dksJ d S )Nsent_startsc                 S      g | ]}|j qS rH   
sent_start.0trH   rH   rI   
<listcomp>c       z%test_tokenization.<locals>.<listcomp>	token_acc      ?)Onez	sentence.Two
sentences.ThreerY   )TTTTTF)wordsspacesF         ?token_ptoken_rQUU?token_fg?Tper_component	tokenizer)
r   r   	from_dictscorer   r   vocab	predictedr@   r   )rK   scorergoldexamplescoresrE   rH   rH   rI   test_tokenizationa   s.   rn   c                 C   s   t  }ddd | D i}t| |}||g}|d dks J d|d d< d|d d	< t| |}||g}|d td
ksBJ d S )NrL   c                 S   rM   rH   rN   rP   rH   rH   rI   rS      rT   ztest_sents.<locals>.<listcomp>sents_frV   r   r   r]   r   g1UU?r   r   rf   rg   r   )rK   rj   rk   rl   rm   rH   rH   rI   
test_sents~   s   rq   c           	      C   s>  t  }g }tD ]'\}}t| |d|d |d d}|d |d d}t||}|| q||}|d dks<J |d dksDJ |d	 d
 d dksPJ |d	 d
 d dks\J |d	 d
 d dkshJ |d	 d d dkstJ |d	 d d dksJ |d	 d d dksJ t  }g }tD ],\}}t| |d|d |d d}|d |d d}d|d _t||}|| q||}|d dksJ t	|d d |d	 d
 d dksJ |d	 d
 d dksJ |d	 d
 d dksJ t	|d	 d d d |d	 d d dksJ |d	 d d dksJ d S )N r   r    )r[   r   r    r   dep_uasrV   dep_lasdep_las_per_typer   prfr   r   g*E?gTUU?皙?)
r   test_las_appler   splitr   rf   appendrg   dep_r   )	en_vocabrj   examplesinput_annotrF   rk   rl   resultsrH   rH   rI   test_las_per_type   sJ   


r   c           	      C   s  t  }g }tD ]0\}}t| |dg dd}t||d }t|d|i}d|jd _d|j	d _|
| q||}|d dksEJ |d	 dksMJ |d
 dksUJ |d d d dksaJ |d d d dksmJ |d d d dksyJ t  }g }tD ]0\}}t| |dg dd}t||d }t|d|i}d|jd _d|j	d _|
| q||}|d tdksJ |d	 tdksJ |d
 tdksJ d|d v sJ d|d v sJ d|d v sJ |d d d dksJ |d d d dksJ |d d d dksJ |d d d dksJ |d d d dks,J |d d d dks9J |d d d dksFJ |d d d dksSJ |d d d tdksbJ d S )Nrr   )
B-CARDINALOr   )r[   entsr!   Fr]   ents_prV   ents_rents_fents_per_typer"   rv   rw   rx   )
B-ORGr   r   r   r   zB-GPEr   r   r   r   g1UU?r(   r+   r%   r   r^   )r   test_ner_cardinalr   r{   r   r   rf   ri   r@   	referencer|   rg   test_ner_appler   )	r~   rj   r   r   r   rF   r!   rl   r   rH   rH   rI   test_ner_per_type   s^   

"r   c                 C   s  t  }dd | D dd | D dd | D dd | D d}t| |}||g}|d dks2J |d	 dks:J |d
 dksBJ |d dksJJ |d d d dksVJ t  }dd | D }d|d< dd | D }d|d< dd | D }d|d< d|d< ||||d d}t| |}||g}|d dksJ |d	 dksJ |d
 tdksJ |d tdksJ |d d d dksJ |d d d dksJ |d d d td ksJ t  }|j|gd!d"}|d# d dksJ |d$ d	 dksJ |d$ d
 tdks	J d S )%Nc                 S   rM   rH   r=   rP   rH   rH   rI   rS      rT   z"test_tag_score.<locals>.<listcomp>c                 S   rM   rH   r>   rP   rH   rH   rI   rS      rT   c                 S      g | ]}t |jqS rH   strmorphrP   rH   rH   rI   rS          c                 S   s   g | ]	}|j r	d ndqS )r]   )r@   rP   rH   rH   rI   rS      s    )rB   rC   rD   rL   tag_accrV   pos_acc	morph_accmorph_micro_fmorph_per_featNounTyperx   c                 S   rM   rH   r   rP   rH   rH   rI   rS     rT   r.   r   c                 S   rM   rH   r   rP   rH   rH   rI   rS   
  rT   Xr]   c                 S   r   rH   r   rP   rH   rH   rI   rS     r   r9   zNumber=plurr   rL   ?ry   g"[L"?Poss        NumbergupE?Trc   taggermorphologizerrp   )rJ   rj   rk   rl   r   rB   rC   rD   rH   rH   rI   test_tag_score   sP   r   c                 C   s  | d}d|d _ d|d _|d d d|d _| d}d|_t||}t }||g}|D ]}|dr9q1|| d u sAJ q1| d}d|_d|d	 _ d|d	 _|d	 d d|d	 _t||}t }||g}|d
 d u suJ |d dks}J |d dksJ |d dksJ |d dksJ |d dksJ |d d u sJ | d}d|_d|d _ d|d _|d	 d d|d	 _t||}t }||g}|d
 d u sJ |d dksJ |d dksJ |d dksJ |d dksJ |d dksJ |d d u s
J d S )Nz	a b c d eAr   r   zFeat=ValdepTcatsr]   rU   r   r   r   r   rs   rV   rt   ro   )	r=   r>   r?   r}   has_unknown_spacesr   r   rg   
startswith)en_tokenizerpred_docref_docrl   rj   rm   keyrH   rH   rI   test_partial_annotation(  s\   












r   c                  C   s  ddg} ddg}t | |\}}}t| |}t|g d t|g d t|d ddg} ddg}t | |\}}}t| |}t|g d t|g d t|d ddg} ddg}t | |\}}}t| |}t|ddg t|ddg t|d ddg} ddg}t | |\}}}t| |}t|g d t|g d t|d ddg} ddg}t | |\}}}t| |}t|ddg t|ddg t|d t }|dd |dd t|jd ddg} dd	g}tt	 t| | W d    n1 sw   Y  t }|dd |d	d tt	 |j}W d    n	1 s"w   Y  ddg} dd	g}tt	 t| | W d    n	1 sEw   Y  t }|dd |d	d tt	 |j}W d    d S 1 snw   Y  d S )
Nr   r]   )r   r   r]   )r   r]   r]   rV   r   r^         ?      ?)
r
   r	   r   r   r   	score_setrg   pytestraises
ValueError)y_truey_scoretprfpr_roc_aucrg   rH   rH   rI   test_roc_auc_score`  sz   









$r   c            	         s  t  } d}d}| |}| | g }||jdddd ||jdddd ||jd	d
dd ||j|< dd }|j| j d j|< t |}tj|g||d}|| d dksbJ || d dk smJ |j| j d j|< t |}tj|g||dd}|| d dksJ || d dksJ  fdd|D }| j|< t |}tj|g||dd}|| d dksJ || d dksJ | d|v sJ tj|g||ddd}|| d dksJ || d dksJ | d|vsJ d S )NzThis is just a random sentence.my_spansr   r   PERSONlabelr   r%         c                 S   s
   | j | S )N)spans)rF   span_keyrH   rH   rI   span_getter  s   
z%test_score_spans.<locals>.span_getter)rF   )attrgetter_prV   _rT)r   r   allow_overlapc                    s    g | ]}t  |j|jd dqS )WRONGr   )r   startend)rQ   spanpredrH   rI   rS     s     z$test_score_spans.<locals>.<listcomp>r   	_per_typeF)r   r   r   labeled)	r   make_docr|   	char_spanr   copyr   r   score_spans)	rE   rA   r   rk   r   r   egrm   	new_spansrH   r   rI   test_score_spans  sD   






r   c                  C   s   ddh} ddh}t  }t }|j| |d |j|j|jftdks#J t }|j| |d |j|j|jftdks;J || }|j|j|jftdksMJ ||7 }|j|j|jft|j|j|jfkseJ d S )Nhihoyo)candrk   )r^   r^   r^   )r   r   r   )r   r^   ra   )setr   r   	precisionrecallfscorer   )r   gold1gold2abcrH   rH   rI   test_prf_score  s   r   c                 C   s
  d}| |}ddd|_ | |}ddd|_ t||}tj|gdt|j  dd	d
d}tj|gdt|j  dd	dd}|d dksEJ |d dksMJ ||ksSJ tj|gdt|j  ddd}|d dkskJ tj|gdt|j  dd
d}|d dksJ d S )Nz	some textrV   r   )POSITIVENEGATIVEr   r   r   Fr   g?)labelsmulti_labelpositive_label	thresholdr   
cats_scoreT)r   r   r   cats_macro_fr^   )r   r   r   
score_catslistkeys)r   rA   gold_docr   rl   scores1scores2rm   rH   rH   rI   test_score_cats  sR   
r   )$r   numpy.testingr   r   r   spacy.lang.enr   spacy.scorerr   r   r   r	   r
   spacy.tokensr   r   spacy.trainingr   spacy.training.iob_utilsr   rz   r   r   fixturerJ   rK   rn   rq   r   r   r   r   r   r   r   r   rH   rH   rH   rI   <module>   sD    

&
-:48H/