o
    Mi.                     @   s   d dl mZ d dlZd dlZd dlmZmZ d dlZd dlZe	e
ZedddgZddd	d
Zdd ZG dd deZG dd deZG dd deZdS )    )print_functionN)chainproductEvaluationConfignum_samplessample_sizezFilename   : {name}
Num samples: {samplesize_count}
Sample size: {samplesize_avg}
F-score    : {fscore_avg:.3}
Precision  : {precision_avg:.3}
Recall     : {recall_avg:.3}z?{name:10} {precision_avg:6.3} {recall_avg:6.3} {fscore_avg:6.3}zB{name} & {precision_avg:.3} & {recall_avg:.3} & {fscore_avg:.3} \\)defaulttablelatexc                 C   s   t || |S )zNCreate a specific size sample from the compound list using a specific
    seed)randomRandomsample)compound_listsizeseed r   H/home/ubuntu/.local/lib/python3.10/site-packages/morfessor/evaluation.py_sample   s   r   c                   @   s^   e Zd ZdZdd eeeedZdddZ	dd	 Z
d
d Zdd Zdd Zdd Zdd ZdS )MorfessorEvaluationResulta  A MorfessorEvaluationResult is returned by a MorfessorEvaluation
    object. It's purpose is to store the evaluation data and provide nice
    formatting options.

    Each MorfessorEvaluationResult contains the data of 1 evaluation
    (which can have multiple samples).

    c                 C   s   t | t|  S N)sumlenxr   r   r   <lambda>*   s    z"MorfessorEvaluationResult.<lambda>)avgminmaxvaluescountNc                 C   s(   || _ g | _g | _g | _g | _d | _d S r   )	meta_data	precisionrecallfscore
samplesize_cache)selfr    r   r   r   __init__1   s   
z"MorfessorEvaluationResult.__init__c                 C   s   | j du r	|   | j | S )zRProvide dict style interface for all values (standard values and
        metadata)Nr%   _fill_cache)r&   itemr   r   r   __getitem__;   s   

z%MorfessorEvaluationResult.__getitem__c                 C   s:   | j | | j| | j| | j| d| _dS )z^Method used by MorfessorEvaluation to add the results of a single
        sample to the objectN)r!   appendr"   r#   r$   r%   )r&   r!   r"   f_scorer   r   r   r   add_data_pointC   s
   
z(MorfessorEvaluationResult.add_data_pointc                 C   s   |  td S )z Method for default visualizationr   )formatFORMAT_STRINGSr&   r   r   r   __str__N   s   z!MorfessorEvaluationResult.__str__c                    s&    fdddD  _  j  j dS )zQ Pre calculate all variable / function combinations and put them in
        cachec                    s8   i | ]} j  D ]\}}d |||t |q	qS )z{}_{})print_functionsitemsr/   getattr).0val	func_namefuncr1   r   r   
<dictcomp>U   s    z9MorfessorEvaluationResult._fill_cache.<locals>.<dictcomp>)r!   r"   r#   r$   N)r%   updater    r1   r   r1   r   r)   R   s   
z%MorfessorEvaluationResult._fill_cachec                 C   s   | j du r	|   | j S )z, Fill the cache (if necessary) and return itNr(   r1   r   r   r   
_get_cache[   s   
z$MorfessorEvaluationResult._get_cachec                 C   s   |j di |  S )z Format this object. The format string can contain all variables,
        e.g. fscore_avg, precision_values or any item from metadataNr   )r/   r<   )r&   format_stringr   r   r   r/   a   s   z MorfessorEvaluationResult.formatr   )__name__
__module____qualname____doc__r   r   listr   r3   r'   r+   r.   r2   r)   r<   r/   r   r   r   r   r       s    	

	r   c                   @   sx   e Zd ZdZdd ZeddfddZeddfdd	Zd
d Ze	dd Z
edddfddZedddfddZdS )MorfessorEvaluationa   Do the evaluation of one model, on one testset. The basic procedure is
    to create, in a stable manner, a number of samples and evaluate them
    independently. The stable selection of samples makes it possible to use
    the resulting values for Pair-wise statistical significance testing.

    reference_annotations is a standard annotation dictionary:
    {compound => ([annoation1],.. ) }
    c                    s>   i  _ | D ]\}}t fdd|D  j |< qi  _d S )Nc                 3   s    | ]
}t  |V  qd S r   tuple_segmentation_indices)r6   ar1   r   r   	<genexpr>t   s    
z/MorfessorEvaluation.__init__.<locals>.<genexpr>)	referencer4   rB   _samples)r&   reference_annotationscompoundanalysesr   r1   r   r'   p   s   
zMorfessorEvaluation.__init__
   i  c                    sR   t | jjj k rtd t| j   fddtjD | j	< dS )zbCreate, in a stable manner, n testsets of size x as defined in
        test_configuration
        z.The test set is too small for this sample sizec                    s   g | ]	}t  j|qS r   )r   r   )r6   ir   configurationr   r   
<listcomp>   s    z7MorfessorEvaluation._create_samples.<locals>.<listcomp>N)
r   rI   r   r   _loggerwarningsortedkeysrangerJ   r&   rQ   r   rP   r   _create_samplesy   s   
z#MorfessorEvaluation._create_samplesc                 C   s   || j vr
| | | j | S )a  Get a list of samples. A sample is a list of compounds.

        This method is stable, so each time it is called with a specific
        test_set and configuration it will return the same samples. Also this
        method caches the samples in the _samples variable.

        )rJ   rY   rX   r   r   r   get_samples   s   


zMorfessorEvaluation.get_samplesc           	         s   dd  t t| t| j @ }d}d}|D ]3}t|dk r"q|t fddt|| | j| D 7 }|t fddt|| | j| D 7 }q|t| }|t| }dd	| d	|   }|||t|fS )
z9Helper method to get the precision and recall of 1 samplec                 S   s<   t | dkrdS t t| t| }t | | tt |  S )Nr         ?)r   setfloat)refpreddiffr   r   r   calc_prop_distance   s   z9MorfessorEvaluation._evaluate.<locals>.calc_prop_distanceg           c                 3   s    | ]
\}} ||V  qd S r   r   r6   prra   r   r   rH          z0MorfessorEvaluation._evaluate.<locals>.<genexpr>c                 3   s    | ]
\}} ||V  qd S r   r   rc   rf   r   r   rH      rg   g       @r[   )rU   r\   rV   rI   r   r   r   )	r&   
predictionwordlist
recall_sum
precis_sumwordr!   r"   r-   r   rf   r   	_evaluate   s*   
zMorfessorEvaluation._evaluatec                 c   s.    d}| dd D ]}|t |7 }|V  q	dS )z>Method to transform a annotation into a tuple of split indicesr   Nr   )
annotationcur_lenrG   r   r   r   rF      s   z)MorfessorEvaluation._segmentation_indicesNc           	      C   s|   |du rddi}t |}t| |D ](\}}td| i }|D ]}t| ||d g||< q!|j| 	|  q|S )zGet the prediction of the test samples from the model and do the
        evaluation

        The meta_data object has preferably at least the key 'name'.

        NnameUNKNOWNEvaluating sample %sr   )
r   	enumeraterZ   rS   debugrE   rF   viterbi_segmentr.   rm   )	r&   modelrQ   r    merrO   r   rh   rL   r   r   r   evaluate_model   s   z"MorfessorEvaluation.evaluate_modelc                    s   dd   fdd|D }|du rddi}t |}t|D ]\}td| fd	d| D }|j|  q!|S )
z.Method for evaluating an existing segmentationc                 S   s,   | d }t dt| D ]}|| |  }q|S )Nr      )rW   r   )constructionsrL   rO   r   r   r   merge_constructions   s   zFMorfessorEvaluation.evaluate_segmentation.<locals>.merge_constructionsc                    s*   i | ]} |d  t |d  gqS )r{   rD   r6   r   )r}   r&   r   r   r:      s
    
z=MorfessorEvaluation.evaluate_segmentation.<locals>.<dictcomp>Nrr   rs   rt   c                    s   i | ]\}}| v r||qS r   r   )r6   kv)r   r   r   r:      s    )r   ru   rZ   rS   rv   r4   r.   rm   )r&   segmentationrQ   r    ry   rO   rh   r   )r}   r   r&   r   evaluate_segmentation   s   z)MorfessorEvaluation.evaluate_segmentation)r>   r?   r@   rA   r'   r   rY   rZ   rm   staticmethodrF   rz   r   r   r   r   r   rC   g   s    	

rC   c                   @   sP   e Zd ZdZedddZedd Zedd	 Z	
	dddZedd Z	dS )WilcoxonSignedRankzClass for doing statistical signficance testing with the Wilcoxon
    Signed-Rank test

    It implements the Pratt method for handling zero-differences and
    applies a 0.5 continuity correction for the z-statistic.

    prattTc                    s(  |dvrt |dkrttdd   t }tdd  D }tdd t| D }td	d t| D }t||}||d
  d }||d
  d| d
  }	 fddt	|D }
|	dtdd t
|
 D  8 }	t|	d }	|r||kr~dnd}nd}|| | |	 }dtt| S )N)wilcoxr   r   c                 S   s   | dkS Nr   r   )rG   r   r   r   r          z.WilcoxonSignedRank._wilcoxon.<locals>.<lambda>c                 S   s   g | ]}t |qS r   )absr6   r   r   r   r   rR         z0WilcoxonSignedRank._wilcoxon.<locals>.<listcomp>c                 s   s     | ]\}}|d kr|V  qdS r   Nr   r6   re   r   r   r   r   rH         z/WilcoxonSignedRank._wilcoxon.<locals>.<genexpr>c                 s   s     | ]\}}|d k r|V  qdS r   r   r   r   r   r   rH     r   r{   g      ?rb   c                    s    g | ]\}} | d kr|qS )r   r   )r6   rO   re   dr   r   rR     s           ?c                 s   s     | ]}||| d   V  qdS )r{   Nr   r~   r   r   r   rH     r   g      8@g      r   )
ValueErrorrB   filterr   r   	_rankdatar   zipr   ru   collectionsCounterr   mathsqrt_norm_cum_pdfr   )r   method
correctionr   ranksrank_sum_posrank_sum_negtestmeanstdevno_zero_rankszr   r   r   	_wilcoxon   s*   
zWilcoxonSignedRank._wilcoxonc                    sz   t  }| D ]
}||  d7  < qi  d}t| dd dD ]\}}||| d  d  |< ||7 }q fdd| D S )Nr{   c                 S   s   | d S r   r   r   r   r   r   r   !  r   z.WilcoxonSignedRank._rankdata.<locals>.<lambda>)keyrb   c                    s   g | ]} | qS r   r   r   	rank_dictr   r   rR   %  r   z0WilcoxonSignedRank._rankdata.<locals>.<listcomp>)r   r   rU   r4   )r   odr   cur_rankr7   r   r   r   r   r     s   
zWilcoxonSignedRank._rankdatac                 C   s   ddt | t d   S )z@Pure python implementation of the normal cumulative pdf functionr   rb   )r   erfr   )r   r   r   r   r   '  s   z WilcoxonSignedRank._norm_cum_pdffscore_valuesrr   c                    s    fdd|D }t dd | D rtd i S i }t| | D ]\}}| dd t|| || D |||f< q'|S )zTakes a set of evaluations (which should have the same
        test-configuration) and calculates the p-value for the Wilcoxon signed
        rank test

        Returns a dictionary with (name1,name2) keys and p-values as values.
        c                    s   i | ]	}|  | qS r   r   r6   re   name_propertyval_propertyr   r   r:   4  s    z8WilcoxonSignedRank.significance_test.<locals>.<dictcomp>c                 s   s    | ]	}t |d k V  qdS )rN   Nro   r~   r   r   r   rH   5  s    z7WilcoxonSignedRank.significance_test.<locals>.<genexpr>z1Too small number of samples for the Wilcoxon testc                 S   s   g | ]\}}|| qS r   r   )r6   v1v2r   r   r   rR   :  s    z8WilcoxonSignedRank.significance_test.<locals>.<listcomp>)anyr   rS   errorr   rV   r   r   )r&   evaluationsr   r   resultsrd   r1r2r   r   r   significance_test,  s   

z$WilcoxonSignedRank.significance_testc                 C   s   t tdd |  D }ttdd |D d}tdg|D ]}tdj||ddd	 qt  |D ]$}tdj||ddd	 |D ]}td
j| ||f |ddd	 qAt  q2dS )z>Nicely format a results table as returned by significance_testc                 s   s    | ]}|d  V  qdS r   r   r   r   r   r   rH   C      z1WilcoxonSignedRank.print_table.<locals>.<genexpr>c                 s   s    | ]}t |V  qd S r   ro   )r6   nr   r   r   rH   E  r       z
{:{width}})width|)endz{:{width}.5}N)rU   r\   rV   r   r   printr/   )r   names	col_widthhrr   name2r   r   r   print_table@  s   zWilcoxonSignedRank.print_tableN)r   T)r   rr   )
r>   r?   r@   rA   r   r   r   r   r   r   r   r   r   r   r      s    


r   )
__future__r   r   logging	itertoolsr   r   r   r   	getLoggerr>   rS   
namedtupler   r0   r   objectr   rC   r   r   r   r   r   <module>   s&    
G 
