o
    5tiP                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlZddlZddlmZ ddl	Z
ddlZddlmZ G dd	 d	ed	g d
ZG dd deejdZG dd dedg dZG dd deZdd ZdS )zLibrary for scoring and evaluation of text samples.

Aggregation functions use bootstrap resampling to compute confidence intervals
as per the original ROUGE perl implementation.
    )absolute_import)division)print_functionN)Dict)rangec                   @      e Zd ZdZdS )Scorez9Tuple containing precision, recall, and f-measure values.N__name__
__module____qualname____doc__ r   r   G/home/ubuntu/.local/lib/python3.10/site-packages/rouge_score/scoring.pyr   "       r   )	precisionrecallfmeasurec                	   @   s4   e Zd ZdZejdededeeef fddZ	dS )
BaseScorerzBase class for Scorer objects.target
predictionreturnc                 C   s   dS )a  Calculates score between the target and prediction.

    Args:
      target: Text containing the target (ground truth) text.
      prediction: Text containing the predicted text.

    Returns:
      A dict mapping each score_type (string) to Score object.
    Nr   )selfr   r   r   r   r   score*   s    zBaseScorer.scoreN)
r
   r   r   r   abcabstractmethodstrr   r   r   r   r   r   r   r   '   s    $r   )	metaclassc                   @   r   )AggregateScorez1Tuple containing confidence intervals for scores.Nr	   r   r   r   r   r   7   r   r   lowmidhighc                   @   s2   e Zd ZdZdddZdd Zdd	 Zd
d ZdS )BootstrapAggregatora  Aggregates scores to provide confidence intervals.

  Sample usage:
    scorer = rouge_scorer.RougeScorer(['rouge1', 'rougeL'])
    aggregator = Aggregator()
    aggregator.add_scores(scorer.score("one two three", "one two"))
    aggregator.add_scores(scorer.score("one two five six", "seven eight"))
    result = aggregator.aggregate()
    print result
    {'rougeL': AggregateScore(
         low=Score(precision=0.0, recall=0.0, fmeasure=0.0),
         mid=Score(precision=0.5, recall=0.33, fmeasure=0.40),
         high=Score(precision=1.0, recall=0.66, fmeasure=0.80)),
     'rouge1': AggregateScore(
         low=Score(precision=0.0, recall=0.0, fmeasure=0.0),
         mid=Score(precision=0.5, recall=0.33, fmeasure=0.40),
         high=Score(precision=1.0, recall=0.66, fmeasure=0.80))}
  ffffff?  c                 C   sD   |dk s|dkrt d|dkrt d|| _|| _tt| _dS )a  Initializes a BootstrapAggregator object.

    Args:
      confidence_interval: Confidence interval to compute on the mean as a
        decimal.
      n_samples: Number of samples to use for bootstrap resampling.

    Raises:
      ValueError: If invalid argument is given.
    r      z+confidence_interval must be in range [0, 1]zn_samples must be positiveN)
ValueError
_n_samples_confidence_intervalcollectionsdefaultdictlist_scores)r   confidence_interval	n_samplesr   r   r   __init__P   s   zBootstrapAggregator.__init__c                 C   s(   t |D ]\}}| j| | qdS )zAdds a sample for future aggregation.

    Args:
      scores: Dict mapping score_type strings to a namedtuple object/class
        representing a score.
    N)six	iteritemsr-   append)r   scores
score_typer   r   r   r   
add_scorese   s   zBootstrapAggregator.add_scoresc                    sp   i }t | jD ]-\}tt}| | t fddtdD }t|d |d |d d||< q|S )zAggregates scores previously added using add_scores.

    Returns:
      A dict mapping score_type to AggregateScore objects.
    c                 3   s*    | ]}d  j  |ddf  V  qdS )r   N)	__class__).0jpercentilesr4   r   r   	<genexpr>   s   ( z0BootstrapAggregator.aggregate.<locals>.<genexpr>   r   r&      r   )	r1   r2   r-   npvstacktuple_bootstrap_resampler   r   )r   resultr5   score_matrix	intervalsr   r:   r   	aggregatep   s   
zBootstrapAggregator.aggregatec                 C   s   t | j|jd f}t| jD ](}t jjt |jd |jd d}||ddf }t j|dd||ddf< qd| j	 d }dt 
|dd| g }t j||ddS )	a  Performs bootstrap resampling on a matrix of scores.

    Args:
      matrix: A 2-d matrix of (sample, measure).

    Returns:
      A 2-d matrix of (bounds, measure). There are three bounds: low (row 0),
      mid (row 1) and high (row 2). Mid is always the mean, while low and high
      bounds are specified by self._confidence_interval (which defaults to 0.95
      meaning it will return the 2.5th and 97.5th percentiles for a 95%
      confidence interval on the mean).
    r&   r   )sizeN)axisr>   d   g      ?)r?   zerosr(   shaper   randomchoicearangemeanr)   array
percentile)r   matrixsample_meani
sample_idxsamplepercentile_deltaqr   r   r   rB      s   z'BootstrapAggregator._bootstrap_resampleN)r$   r%   )r
   r   r   r   r0   r6   rF   rB   r   r   r   r   r#   <   s    
r#   c                 C   s$   | | dkrd|  | | |  S dS )z5Computes f-measure given precision and recall values.r   r>   g        r   )r   r   r   r   r   r      s   r   )r   
__future__r   r   r   r   r*   typingr   numpyr?   r1   	six.movesr   
namedtupler   objectABCMetar   r   r#   r   r   r   r   r   <module>   s&   

e