o
    5tiA                     @   s   d Z ddlZddlZddlZddlmZmZ ddlmZm	Z	m
Z
mZmZ ddlmZ edZG dd	 d	ZG d
d dZG dd dedZdS )zThe base `Score`, `Metric` and `Signature` classes to derive from.

`Metric` is an abstract class that enforces the implementation of a set
of abstract methods. This way, a correctly implemented metric will work
seamlessly with the rest of the codebase.
    N)ABCMetaabstractmethod)AnyDictListOptionalSequence   )__version__	sacrebleuc                   @   sd   e Zd ZdZdedefddZ				dd	ed
edededef
ddZ	de
d  fddZdd ZdS )ScorezA base score class to derive from.

    :param name: The name of the underlying metric.
    :param score: A floating point number for the final metric.
    namescorec                 C   s"   || _ || _d| _d| _d| _dS )z`Score` initializer.g       N)r   r   _mean_ci_verbose)selfr   r    r   J/home/ubuntu/.local/lib/python3.10/site-packages/sacrebleu/metrics/base.py__init__   s
   
zScore.__init__r	   Fr   width
score_only	signatureis_jsonreturnc                 C   s2  | j t| jd| d|d}| jd| d}| jdkrR| jd| d}| jd| d}d| d| }	|d|	 d7 }|rRt||d	< t||d
< |	|d< |r\| j  d| n| j }
|
 d| }
| jrv|
d| j 7 }
| j|d< |rz|S |r|dD ]}|d\}}|||< qtj|dddS |
S )a  Returns a pretty representation of the score.
        :param width: Floating point decimal precision width.
        :param score_only: If `True`, and the format is not `json`,
        returns a single score string.
        :param signature: A string representation of the given `Signature`
        instance.
        :param is_json: If `True`, will output the score in JSON string.
        :return: A plain or JSON-formatted string representation.
        .f)r   r   r   r   u   μ = u    ± z ()confidence_meanconfidence_var
confidence|z =  verbose_score:   F)indentensure_ascii)	r   floatr   r   r   r   splitjsondumps)r   r   r   r   r   dscr   r    confidence_str
full_scoreparamkeyvaluer   r   r   format&   s6   


zScore.formatscoresc                 C   s^   t dd |D }t|}|d }|| d }|| || }}d||  | _t|| _dS )zTakes a list of scores and stores mean, stdev and 95% confidence
        interval around the mean.

        :param scores: A list of `Score` objects obtained from bootstrap
        resampling for example.
        c                 S   s   g | ]}|j qS r   )r   .0xr   r   r   
<listcomp>c       z%Score.estimate_ci.<locals>.<listcomp>(   r&   g      ?N)sortedlenr   
statisticsmeanr   )r   r5   
raw_scoresn	lower_idx	upper_idxlowerupperr   r   r   estimate_ci[   s   zScore.estimate_cic                 C      |   S )z&Returns a human readable score string.r4   r   r   r   r   __repr__m      zScore.__repr__N)r	   Fr   F)__name__
__module____qualname____doc__strr)   r   intboolr4   r   rF   rJ   r   r   r   r   r      s(    
5r   c                   @   sT   e Zd ZdZdefddZddedefdd	Zd
ede	fddZ
dd Zdd ZdS )	SignaturezA convenience class to represent sacreBLEU reproducibility signatures.

    :param args: key-value dictionary passed from the actual metric instance.
    argsc                 C   s   ddddddddd	d
	| _ d|vrtd|d }|dkrd}t||ddd|dd|dd|dd|dd|ddd	| _dS )z`Signature` initializer.v#tlSobsarrs)	versionnrefstestlangsubsetoriglangr[   r\   seednum_refsz?Number of references unknown, please evaluate the metric first.varn_bootstrapNrd   test_setlangpairrc   rb   )	r^   r_   r[   r\   rd   r`   ra   rc   rb   )_abbr
ValueErrorr
   getinfo)r   rT   re   r   r   r   r   x   s6   





zSignature.__init__Fshortr   c                 C   s   g }t | j }|d |dg D ])}| j| }|dur<t|tr)|r'dnd}|r0| j| n|}|| d|  qd|S )zReturns a string representation of the signature.

        :param short: If True, shortened signature is produced.
        :return: A string representation of the signature.
        r^   Nyesnor%   r"   )	listrn   keysremove
isinstancerR   rk   appendjoin)r   ro   pairsrs   r   r3   
final_namer   r   r   r4      s   



zSignature.formatr2   r3   c                 C   s   || j |< dS )zAdd a new item or update an existing one.

        :param key: The key to use in the dictionary.
        :param value: The associated value for the `key`.
        N)rn   )r   r2   r3   r   r   r   update   s   zSignature.updatec                 C   rG   z*Returns a human-readable signature string.rH   rI   r   r   r   __str__   rK   zSignature.__str__c                 C   rG   r{   rH   rI   r   r   r   rJ      rK   zSignature.__repr__N)F)rL   rM   rN   rO   dictr   rR   rP   r4   r   rz   r|   rJ   r   r   r   r   rS   r   s    'rS   c                	   @   s|  e Zd ZdZeZdd Zdedee fddZ	dee de
eee   fd	d
Zedeee  defddZedee defddZededefddZedee deeef fddZedededee fddZdeee  dee fddZdee de
eee   defddZdedee defd d!Z	"d)dee de
eee   d#edefd$d%Zdefd&d'Zd(S )*MetriczA base class for all metrics that ensures the implementation of some
    methods. Much of the common functionality is moved to this base class
    from other metrics.c                 C   s   d| _ d| _d| _d| _dS )z`Metric` initializer.NT)
_ref_cache_forcerh   rd   rI   r   r   r   r      s   
zMetric.__init__hyprefsc                 C   sn   | j j}d}t|tsd}nt|tst|tsd}nt|d ts*|d dur*d}|r5t| d| dS )zPerforms sanity checks on `sentence_score` method's arguments.

        :param hyp: A single hypothesis string.
        :param refs: A sequence of reference strings.
        Nz&The argument `hyp` should be a string.z4The argument `refs` should be a sequence of strings.r   z*Each element of `refs` should be a string.: )	__class__rL   ru   rP   r   	TypeError)r   r   r   prefixerr_msgr   r   r   _check_sentence_score_args   s   
z!Metric._check_sentence_score_argshypsc                 C   s   | j j}d}t|tsd}nt|d tsd}ntdd |D r#d}|durLt|ts/d}nt|d ts9d	}nt|d d tsL|d d durLd}|rWt| d
| dS )a1  Performs sanity checks on `corpus_score` method's arguments.

        :param hypses: A sequence of hypothesis strings.
        :param refs: A sequence of reference documents with document being
        defined as a sequence of reference strings. If `None`, cached references
        will be used.
        Nz'`hyps` should be a sequence of strings.r   z*Each element of `hyps` should be a string.c                 s   s    | ]}|d u V  qd S Nr   )r7   liner   r   r   	<genexpr>   s    z2Metric._check_corpus_score_args.<locals>.<genexpr>z$Undefined line in hypotheses stream!z3`refs` should be a sequence of sequence of strings.z7Each element of `refs` should be a sequence of strings.r   )r   rL   ru   r   rP   anyr   )r   r   r   r   r   r   r   r   _check_corpus_score_args   s$   

"zMetric._check_corpus_score_argsstatsr   c                 C      dS )zComputes the final score given the pre-computed match statistics.

        :param stats: A list of segment-level statistics.
        :return: A `Score` instance.
        Nr   r   r   r   r   r   _aggregate_and_compute     zMetric._aggregate_and_computec                 C   r   )zComputes the final score from already aggregated statistics.

        :param stats: A list or numpy array of segment-level statistics.
        :return: A `Score` object.
        Nr   r   r   r   r   _compute_score_from_stats  r   z Metric._compute_score_from_statssentc                 C   r   )zA wrapper around the metric's tokenization and pre-processing logic.
        This should be implemented for reference caching to work correctly.

        :param sent: The input sentence.
        :return: The pre-processed output sentence.
        Nr   )r   r   r   r   r   _preprocess_segment     zMetric._preprocess_segmentc                 C   r   )a  Given a list of reference segments, extract the required
        information (such as n-grams for BLEU and chrF). This should be implemented
        for the generic `_cache_references()` to work across all metrics.

        :param refs: A sequence of strings.
        Nr   )r   r   r   r   r   _extract_reference_info)  r   zMetric._extract_reference_info
hypothesis
ref_kwargsc                 C   r   )a  Given a (pre-processed) hypothesis sentence and already computed
        reference info, returns the best match statistics across the
        references. The return type is usually a List of ints or floats.

        :param hypothesis: A pre-processed hypothesis sentence.
        :param ref_kwargs: A dictionary with reference-related information
        within. This is formulated as a dictionary as different metrics may
        require different information regarding a reference segment.
        Nr   )r   r   r   r   r   r   _compute_segment_statistics3  s   z"Metric._compute_segment_statistics
referencesc                    s~   g }t  }t| D ]!}dd |D }|t|  fdd|D }| | q	t|dkr:t|d  _|S d _|S )a&  Given the full set of document references, extract segment n-grams
        (or other necessary information) for caching purposes.

        :param references: A sequence of reference documents with document being
        defined as a sequence of reference strings. A particular reference
        segment can be '' or `None` to allow the use of variable number
        of references per segment.
        :return: A list where each element is a tuple of segment n-grams and
        reference lengths, as returned by `_extract_reference_info()`.
        c                 S   s   g | ]}|d ur|qS r   r   r6   r   r   r   r9   T  s    z,Metric._cache_references.<locals>.<listcomp>c                    s   g | ]}  |qS r   )r   r6   rI   r   r   r9   Z  s    r&   r   rf   )setzipaddr=   rv   r   rr   re   )r   r   	ref_cachere   r   linesr   rI   r   _cache_referencesB  s   zMetric._cache_references
hypothesesc                 C   s   |r|  |}n| jr| j}ntdg }d}t||D ]\}}| js,|dr,|d7 }| |}|| || q|dkrNt	
d t	
d t	
d |S )	a  Reads the corpus and returns sentence-level match statistics for
        faster re-computations esp. during statistical tests.

        :param hypotheses: A sequence of hypothesis strings.
        :param references: A sequence of reference documents with document being
        defined as a sequence of reference strings. If `None`, cached references
        will be used.
        :return: A list where each sublist corresponds to segment statistics.
        z.No references provided and the cache is empty.r   z .r&   d   z5That's 100 lines that end in a tokenized period ('.')zQIt looks like you forgot to detokenize your test data, which may hurt your score.zpIf you insist your data is detokenized, or don't care, you can suppress this message with the `force` parameter.)r   r   RuntimeErrorr   r   endswithr   rv   r   sacreloggerwarning)r   r   r   r   r   	tok_countr   r   r   r   r   _extract_corpus_statisticsg  s*   

z!Metric._extract_corpus_statisticsc                 C   s.   |  || | |gdd |D }| |S )zCompute the metric for a single sentence against a single (or multiple) reference(s).

        :param hypothesis: A single hypothesis string.
        :param references: A sequence of reference strings.
        :return: A `Score` object.
        c                 S   s   g | ]}|gqS r   r   )r7   r   r   r   r   r9     r:   z)Metric.sentence_score.<locals>.<listcomp>)r   r   r   )r   r   r   r   r   r   r   sentence_score  s
   
zMetric.sentence_scorer&   rh   c                 C   s\   |  || | ||}| |}|dkr,ddlm} || _||| |\| _}|| |S )a  Compute the metric for a corpus against a single (or multiple) reference(s).

        :param hypotheses: A sequence of hypothesis strings.
        :param references: A sequence of reference documents with document being
        defined as a sequence of reference strings. If `None`, cached references
        will be used.
        :param n_bootstrap: If > 1, provides 95% confidence interval around true mean
        using bootstrap resampling with `n_bootstrap` samples.
        :return: A `Score` object.
        r&   r	   )_bootstrap_resample)r   r   r   significancer   rh   rd   rF   )r   r   r   rh   r   actual_scorer   	bs_scoresr   r   r   corpus_score  s   

zMetric.corpus_scorec                 C   s   |  | jS )zCreates and returns the signature for the metric. The creation
        of signatures is delayed as the number of references is resolved
        only at the point of reference caching.)_SIGNATURE_TYPE__dict__rI   r   r   r   get_signature  s   zMetric.get_signatureN)r&   )rL   rM   rN   rO   rS   r   r   rP   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rQ   r   r   r   r   r   r   r~      s\    
 	 	%
-
$r~   )	metaclass)rO   r+   loggingr>   abcr   r   typingr   r   r   r   r   r^   r
   	getLoggerr   r   rS   r~   r   r   r   r   <module>   s    
_S