o
    5tiD                     @   s   d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
 ddlmZmZ ddlmZmZmZ dd	lmZ ed
ZdZddddddddddddZdefddZG dd deZG dd deZG dd  d eZdS )!z>The implementation of the BLEU metric (Papineni et al., 2002).    N)import_module)ListSequenceOptionalDictAny   )my_logsum_of_lists   )Score	SignatureMetric)extract_all_word_ngrams	sacrebleu   ztokenizer_none.NoneTokenizerztokenizer_zh.TokenizerZhztokenizer_13a.Tokenizer13az(tokenizer_intl.TokenizerV14Internationalztokenizer_char.TokenizerCharz#tokenizer_ja_mecab.TokenizerJaMecabz#tokenizer_ko_mecab.TokenizerKoMecabztokenizer_spm.TokenizerSPMz tokenizer_spm.Flores101Tokenizerz tokenizer_spm.Flores200Tokenizerztokenizer_spm.spBLEU1KTokenizer)nonezh13aintlcharja-mecabko-mecabspm	flores101	flores200z	spBLEU-1Knamec                 C   s*   t |  dd\}}ttd| d|S )z6Dynamically import tokenizer as importing all is slow..r   z.tokenizers.r   )_TOKENIZERSrsplitgetattrr   )r   module_name
class_name r#   J/home/ubuntu/.local/lib/python3.10/site-packages/sacrebleu/metrics/bleu.py_get_tokenizer"   s
   r%   c                       s&   e Zd ZdZdef fddZ  ZS )BLEUSignaturezA convenience class to represent the reproducibility signature for BLEU.

    :param args: key-value dictionary passed from the actual metric instance.
    argsc                    s   t  | | jddddd |d }tj| }|dur1|d }|du r(|}|d	|d
d7 }| j|d r:dnd|d rAdnd|d |d dS )z`BLEUSignature` initializer.cetoks)caseeffr*   smoothsmooth_methodNsmooth_value[z.2f]	lowercaselcmixedeffective_orderyesnotokenizer_signature)super__init___abbrupdateBLEUSMOOTH_DEFAULTSinfo)selfr'   
smooth_str
smooth_def
smooth_val	__class__r#   r$   r;   /   s(   
zBLEUSignature.__init__)__name__
__module____qualname____doc__dictr;   __classcell__r#   r#   rE   r$   r&   *   s    r&   c                       sJ   e Zd ZdZdedee dee dee dededef fd	d
Z  ZS )	BLEUScorea  A convenience class to represent BLEU scores.

    :param score: The BLEU score.
    :param counts: List of counts of correct ngrams, 1 <= n <= max_ngram_order
    :param totals: List of counts of total ngrams, 1 <= n <= max_ngram_order
    :param precisions: List of precisions, 1 <= n <= max_ngram_order
    :param bp: The brevity penalty.
    :param sys_len: The cumulative system length.
    :param ref_len: The cumulative reference length.
    scorecountstotals
precisionsbpsys_lenref_lenc                    s   t  d| || _|| _|| _|| _|| _|| _ddd | jD | _	| jr.| j| j nd| _
| j	 d| jdd| _|  jd	| j
dd
| jdd7  _|  jd| jdd7  _dS )z`BLEUScore` initializer.r>   /c                 S   s   g | ]}|d qS )z.1fr#   .0pr#   r#   r$   
<listcomp>g       z&BLEUScore.__init__.<locals>.<listcomp>r   z (BP = z.3f zratio = z hyp_len = dz
ref_len = )N)r:   r;   rR   rO   rP   rS   rT   rQ   joinprec_strratio_verbose)rA   rN   rO   rP   rQ   rR   rS   rT   rE   r#   r$   r;   [   s   $zBLEUScore.__init__)	rG   rH   rI   rJ   floatr   intr;   rL   r#   r#   rE   r$   rM   P   s    
rM   c                       s  e Zd ZU dZdddddZeeee f e	d< e
 ZdZdd	d
dZeZdddddedddf	dededee dedee dedededeeee   f fddZedddefdee dee dedededededefd d!Zd"edefd#d$Zd%ee defd&d'Zd%eee  defd(d)Zd*ed+ee defd,d-Zd.ee deeef fd/d0Zd1ed2edee fd3d4Z d1edee def fd5d6Z!  Z"S )7r>   aw  Computes the BLEU metric given hypotheses and references.

    :param lowercase: If True, lowercased BLEU is computed.
    :param force: Ignore data that looks already tokenized.
    :param tokenize: The tokenizer to use. If None, defaults to language-specific tokenizers with '13a' as the fallback default.
    :param smooth_method: The smoothing method to use ('floor', 'add-k', 'exp' or 'none').
    :param smooth_value: The smoothing value for `floor` and `add-k` methods. `None` falls back to default value.
    :param max_ngram_order: If given, it overrides the maximum n-gram order (default: 4) when computing precisions.
    :param effective_order: If `True`, stop including n-gram orders for which precision is 0. This should be
        `True`, if sentence-level BLEU will be computed.
    :param trg_lang: An optional language code to raise potential tokenizer warnings.
    :param references: A sequence of reference documents with document being
    defined as a sequence of reference strings. If given, the reference n-grams
    and lengths will be pre-computed and cached for faster BLEU computation
    across many systems.
    Ng?r   )r   flooradd-kexpr?   r   r   r   r   )r   jakoFrf    r3   forcetokenizer/   r0   max_ngram_orderr6   trg_lang
referencesc
                    s   t    || _|| _|| _|| _|| _|| _|| _| j| j	
 v s&J d|du r:| j}
| j| jv r9| j| j }
n,|}
| jdkrJ|
dkrJtd | jdkrX|
dkrXtd | jdkrf|
d	krftd
 t|
 | _| j | _|	dur~| |	| _dS dS )z`BLEU` initializer.z,Unknown smooth_method {self.smooth_method!r}Nr   z7Consider using the 'zh' or 'spm' tokenizer for Chinese.rg   r   z>Consider using the 'ja-mecab' or 'spm' tokenizer for Japanese.rh   r   z<Consider using the 'ko-mecab' or 'spm' tokenizer for Korean.)r:   r;   _forcerm   r3   r0   r/   rl   r6   r?   keysTOKENIZER_DEFAULT_TOKENIZER_MAPsacreloggerwarningr%   	tokenizer	signaturer9   _cache_references
_ref_cache)rA   r3   rj   rk   r/   r0   rl   r6   rm   rn   best_tokenizerrE   r#   r$   r;      sD   

zBLEU.__init__r   correcttotalrS   rT   returnc                 C   s  |t j v sJ d|du rt j| }d}||k r)|dkr'td||  nd}dg| }	t| s<td| ||	|||S d}
|}tdt|	d D ]p}|dkrg|dkrg| |d   |7  < ||d   |7  < ||d  dkrq nI|ru|}| |d  dkr|dkr|
d	9 }
d
|
||d    |	|d < qI|dkrd
| ||d   |	|d < qId
| |d   ||d   |	|d < qI|tt	dd |	d| D |  }t|| ||	|||S )aE  Computes BLEU score from its sufficient statistics with smoothing.

        Smoothing methods (citing "A Systematic Comparison of Smoothing Techniques for Sentence-Level BLEU",
        Boxing Chen and Colin Cherry, WMT 2014: http://aclweb.org/anthology/W14-3346)

        - none: No smoothing.
        - floor: Method 1 (requires small positive value (0.1 in the paper) to be set)
        - add-k: Method 2 (Generalizing Lin and Och, 2004)
        - exp: Method 3 (NIST smoothing method i.e. in use with mteval-v13a.pl)

        :param correct: List of counts of correct ngrams, 1 <= n <= max_ngram_order
        :param total: List of counts of total ngrams, 1 <= n <= max_ngram_order
        :param sys_len: The cumulative system length
        :param ref_len: The cumulative reference length
        :param smooth_method: The smoothing method to use ('floor', 'add-k', 'exp' or 'none')
        :param smooth_value: The smoothing value for `floor` and `add-k` methods. `None` falls back to default value.
        :param effective_order: If `True`, stop including n-gram orders for which precision is 0. This should be
            `True`, if sentence-level BLEU will be computed.
        :param max_ngram_order: If given, it overrides the maximum n-gram order (default: 4) when computing precisions.
        :return: A `BLEUScore` instance.
        z'Unknown smooth_method {smooth_method!r}Ng      ?r   r   g        re   rf   r   g      Y@rd   c                 S   s   g | ]}t |qS r#   )r	   rV   r#   r#   r$   rY   #  rZ   z%BLEU.compute_bleu.<locals>.<listcomp>)
r>   r?   rp   mathrf   anyrM   rangelensum)rz   r{   rS   rT   r/   r0   r6   rl   rR   rQ   smooth_mteval	eff_ordernrN   r#   r#   r$   compute_bleu   sB   

&zBLEU.compute_bleusentc                 C   s   | j r| }| | S )zGiven a sentence, lowercases (optionally) and tokenizes it
        :param sent: The input sentence string.
        :return: The pre-processed output string.
        )r3   lowerru   rstrip)rA   r   r#   r#   r$   _preprocess_segment'  s   zBLEU._preprocess_segmentstatsc              
   C   sN   | j |dd| j  |d| j d t|d t|d | j| j| j| jdS )zComputes the final score from already aggregated statistics.

        :param stats: A list or numpy array of segment-level statistics.
        :return: A `BLEUScore` object.
        r   Nr   r   )rz   r{   rS   rT   r/   r0   r6   rl   )r   rl   rc   r/   r0   r6   rA   r   r#   r#   r$   _compute_score_from_stats0  s   zBLEU._compute_score_from_statsc                 C   s   |  t|S )zComputes the final BLEU score given the pre-computed corpus statistics.

        :param stats: A list of segment-level statistics
        :return: A `BLEUScore` instance.
        )r   r
   r   r#   r#   r$   _aggregate_and_compute?  s   zBLEU._aggregate_and_computehyp_lenref_lensc                 C   sP   d\}}|D ]}t || }|dks||k r|}|}q||kr%||k r%|}q|S )a  Given a hypothesis length and a list of reference lengths, returns
        the closest reference length to be used by BLEU.

        :param hyp_len: The hypothesis length.
        :param ref_lens: A list of reference lengths.
        :return: The closest reference length.
        )r   r   )abs)rA   r   r   closest_diffclosest_lenrT   diffr#   r#   r$   _get_closest_ref_lenG  s   zBLEU._get_closest_ref_lenrefsc           	      C   sj   d}g }|D ])}t |d| j\}}|| |du r|}q| D ]\}}t|| |||< q!q||dS )aX  Given a list of reference segments, extract the n-grams and reference lengths.
        The latter will be useful when comparing hypothesis and reference lengths for BLEU.

        :param refs: A sequence of strings.
        :return: A dictionary that will be passed to `_compute_segment_statistics()`
        through keyword arguments.
        Nr   )
ref_ngramsr   )r   rl   appenditemsmax)	rA   r   ngramsr   refthis_ngramsrT   ngramcountr#   r#   r$   _extract_reference_info[  s   

zBLEU._extract_reference_info
hypothesis
ref_kwargsc                 C   s   |d |d }}t |d| j\}}| ||}dd t| jD }|dd }	| D ]#\}
}t|
d }|	|  |7  < |
|v rO||  t|||
 7  < q,||g| |	 S )a  Given a (pre-processed) hypothesis sentence and already computed
        reference n-grams & lengths, returns the best match statistics across the
        references.

        :param hypothesis: Hypothesis sentence.
        :param ref_kwargs: A dictionary with `refs_ngrams`and `ref_lens` keys
        that denote the counter containing all n-gram counts and reference lengths,
        respectively.
        :return: A list of integers with match statistics.
        r   r   r   c                 S   s   g | ]}d qS )r   r#   )rW   ir#   r#   r$   rY     s    z4BLEU._compute_segment_statistics.<locals>.<listcomp>N)r   rl   r   r   r   r   min)rA   r   r   r   r   
hyp_ngramsr   rT   rz   r{   	hyp_ngram	hyp_countr   r#   r#   r$   _compute_segment_statisticsv  s   z BLEU._compute_segment_statisticsc                    s   | j std t ||S )zCompute the metric for a single sentence against a single (or multiple) reference(s).

        :param hypothesis: A single hypothesis string.
        :param references: A sequence of reference strings.
        :return: a `BLEUScore` object.
        zFIt is recommended to enable `effective_order` for sentence-level BLEU.)r6   rs   rt   r:   sentence_score)rA   r   rn   rE   r#   r$   r     s
   zBLEU.sentence_score)#rG   rH   rI   rJ   r?   r   strr   rb   __annotations__r   rp   
TOKENIZERSrq   rr   r&   _SIGNATURE_TYPEMAX_NGRAM_ORDERboolrc   r   r;   staticmethodr   rM   r   r   r   r   r   r   r   r   r   rL   r#   r#   rE   r$   r>   p   s   
 8R	
&%r>   )rJ   r}   logging	importlibr   typingr   r   r   r   r   utilsr	   r
   baser   r   r   helpersr   	getLoggerrs   r   r   r   r%   r&   rM   r>   r#   r#   r#   r$   <module>   s4    
& 