o
    eic                     @   s$   d Z ddlmZ G dd deZdS )zLibrary for computing the BLEU score based on SacreBLEU

SacreBLEU github: https://github.com/mjpost/sacrebleu

Authors
 * Titouan Parcollet 2025
 * Mirco Ravanelli 2021
    )MetricStatsc                   @   s4   e Zd ZdZdddZdd Zddd	Zd
d ZdS )	BLEUStatsa+  A class for tracking corpus-level BLEU (https://www.aclweb.org/anthology/P02-1040.pdf). Each hypothesis can be matched against one or multiple references.

    Arguments
    ---------
    max_ngram_order: int, default 4
        The maximum length of the ngrams to use for BLEU scoring. Default is 4.

    Example
    -------
    >>> bleu = BLEUStats()
    >>> bleu.append(
    ...     ids=['utterance1', 'utterance2'],
    ...     predict=[
    ...         'The dog bit the man.',
    ...         'It was not surprising.'],
    ...     targets=[
    ...                ['The dog bit the man.', 'It was not unexpected.'],
    ...                ['The dog had bit the man.', 'No one was surprised.']
    ...             ]
    ... )
    >>> stats = bleu.summarize()
    >>> stats['BLEU']
    74.19446627365011
       c                 C   sP   zddl m} W n ty   td Y nw |   ||d| _g | _d | _d S )Nr   )BLEUz^Please install sacrebleu (https://pypi.org/project/sacrebleu/) in order to use the BLEU metric)max_ngram_order)sacrebleu.metricsr   ImportErrorprintclearbleupredictstargets)selfr   r    r   T/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/utils/bleu.py__init__'   s   
zBLEUStats.__init__c                 C   sn   | j | | j| | jdu r|| _dS t| jt|ks!J tt| jD ]}| j| ||  q(dS )a  Add stats to the relevant containers.
        * See MetricStats.append()
        Arguments
        ---------
        ids : list
            List of ids corresponding to utterances.
        predict : list[str]
            A str which represent the hypotheses. Of dimension [nb_hypotheses]
        targets : list[list[str]]
            List of list of reference. The dimensions are as follow:
            [nb_references, nb_hypotheses].
        N)idsextendr   r   lenrange)r   r   predictr   ir   r   r   append6   s   

zBLEUStats.appendNc                 C   s   | j | j| j}i }|j|d< |j|d< |j|j |d< |j|d< |j|d< |j|d< || _	|| _
| j
d | j
d< |durC| j
| S | j
S )	zaSummarize the BLEU and return relevant statistics.
        * See MetricStats.summarize()
        r   BPratiohyp_lenref_len
precisions
bleu_scoreN)r   corpus_scorer   r   scorebpsys_lenr   r   scoressummary)r   fieldr#   detailsr   r   r   	summarizeN   s   





zBLEUStats.summarizec                 C   s    | j s|   t| j|d dS )zoWrite all relevant info (e.g., error rate alignments) to file.
        * See MetricStats.write_stats()
        )fileN)r$   r'   r	   r#   )r   
filestreamr   r   r   write_statsf   s   zBLEUStats.write_stats)r   )N)__name__
__module____qualname____doc__r   r   r'   r*   r   r   r   r   r      s    

r   N)r.   speechbrain.utils.metric_statsr   r   r   r   r   r   <module>   s    	