o
    .wi                     @   sJ  d dl mZ d dlmZ d dlmZmZmZ d dlZd dlm	Z	m
Z
 dee dedefd	d
Zdedee fddZdefdee deee  de	de	de	de	dedeegee f dee	e	f fddZde	de	de	de	dedee dede	fddZ			ddeeee f deeeee f  dededeee  de	fddZdS )     )Counter)Sequence)CallableOptionalUnionN)Tensortensorngram_input_listn_gramreturnc                 C   s\   t  }td|d D ]!}tt| | d D ]}t| |||  }||  d7  < qq
|S )a  Count how many times each word appears in a given text with ngram.

    Args:
        ngram_input_list: A list of translated text or reference texts
        n_gram: gram value ranged 1 to 4

    Return:
        ngram_counter: a collections.Counter object of ngram

       )r   rangelentuple)r	   r
   ngram_counterij	ngram_key r   ^/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/functional/text/bleu.py_count_ngram   s   r   sentencec                 C   s   |   S )zTokenizes sentence into list of words.

    Args:
        sentence: A sentence separated by white space.

    Return:
        List of words

    )split)r   r   r   r   _tokenize_fn0   s   
r      predstarget	numeratordenominator	preds_len
target_len	tokenizerc                    s   fdd|D }fdd| D }	t |	|D ]c\ }
|t 7 }dd |
D } fdd|D }|||t| 7 }t |}t }|
D ]	}|t||O }qF||@ }|D ]}|t|d   || 7  < qV|D ]}|t|d   || 7  < qiq||fS )a  Update and returns variables required to compute the BLEU score.

    Args:
        preds: An iterable of machine translated corpus
        target: An iterable of iterables of reference corpus
        numerator: Numerator of precision score (true positives)
        denominator: Denominator of precision score (true positives + false positives)
        preds_len: count of words in a candidate prediction
        target_len: count of words in a reference translation
        target: count of words in a reference translation
        n_gram: gram value ranged 1 to 4
        tokenizer: A function that turns sentence into list of words

    c                    s   g | ]} fd d|D qS )c                       g | ]
}|r
 |ng qS r   r   .0liner!   r   r   
<listcomp>U       z1_bleu_score_update.<locals>.<listcomp>.<listcomp>r   )r$   tr&   r   r   r'   U   s    z&_bleu_score_update.<locals>.<listcomp>c                    r"   r   r   r#   r&   r   r   r'   V   r(   c                 S   s   g | ]}t |qS r   )r   r$   tgtr   r   r   r'   Z   s    c                    s   g | ]
}t t | qS r   )absr   )r$   x)predr   r   r'   [   r(   r   )zipr   indexminr   r   )r   r   r   r   r   r    r
   r!   target_preds_targetstarget_len_listtarget_len_diffpreds_countertarget_counterr+   ngram_counter_clipcounter_clipcounterr   )r.   r!   r   _bleu_score_update=   s$   
r<   weightssmoothc              
   C   s   |j }t|dkrtd|dS |r4tt|tj||dt|tj||d}|d |d  |d< n|| }t||dt| }	tt	|	}
| |krUtd|dntd||   }||
 S )a  Compute the BLEU score.

    Args:
        preds_len: count of words in a candidate translation
        target_len: count of words in a reference translation
        numerator: Numerator of precision score (true positives)
        denominator: Denominator of precision score (true positives + false positives)
        n_gram: gram value ranged 1 to 4
        weights: Weights used for unigrams, bigrams, etc. to calculate BLEU score.
        smooth: Whether to apply smoothing

            )devicer         ?r   )
r@   r1   r   torchdivaddoneslogexpsum)r   r    r   r   r
   r=   r>   r@   precision_scoreslog_precision_scoresgeometric_meanbrevity_penaltyr   r   r   _bleu_score_computen   s   &rM   Fc              	   C   s   t | tr| gn| }dd |D }t|t|kr'tdt| dt| |dur=t||kr=tdt| d| |du rHd| g| }t|}t|}td}	td}
t|||||	|
|t\}	}
t	|	|
|||||S )	a3  Calculate `BLEU score`_ of machine translated text with one or more references.

    Args:
        preds: An iterable of machine translated corpus
        target: An iterable of iterables of reference corpus
        n_gram: Gram value ranged from 1 to 4
        smooth: Whether to apply smoothing - see [2]
        weights:
            Weights used for unigrams, bigrams, etc. to calculate BLEU score.
            If not provided, uniform weights are used.

    Return:
        Tensor with BLEU Score

    Raises:
        ValueError: If ``preds`` and ``target`` corpus have different lengths.
        ValueError: If a length of a list of weights is not ``None`` and not equal to ``n_gram``.

    Example:
        >>> from torchmetrics.functional.text import bleu_score
        >>> preds = ['the cat is on the mat']
        >>> target = [['there is a cat on the mat', 'a cat is on the mat']]
        >>> bleu_score(preds, target)
        tensor(0.7598)

    References:
        [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni,
        Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_

        [2] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence
        and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_

    c                 S   s    g | ]}t |tr|gn|qS r   )
isinstancestrr*   r   r   r   r'      s     zbleu_score.<locals>.<listcomp>zCorpus has different size z != Nz5List of weights has different weights than `n_gram`: rA   r?   )
rN   rO   r   
ValueErrorrB   zerosr   r<   r   rM   )r   r   r
   r>   r=   r3   r2   r   r   r   r    r   r   r   
bleu_score   s    (

rR   )r   FN)collectionsr   collections.abcr   typingr   r   r   rB   r   r   rO   intr   r   r   r<   floatboolrM   rR   r   r   r   r   <module>   sz   

	
1
+
