o
    yiu                     @   sF  d dl mZ d dlmZmZmZmZmZ d dlZd dlm	Z	m
Z
 dee dedefdd	Zd
edee fddZdefdee deee  de	de	de	de	dedeegee f dee	e	f fddZde	de	de	de	dedee dede	fddZ			ddeeee f deeeee f  dededeee  de	fddZdS )    )Counter)CallableOptionalSequenceTupleUnionN)Tensortensorngram_input_listn_gramreturnc                 C   s\   t  }td|d D ]!}tt| | d D ]}t| |||  }||  d7  < qq
|S )a  Counting how many times each word appears in a given text with ngram.

    Args:
        ngram_input_list: A list of translated text or reference texts
        n_gram: gram value ranged 1 to 4

    Return:
        ngram_counter: a collections.Counter object of ngram
       )r   rangelentuple)r
   r   ngram_counterij	ngram_key r   U/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/functional/text/bleu.py_count_ngram   s   r   sentencec                 C   s   |   S )zTokenizes sentence into list of words.

    Args:
        sentence: A sentence separated by white space.

    Return:
        List of words
    )split)r   r   r   r   _tokenize_fn/   s   	r      predstarget	numeratordenominator	preds_len
target_len	tokenizerc                    s   fdd|D }fdd| D }	t |	|D ]c\ }
|t 7 }dd |
D } fdd|D }|||t| 7 }t |}t }|
D ]	}|t||O }qF||@ }|D ]}|t|d   || 7  < qV|D ]}|t|d   || 7  < qiq||fS )aB  Updates and returns variables required to compute the BLEU score.

    Args:
        preds: An iterable of machine translated corpus
        target: An iterable of iterables of reference corpus
        numerator: Numerator of precision score (true positives)
        denominator: Denominator of precision score (true positives + false positives)
        preds_len: count of words in a candidate prediction
        target: count of words in a reference translation
        n_gram: gram value ranged 1 to 4
        tokenizer: A function that turns sentence into list of words
    c                    s   g | ]} fd d|D qS )c                       g | ]
}|r
 |ng qS r   r   .0liner"   r   r   
<listcomp>Q       z1_bleu_score_update.<locals>.<listcomp>.<listcomp>r   )r%   tr'   r   r   r(   Q   s    z&_bleu_score_update.<locals>.<listcomp>c                    r#   r   r   r$   r'   r   r   r(   R   r)   c                 S   s   g | ]}t |qS r   )r   r%   tgtr   r   r   r(   V   s    c                    s   g | ]
}t t | qS r   )absr   )r%   x)predr   r   r(   W   r)   r   )zipr   indexminr   r   )r   r   r   r   r    r!   r   r"   target_preds_targetstarget_len_listtarget_len_diffpreds_countertarget_counterr,   ngram_counter_clipcounter_clipcounterr   )r/   r"   r   _bleu_score_update;   s$   
r=   weightssmoothc              
   C   s   |j }t|dkrtd|dS |r4tt|tj||dt|tj||d}|d |d  |d< n|| }t||dt| }	tt	|	}
| |krUtd|dntd||   }||
 }|S )a  Computes the BLEU score.

    Args:
        preds_len: count of words in a candidate translation
        target_len: count of words in a reference translation
        numerator: Numerator of precision score (true positives)
        denominator: Denominator of precision score (true positives + false positives)
        n_gram: gram value ranged 1 to 4
        weights: Weights used for unigrams, bigrams, etc. to calculate BLEU score.
        smooth: Whether to apply smoothing
            )devicer         ?r   )
rA   r2   r	   torchdivaddoneslogexpsum)r    r!   r   r   r   r>   r?   rA   precision_scoreslog_precision_scoresgeometric_meanbrevity_penaltybleur   r   r   _bleu_score_computej   s   &rO   Fc              	   C   s   t | tr| gn| }dd |D }t|t|kr'tdt| dt| |dur=t||kr=tdt| d| |du rHd| g| }t|}t|}td}	td}
t|||||	|
|t\}	}
t	|	|
|||||S )	u/  Calculate `BLEU score`_ of machine translated text with one or more references.

    Args:
        preds: An iterable of machine translated corpus
        target: An iterable of iterables of reference corpus
        n_gram: Gram value ranged from 1 to 4
        smooth: Whether to apply smoothing – see [2]
        weights:
            Weights used for unigrams, bigrams, etc. to calculate BLEU score.
            If not provided, uniform weights are used.

    Return:
        Tensor with BLEU Score

    Raises:
        ValueError: If ``preds`` and ``target`` corpus have different lengths.
        ValueError: If a length of a list of weights is not ``None`` and not equal to ``n_gram``.

    Example:
        >>> from torchmetrics.functional import bleu_score
        >>> preds = ['the cat is on the mat']
        >>> target = [['there is a cat on the mat', 'a cat is on the mat']]
        >>> bleu_score(preds, target)
        tensor(0.7598)

    References:
        [1] BLEU: a Method for Automatic Evaluation of Machine Translation by Papineni,
        Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu `BLEU`_

        [2] Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence
        and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_
    c                 S   s    g | ]}t |tr|gn|qS r   )
isinstancestrr+   r   r   r   r(      s     zbleu_score.<locals>.<listcomp>zCorpus has different size z != Nz5List of weights has different weights than `n_gram`: rB   r@   )
rP   rQ   r   
ValueErrorrC   zerosr	   r=   r   rO   )r   r   r   r?   r>   r4   r3   r   r   r    r!   r   r   r   
bleu_score   s    '

rT   )r   FN)collectionsr   typingr   r   r   r   r   rC   r   r	   rQ   intr   r   r=   floatboolrO   rT   r   r   r   r   <module>   sx   

	
/
,
