o
    yi0                     @   sh   d dl mZmZmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ dZG dd	 d	eZd
S )    )AnyOptionalSequence)Literal)_bleu_score_update)_SacreBLEUTokenizer)	BLEUScore)_REGEX_AVAILABLEnone13azhintlcharc                       s   e Zd ZU dZdZeed< dZeed< dZeed< 						dd
e	dede
d dedeee  def fddZdee deee  dd	fddZ  ZS )SacreBLEUScorea  Calculate `BLEU score`_ of machine translated text with one or more references. This implementation follows
    the behaviour of `SacreBLEU`_.

    The SacreBLEU implementation differs from the NLTK BLEU implementation in tokenization techniques.

    As input to ``forward`` and ``update`` the metric accepts the following input:

    - ``preds`` (:class:`~Sequence`): An iterable of machine translated corpus
    - ``target`` (:class:`~Sequence`): An iterable of iterables of reference corpus

    As output of ``forward`` and ``compute`` the metric returns the following output:

    - ``sacre_bleu`` (:class:`~torch.Tensor`): A tensor with the SacreBLEU Score

    Args:
        n_gram: Gram value ranged from 1 to 4
        smooth: Whether to apply smoothing, see `SacreBLEU`_
        tokenize: Tokenization technique to be used.
            Supported tokenization: ``['none', '13a', 'zh', 'intl', 'char']``
        lowercase:  If ``True``, BLEU score over lowercased text is calculated.
        kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.
        weights:
            Weights used for unigrams, bigrams, etc. to calculate BLEU score.
            If not provided, uniform weights are used.

    Raises:
        ValueError:
            If ``tokenize`` not one of 'none', '13a', 'zh', 'intl' or 'char'
        ValueError:
            If ``tokenize`` is set to 'intl' and `regex` is not installed
        ValueError:
            If a length of a list of weights is not ``None`` and not equal to ``n_gram``.


    Example:
        >>> from torchmetrics import SacreBLEUScore
        >>> preds = ['the cat is on the mat']
        >>> target = [['there is a cat on the mat', 'a cat is on the mat']]
        >>> sacre_bleu = SacreBLEUScore()
        >>> sacre_bleu(preds, target)
        tensor(0.7598)

    Additional References:

        - Automatic Evaluation of Machine Translation Quality Using Longest Common Subsequence
          and Skip-Bigram Statistics by Chin-Yew Lin and Franz Josef Och `Machine Translation Evolution`_
    Fis_differentiableThigher_is_betterfull_state_update   r   Nn_gramsmoothtokenizer
   	lowercaseweightskwargsc                    s\   t  jd|||d| |tvrtdt d| d|dkr&ts&tdt||| _d S )N)r   r   r   z*Argument `tokenize` expected to be one of z	 but got .r   zv`'intl'` tokenization requires that `regex` is installed. Use `pip install regex` or `pip install torchmetrics[text]`. )super__init__AVAILABLE_TOKENIZERS
ValueErrorr	   ModuleNotFoundErrorr   	tokenizer)selfr   r   r   r   r   r   	__class__r   P/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/text/sacre_bleu.pyr   U   s   	zSacreBLEUScore.__init__predstargetreturnc              	   C   s.   t ||| j| j| j| j| j| j\| _| _dS )z*Update state with predictions and targets.N)r   	numeratordenominator	preds_len
target_lenr   r"   )r#   r'   r(   r   r   r&   updatei   s   zSacreBLEUScore.update)r   Fr   FN)__name__
__module____qualname____doc__r   bool__annotations__r   r   intr   r   r   floatr   r   strr.   __classcell__r   r   r$   r&   r       s0   
 0
*r   N)typingr   r   r   typing_extensionsr   !torchmetrics.functional.text.bleur   'torchmetrics.functional.text.sacre_bleur   torchmetrics.text.bleur   torchmetrics.utilities.importsr	   r   r   r   r   r   r&   <module>   s   