o
    .wi*                     @   s   d dl Z d dlmZ d dlmZmZmZmZmZ d dl	Z	d dl	m
Z
 d dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ esYdgZes_ddgZG dd deZdS )    N)Sequence)AnyClassVarListOptionalUnion)Tensor)_load_tokenizer_and_model)$_ALLOWED_INFORMATION_MEASURE_LITERAL_get_dataloader_get_special_tokens_map_infolm_compute_infolm_update_InformationMeasure)Metric)dim_zero_cat)_MATPLOTLIB_AVAILABLE_TRANSFORMERS_GREATER_EQUAL_4_4)_AX_TYPE_PLOT_OUT_TYPEInfoLM.plotInfoLMc                       s  e Zd ZU dZdZee ed< ee ed< ee ed< ee ed< dddddddddd	Ze	ed	< 	
											d,de
eejf dedededee dee dee
eejf  dee dededededeeef ddf fddZedefd d!Zd"e
eee f d#e
eee f ddfd$d%Zde
eeeef f fd&d'Z	d-d(ee
eee f  d)ee defd*d+Z   Z!S ).r   u  Calculate `InfoLM`_.

    InfoLM measures a distance/divergence between predicted and reference sentence discrete distribution using one of
    the following information measures:

        - `KL divergence`_
        - `alpha divergence`_
        - `beta divergence`_
        - `AB divergence`_
        - `Rényi divergence`_
        - L1 distance
        - L2 distance
        - L-infinity distance
        - `Fisher-Rao distance`_

    `InfoLM`_ is a family of untrained embedding-based metrics which addresses some famous flaws of standard
    string-based metrics thanks to the usage of pre-trained masked language models. This family of metrics is mainly
    designed for summarization and data-to-text tasks.

    The implementation of this metric is fully based HuggingFace ``transformers``' package.

    As input to ``forward`` and ``update`` the metric accepts the following input:

    - ``preds`` (:class:`~Sequence`): An iterable of hypothesis corpus
    - ``target`` (:class:`~Sequence`): An iterable of reference corpus

    As output of ``forward`` and ``compute`` the metric returns the following output:

    -  ``infolm`` (:class:`~torch.Tensor`): If `return_sentence_level_score=True` return a tuple with a tensor
       with the corpus-level InfoLM score and a list of sentence-level InfoLM scores, else return a corpus-level
       InfoLM score

    Args:
        model_name_or_path:
            A name or a model path used to load ``transformers`` pretrained model.
            By default the `"bert-base-uncased"` model is used.
        temperature:
            A temperature for calibrating language modelling. For more information, please reference `InfoLM`_ paper.
        information_measure:
            A name of information measure to be used. Please use one of: ['kl_divergence', 'alpha_divergence',
            'beta_divergence', 'ab_divergence', 'renyi_divergence', 'l1_distance', 'l2_distance', 'l_infinity_distance',
            'fisher_rao_distance']
        idf:
            An indication of whether normalization using inverse document frequencies should be used.
        alpha:
            Alpha parameter of the divergence used for alpha, AB and Rényi divergence measures.
        beta:
            Beta parameter of the divergence used for beta and AB divergence measures.
        device:
            A device to be used for calculation.
        max_length:
            A maximum length of input sequences. Sequences longer than ``max_length`` are to be trimmed.
        batch_size:
            A batch size used for model processing.
        num_threads:
            A number of threads to use for a dataloader.
        verbose:
            An indication of whether a progress bar to be displayed during the embeddings calculation.
        return_sentence_level_score:
            An indication whether a sentence-level InfoLM score to be returned.

    Example:
        >>> from torchmetrics.text.infolm import InfoLM
        >>> preds = ['he read the book because he was interested in world history']
        >>> target = ['he was interested in world history because he read the book']
        >>> infolm = InfoLM('google/bert_uncased_L-2_H-128_A-2', idf=False)
        >>> infolm(preds, target)
        tensor(-0.1784)

    Fpreds_input_idspreds_attention_masktarget_input_idstarget_attention_maskT)	kl_divergencealpha_divergencebeta_divergenceab_divergencerenyi_divergencel1_distancel2_distancel_infinity_distancefisher_rao_distance%_information_measure_higher_is_betterbert-base-uncased      ?r   N@   r   model_name_or_pathtemperatureinformation_measureidfalphabetadevice
max_length
batch_sizenum_threadsverbosereturn_sentence_level_scorekwargsreturnc                    s   t  jdi | || _|| _|| _|| _|| _|| _t	|p d| _
|	| _|
| _|| _|| _t||\| _| _t|||| _|pE| jjj| _t| j| _| jdg dd | jdg dd | jdg dd | jdg dd d S )	Ncpur   cat)dist_reduce_fxr   r   r    )super__init__r)   r*   r+   r,   r-   r.   torchr/   _devicer1   r2   r3   r4   r	   	tokenizermodelr   information_measure_clsconfigr0   r   special_tokens_map	add_state)selfr)   r*   r+   r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   	__class__r:   U/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/text/infolm.pyr<      s(   zInfoLM.__init__c                 C   s   | j | j S )zReturns a bool indicating whether a higher value of the information measure is better.

        Done this way as depends on if the information measure is positive or negative.

        )r%   r+   )rE   r:   r:   rH   higher_is_better   s   zInfoLM.higher_is_betterpredstargetc                 C   sN   t ||| j| j\}}}}| j| | j| | j| | j| dS )z*Update state with predictions and targets.N)r   r?   r0   r   appendr   r   r   )rE   rJ   rK   r   r   r   r   r:   r:   rH   update   s   zInfoLM.updatec              	   C   s   t t| jt| j| j| j| jd}t t| jt| j| j| j| jd}t	| j
||| j| j| j| j| j}| jr>| |fS | S )zLCalculate selected information measure using the pre-trained language model.)	input_idsattention_maskr,   r1   num_workers)r   r   r   r   r,   r1   r2   r   r   r   r@   r*   rA   rC   r3   r4   mean)rE   preds_dataloadertarget_dataloaderinfo_lm_scorer:   r:   rH   compute   s6   zInfoLM.computevalaxc                 C   s   |  ||S )a1  Plot a single or multiple values from the metric.

        Args:
            val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
                If no value is provided, will automatically call `metric.compute` and plot that result.
            ax: An matplotlib axis object. If provided will add plot to that axis

        Returns:
            Figure and Axes object

        Raises:
            ModuleNotFoundError:
                If `matplotlib` is not installed

        .. plot::
            :scale: 75

            >>> # Example plotting a single value
            >>> from torchmetrics.text.infolm import InfoLM
            >>> metric = InfoLM('google/bert_uncased_L-2_H-128_A-2', idf=False)
            >>> preds = ['he read the book because he was interested in world history']
            >>> target = ['he was interested in world history because he read the book']
            >>> metric.update(preds, target)
            >>> fig_, ax_ = metric.plot()

        .. plot::
            :scale: 75

            >>> # Example plotting multiple values
            >>> from torchmetrics.text.infolm import InfoLM
            >>> metric = InfoLM('google/bert_uncased_L-2_H-128_A-2', idf=False)
            >>> preds = ["this is the prediction", "there is an other sample"]
            >>> target = ["this is the reference", "there is another one"]
            >>> values = [ ]
            >>> for _ in range(10):
            ...     values.append(metric(preds, target))
            >>> fig_, ax_ = metric.plot(values)

        )_plot)rE   rV   rW   r:   r:   rH   plot   s   *r   )r&   r'   r   TNNNNr(   r   TF)NN)"__name__
__module____qualname____doc__is_differentiabler   r   __annotations__r%   r   r   strosPathLikefloatr
   boolr   r=   r/   intdictr   r<   propertyrI   r   rM   tuplerU   r   r   rY   __classcell__r:   r:   rF   rH   r   *   s   
 G	

'.
") ra   collections.abcr   typingr   r   r   r   r   r=   r   4torchmetrics.functional.text.helper_embedding_metricr	   #torchmetrics.functional.text.infolmr
   r   r   r   r   r   torchmetrics.metricr   torchmetrics.utilities.datar   torchmetrics.utilities.importsr   r   torchmetrics.utilities.plotr   r   __doctest_skip__r   r:   r:   r:   rH   <module>   s     