o
    .wÖi8  ã                   @   sB  U d dl mZ d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZmZ d dlmZmZ es`dgZdZ e!e"d< er€er€d dl#m$Z$m%Z% ddd„Z&ee&ƒsddgZnddgZdee	 dee	 de'e!e	f fdd„Z(G dd„ deƒZ)dS )é    )ÚSequence)ÚAnyÚCallableÚListÚOptionalÚUnionN)ÚTensor)ÚModule)Ú
bert_score)Ú_preprocess_text)ÚMetric)Úrank_zero_warn)Ú_SKIP_SLOW_DOCTESTÚ_try_proceed_with_timeout)Údim_zero_cat)Ú_MATPLOTLIB_AVAILABLEÚ_TRANSFORMERS_GREATER_EQUAL_4_4)Ú_AX_TYPEÚ_PLOT_OUT_TYPEúBERTScore.plotzroberta-largeÚ_DEFAULT_MODEL)Ú	AutoModelÚAutoTokenizerÚreturnc                   C   s    t jtdd tjtdd dS )zDownload intensive operations.T)Úresume_downloadN)r   Úfrom_pretrainedr   r   © r   r   úS/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/text/bert.pyÚ_download_model_for_bert_score'   s   r   Ú	BERTScoreÚ	input_idsÚattention_maskc                 C   s   t  | ¡t  |¡dœS )z]Create an input dictionary of ``input_ids`` and ``attention_mask`` for BERTScore calculation.©r    r!   )ÚtorchÚcatr"   r   r   r   Ú_get_input_dict2   s   r%   c                )       sÄ  e Zd ZU dZdZeed< dZeed< dZeed< dZ	e
ed< d	Ze
ed
< ee ed< ee ed< ee ed< ee ed< 																		d4dee dee dedee dee deeeeeef gef  dededeeeejf  dededed ed!ed"ed#ee d$ee d%ed&ed'df(‡ fd(d)„Zd*eeee f d+eeee f d'dfd,d-„Zd'eeeeee
 ef f fd.d/„Z	d5d0eeeee f  d1ee d'efd2d3„Z‡  Z S )6r   ay  `Bert_score Evaluating Text Generation`_ for measuring text similarity.

    BERT leverages the pre-trained contextual embeddings from BERT and matches words in candidate and reference
    sentences by cosine similarity. It has been shown to correlate with human judgment on sentence-level and
    system-level evaluation. Moreover, BERTScore computes precision, recall, and F1 measure, which can be useful for
    evaluating different language generation tasks. This implementation follows the original implementation from
    `BERT_score`_.

    As input to ``forward`` and ``update`` the metric accepts the following input:

    - ``preds`` (:class:`~List`): An iterable of predicted sentences
    - ``target`` (:class:`~List`): An iterable of reference sentences

    As output of ``forward`` and ``compute`` the metric returns the following output:

    - ``score`` (:class:`~Dict`): A dictionary containing the keys ``precision``, ``recall`` and ``f1`` with
      corresponding values

    Args:
        preds: An iterable of predicted sentences.
        target: An iterable of target sentences.
        model_type: A name or a model path used to load ``transformers`` pretrained model.
        num_layers: A layer of representation to use.
        all_layers:
            An indication of whether the representation from all model's layers should be used.
            If ``all_layers=True``, the argument ``num_layers`` is ignored.
        model:  A user's own model. Must be of `torch.nn.Module` instance.
        user_tokenizer:
            A user's own tokenizer used with the own model. This must be an instance with the ``__call__`` method.
            This method must take an iterable of sentences (`List[str]`) and must return a python dictionary
            containing `"input_ids"` and `"attention_mask"` represented by :class:`~torch.Tensor`.
            It is up to the user's model of whether `"input_ids"` is a :class:`~torch.Tensor` of input ids or embedding
            vectors. This tokenizer must prepend an equivalent of ``[CLS]`` token and append an equivalent of ``[SEP]``
            token as ``transformers`` tokenizer does.
        user_forward_fn:
            A user's own forward function used in a combination with ``user_model``. This function must take
            ``user_model`` and a python dictionary of containing ``"input_ids"`` and ``"attention_mask"`` represented
            by :class:`~torch.Tensor` as an input and return the model's output represented by the single
            :class:`~torch.Tensor`.
        verbose: An indication of whether a progress bar to be displayed during the embeddings' calculation.
        idf: An indication whether normalization using inverse document frequencies should be used.
        device: A device to be used for calculation.
        max_length: A maximum length of input sequences. Sequences longer than ``max_length`` are to be trimmed.
        batch_size: A batch size used for model processing.
        num_threads: A number of threads to use for a dataloader.
        return_hash: An indication of whether the correspodning ``hash_code`` should be returned.
        lang: A language of input sentences.
        rescale_with_baseline:
            An indication of whether bertscore should be rescaled with a pre-computed baseline.
            When a pretrained model from ``transformers`` model is used, the corresponding baseline is downloaded
            from the original ``bert-score`` package from `BERT_score`_ if available.
            In other cases, please specify a path to the baseline csv/tsv file, which must follow the formatting
            of the files from `BERT_score`_.
        baseline_path: A path to the user's own local csv/tsv file with the baseline scale.
        baseline_url: A url path to the user's own  csv/tsv file with the baseline scale.
        truncation: An indication of whether the input sequences should be truncated to the ``max_length``.
        kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.

    Example:
        >>> from pprint import pprint
        >>> from torchmetrics.text.bert import BERTScore
        >>> preds = ["hello there", "general kenobi"]
        >>> target = ["hello there", "master kenobi"]
        >>> bertscore = BERTScore()
        >>> pprint(bertscore(preds, target))
        {'f1': tensor([1.0000, 0.9961]), 'precision': tensor([1.0000, 0.9961]), 'recall': tensor([1.0000, 0.9961])}

    FÚis_differentiableTÚhigher_is_betterÚfull_state_updateg        Úplot_lower_boundg      ð?Úplot_upper_boundÚpreds_input_idsÚpreds_attention_maskÚtarget_input_idsÚtarget_attention_maskNé   é@   r   ÚenÚmodel_name_or_pathÚ
num_layersÚ
all_layersÚmodelÚuser_tokenizerÚuser_forward_fnÚverboseÚidfÚdeviceÚ
max_lengthÚ
batch_sizeÚnum_threadsÚreturn_hashÚlangÚrescale_with_baselineÚbaseline_pathÚbaseline_urlÚ
truncationÚkwargsr   c                    s  t ƒ jdi |¤Ž |pt| _|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _|| _|| _|| _|| _|| _|| _|| _|rG|| _d| _n"tsMtdƒ‚ddlm} |d u r_tdt›dƒ | | j¡| _d| _| jdg d	d
 | jdg d	d
 | jdg d	d
 | jdg d	d
 d S )NTz±`BERTScore` metric with default tokenizers requires `transformers` package be installed. Either install with `pip install transformers>=4.4` or `pip install torchmetrics[text]`.r   )r   z¤The argument `model_name_or_path` was not specified while it is required when the default `transformers` model is used. It will use the default recommended model - Ú.Fr+   r$   )Údist_reduce_fxr,   r-   r.   r   )ÚsuperÚ__init__r   r2   r3   r4   r5   r7   r8   r9   Úembedding_devicer;   r<   r=   r>   r?   r@   rA   rB   rC   Ú	tokenizerr6   r   ÚModuleNotFoundErrorÚtransformersr   r   r   Ú	add_state)Úselfr2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   rC   rD   r   ©Ú	__class__r   r   rH   ˆ   sL   
ÿþÿzBERTScore.__init__ÚpredsÚtargetc                 C   s¨   t |tƒs	t|ƒ}t |tƒst|ƒ}t|| j| j| jd| jd\}}t|| j| j| jd| jd\}}| j |d ¡ | j	 |d ¡ | j
 |d ¡ | j |d ¡ dS )z¡Store predictions/references for computing BERT scores.

        It is necessary to store sentences in a tokenized form to ensure the DDP mode working.

        F)rC   Úsort_according_lengthÚown_tokenizerr    r!   N)Ú
isinstanceÚlistr   rJ   r;   rC   r6   r+   Úappendr,   r-   r.   )rN   rQ   rR   Ú
preds_dictÚ_Útarget_dictr   r   r   ÚupdateÊ   s0   


ú
ú	zBERTScore.updatec                 C   s:  t | jƒt | jƒdœ}t | jƒt | jƒdœ}tdi d|“d|“d| j“d| j“d| j“d| j	“d| j
r6| jn4d	“d
| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“ŽS “d
| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“d| j“ŽS )zCalculate BERT scores.r"   rQ   rR   r2   r3   r4   r5   r6   Nr7   r8   r9   r:   r;   r<   r=   r>   r?   r@   rA   rB   r   )r   r+   r,   r-   r.   r
   r2   r3   r4   r5   r6   rJ   r7   r8   r9   rI   r;   r<   r=   r>   r?   r@   rA   rB   )rN   rQ   rR   r   r   r   Úcomputeë   sŽ   þþÿþýüûúùø	÷
öõôóòñðïîíùø	÷
öõôóòñðïîízBERTScore.computeÚvalÚaxc                 C   s.   |du r|   ¡ }dd„ | ¡ D ƒ}|  ||¡S )a,  Plot a single or multiple values from the metric.

        Args:
            val: Either a single result from calling `metric.forward` or `metric.compute` or a list of these results.
                If no value is provided, will automatically call `metric.compute` and plot that result.
            ax: An matplotlib axis object. If provided will add plot to that axis

        Returns:
            Figure and Axes object

        Raises:
            ModuleNotFoundError:
                If `matplotlib` is not installed

        .. plot::
            :scale: 75

            >>> # Example plotting a single value
            >>> from torchmetrics.text.bert import BERTScore
            >>> preds = ["hello there", "general kenobi"]
            >>> target = ["hello there", "master kenobi"]
            >>> metric = BERTScore()
            >>> metric.update(preds, target)
            >>> fig_, ax_ = metric.plot()

        .. plot::
            :scale: 75

            >>> # Example plotting multiple values
            >>> from torch import tensor
            >>> from torchmetrics.text.bert import BERTScore
            >>> preds = ["hello there", "general kenobi"]
            >>> target = ["hello there", "master kenobi"]
            >>> metric = BERTScore()
            >>> values = []
            >>> for _ in range(10):
            ...     val = metric(preds, target)
            ...     val = {k: tensor(v).mean() for k,v in val.items()}  # convert into single value per key
            ...     values.append(val)
            >>> fig_, ax_ = metric.plot(values)

        Nc                 S   s    i | ]\}}|t  |¡ ¡ “qS r   )r#   ÚtensorÚmean)Ú.0ÚkÚvr   r   r   Ú
<dictcomp>:  s     z"BERTScore.plot.<locals>.<dictcomp>)r\   ÚitemsÚ_plot)rN   r]   r^   r   r   r   Úplot  s   -r   )NNFNNNFFNr/   r0   r   Fr1   FNNF)NN)!Ú__name__Ú
__module__Ú__qualname__Ú__doc__r&   ÚboolÚ__annotations__r'   r(   r)   Úfloatr*   r   r   r   ÚstrÚintr	   r   r   Údictr   r#   r:   rH   r   r[   rV   r\   r   r   rg   Ú__classcell__r   r   rO   r   r   7   s    
 Eíþýüûúùø	÷
öõôóòñðïîíìë.B$!!ÿÿÿþ)r   N)*Úcollections.abcr   Útypingr   r   r   r   r   r#   r   Útorch.nnr	   Ú!torchmetrics.functional.text.bertr
   Ú4torchmetrics.functional.text.helper_embedding_metricr   Útorchmetrics.metricr   Útorchmetrics.utilitiesr   Útorchmetrics.utilities.checksr   r   Útorchmetrics.utilities.datar   Útorchmetrics.utilities.importsr   r   Útorchmetrics.utilities.plotr   r   Ú__doctest_skip__r   ro   rm   rL   r   r   r   rq   r%   r   r   r   r   r   Ú<module>   s2   
€&