o
    yi,                     @   s  d dl mZmZmZmZmZmZ d dlmZ d dl	Z	d dl	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ dZerad dlmZmZ dddZer`ees`dgZndgZdee
 dee
 deee
f fddZG dd deZ dS )    )AnyCallableDictListOptionalUnion)warnN)Tensor)Module)
bert_score)_preprocess_text)Metric)_SKIP_SLOW_DOCTEST_try_proceed_with_timeout)_TRANSFORMERS_AVAILABLEzroberta-large)	AutoModelAutoTokenizerreturnc                   C   s   t t tt dS )zDownload intensive operations.N)r   from_pretrained_DEFAULT_MODELr    r   r   J/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/text/bert.py_download_model!   s   
r   	BERTScore	input_idsattention_maskc                 C   s   t | t |d}|S )z]Create an input dictionary of ``input_ids`` and ``attention_mask`` for BERTScore calculation.r   r   )torchcat)r   r   output_dictr   r   r   _get_input_dict,   s   r    c                %       s^  e Zd ZU dZdZeed< dZeed< dZeed< e	e
 ed< e	e
 ed< e	e
 ed	< e	e
 ed
< 																	d+dee dee dedee dee deeeee
f ge
f dededeeeejf  dededededededee d ee d!ef$ fd"d#Zd$e	e d%e	e d&dfd'd(Zd&eeee	e ef f fd)d*Z  ZS ),r   a7  `Bert_score Evaluating Text Generation`_ leverages the pre-trained contextual embeddings from BERT and
    matches words in candidate and reference sentences by cosine similarity. It has been shown to correlate with
    human judgment on sentence-level and system-level evaluation. Moreover, BERTScore computes precision, recall,
    and F1 measure, which can be useful for evaluating different language generation tasks.

    This implemenation follows the original implementation from `BERT_score`_.

    As input to ``forward`` and ``update`` the metric accepts the following input:

    - ``preds`` (:class:`~List`): An iterable of predicted sentences
    - ``target`` (:class:`~List`): An iterable of reference sentences

    As output of ``forward`` and ``compute`` the metric returns the following output:

    - ``score`` (:class:`~Dict`): A dictionary containing the keys ``precision``, ``recall`` and ``f1`` with
      corresponding values

    Args:
        preds: An iterable of predicted sentences.
        target: An iterable of target sentences.
        model_type: A name or a model path used to load ``transformers`` pretrained model.
        num_layers: A layer of representation to use.
        all_layers:
            An indication of whether the representation from all model's layers should be used.
            If ``all_layers=True``, the argument ``num_layers`` is ignored.
        model:  A user's own model. Must be of `torch.nn.Module` instance.
        user_tokenizer:
            A user's own tokenizer used with the own model. This must be an instance with the ``__call__`` method.
            This method must take an iterable of sentences (`List[str]`) and must return a python dictionary
            containing `"input_ids"` and `"attention_mask"` represented by :class:`~torch.Tensor`.
            It is up to the user's model of whether `"input_ids"` is a :class:`~torch.Tensor` of input ids or embedding
            vectors. This tokenizer must prepend an equivalent of ``[CLS]`` token and append an equivalent of ``[SEP]``
            token as ``transformers`` tokenizer does.
        user_forward_fn:
            A user's own forward function used in a combination with ``user_model``. This function must take
            ``user_model`` and a python dictionary of containing ``"input_ids"`` and ``"attention_mask"`` represented
            by :class:`~torch.Tensor` as an input and return the model's output represented by the single
            :class:`~torch.Tensor`.
        verbose: An indication of whether a progress bar to be displayed during the embeddings' calculation.
        idf: An indication whether normalization using inverse document frequencies should be used.
        device: A device to be used for calculation.
        max_length: A maximum length of input sequences. Sequences longer than ``max_length`` are to be trimmed.
        batch_size: A batch size used for model processing.
        num_threads: A number of threads to use for a dataloader.
        return_hash: An indication of whether the correspodning ``hash_code`` should be returned.
        lang: A language of input sentences.
        rescale_with_baseline:
            An indication of whether bertscore should be rescaled with a pre-computed baseline.
            When a pretrained model from ``transformers`` model is used, the corresponding baseline is downloaded
            from the original ``bert-score`` package from `BERT_score`_ if available.
            In other cases, please specify a path to the baseline csv/tsv file, which must follow the formatting
            of the files from `BERT_score`_.
        baseline_path: A path to the user's own local csv/tsv file with the baseline scale.
        baseline_url: A url path to the user's own  csv/tsv file with the baseline scale.
        kwargs: Additional keyword arguments, see :ref:`Metric kwargs` for more info.

    Example:
        >>> from torchmetrics.text.bert import BERTScore
        >>> preds = ["hello there", "general kenobi"]
        >>> target = ["hello there", "master kenobi"]
        >>> bertscore = BERTScore()
        >>> score = bertscore(preds, target)
        >>> from pprint import pprint
        >>> rounded_score = {k: [round(v, 3) for v in vv] for k, vv in score.items()}
        >>> pprint(rounded_score)
        {'f1': [1.0, 0.996], 'precision': [1.0, 0.996], 'recall': [1.0, 0.996]}
    Fis_differentiableThigher_is_betterfull_state_updatepreds_input_idspreds_attention_masktarget_input_idstarget_attention_maskN   @      enmodel_name_or_path
num_layers
all_layersmodeluser_tokenizeruser_forward_fnverboseidfdevice
max_length
batch_sizenum_threadsreturn_hashlangrescale_with_baselinebaseline_pathbaseline_urlkwargsc                    s  t  jdi | |pt| _|| _|| _|| _|| _|| _|| _	|	| _
|
| _|| _|| _|| _|| _|| _|| _|| _g g d| _g g d| _|rP|| _d| _ntsVtd|d u rbtdtd t| j| _d| _| jdg dd	 | jd
g dd	 | jdg dd	 | jdg dd	 d S )Nr   Tz`BERTScore` metric with default tokenizers requires `transformers` package be installed. Either install with `pip install transformers>=4.0` or `pip install torchmetrics[text]`.zThe argument `model_name_or_path` was not specified while it is required when the default `transformers` model is used. It will use the default recommended model - .Fr$   r   )dist_reduce_fxr%   r&   r'   r   )super__init__r   r,   r-   r.   r/   r1   r2   r3   embedding_devicer5   r6   r7   r8   r9   r:   r;   r<   predstarget	tokenizerr0   r   ModuleNotFoundErrorr   r   r   	add_state)selfr,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   	__class__r   r   rA      sL   
zBERTScore.__init__rC   rD   r   c                 C   s   t || j| jdd| jd\}}t || j| jdd| jd\}}| j|d  | j|d  | j|d  | j|d  dS )zStore predictions/references for computing BERT scores.

        It is necessary to store sentences in a tokenized form to ensure the DDP mode working.
        F)
truncationsort_according_lengthown_tokenizerr   r   N)	r   rE   r5   r0   r$   appendr%   r&   r'   )rH   rC   rD   
preds_dict_target_dictr   r   r   update   s(   

	zBERTScore.updatec                 C   s"  t di dt| j| jdt| j| jd| jd| jd| jd| j	d| j
r*| jn4dd	| jd
| jd| jd| jd| jd| jd| jd| jd| jd| jd| jd| jS d	| jd
| jd| jd| jd| jd| jd| jd| jd| jd| jd| jd| jS )zCalculate BERT scores.rC   rD   r,   r-   r.   r/   r0   Nr1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r   )r   r    r$   r%   r&   r'   r,   r-   r.   r/   r0   rE   r1   r2   r3   rB   r5   r6   r7   r8   r9   r:   r;   r<   )rH   r   r   r   compute   s   	
	
zBERTScore.compute)NNFNNNFFNr(   r)   r*   Fr+   FNN)__name__
__module____qualname____doc__r!   bool__annotations__r"   r#   r   r	   r   strintr
   r   r   r   r   r   r4   rA   rR   floatrS   __classcell__r   r   rI   r   r   2   s   
 D	
@*)r   N)!typingr   r   r   r   r   r   warningsr   r   r	   torch.nnr
   !torchmetrics.functional.text.bertr   4torchmetrics.functional.text.helper_embedding_metricr   torchmetrics.metricr   torchmetrics.utilities.checksr   r   torchmetrics.utilities.importsr   r   transformersr   r   r   __doctest_skip__rZ   r    r   r   r   r   r   <module>   s(    
&