o
    yiAO                  -   @   sJ  d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 d dl
mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZmZ d d	lmZmZ d
Z ersd dl!m"Z"m#Z# dFddZ$erree$srdgZ%ndgZ%						dGdede&dedee	e'ej(f  dee& de)de)de)deeee'ef gef deeef fddZ*dede'dedefdd Z+d!ed"ed#ed$edeeeef f
d%d&Z,dHd'ee' dee& de)de'fd(d)Z-d*e'defd+d,Z.d-e'defd.d/Z/	0			dId1e'd'ee' d*ee' d-ee' dee f
d2d3Z0		dJd4ed5ed6ed7edee& de)deeeef fd8d9Z1										:	;	<		0			dKd=e	ee' ee'ef f d>e	ee' ee'ef f d'ee' dee& de)dee d?edeeee'ef gef de)de)dee	e'ej(f  d@e&dAe&dBe&dCe)d1e'dDe)d*ee' d-ee' dee'e	ee2 e'f f f(dEdZ3dS )L    N)AnyCallableDictListOptionalTupleUnion)warn)Tensor)Module)
DataLoader)TextDatasetTokenizedDataset_check_shape_of_model_output_get_progress_bar_input_data_collator_output_data_collator*_process_attention_mask_for_special_tokens)_SKIP_SLOW_DOCTEST_try_proceed_with_timeout)_TQDM_AVAILABLE_TRANSFORMERS_AVAILABLEzroberta-large)	AutoModelAutoTokenizerreturnc                   C   s   t t tt dS )zDownload intensive operations.N)r   from_pretrained_DEFAULT_MODELr    r   r   U/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/functional/text/bert.py_download_model*   s   
r   
bert_scoreF
dataloader
target_lenmodeldevice
num_layers
all_layersidfverboseuser_forward_fnc	              	   C   s  g }	g }
t | |D ]}t X t||}|sB|s0||d |d dd}|j|dur,|nd }n|||}t||d  |d}n|rHtd||d |d dd}tjd	d
 |jD dd}W d   n1 sjw   Y  ||j	ddd }t
||d |\}}t|}td||}|	|  |r|d | n||j}||jddd }|
|  q	t|	}t|
}||fS )a:  Calculate sentence embeddings and the inverse-document-frequency scaling factor.
    Args:
        dataloader: dataloader instance.
        target_len: A length of the longest sequence in the data. Used for padding the model output.
        model: BERT model.
        device: A device to be used for calculation.
        num_layers: The layer of representation to use.
        all_layers: An indication whether representation from all model layers should be used for BERTScore.
        idf: An Indication whether normalization using inverse document frequencies should be used.
        verbose: An indication of whether a progress bar to be displayed during the embeddings' calculation.
        user_forward_fn:
            A user's own forward function used in a combination with ``user_model``. This function must
            take ``user_model`` and a python dictionary of containing ``"input_ids"`` and ``"attention_mask"``
            represented by :class:`~torch.Tensor` as an input and return the model's output represented by the single
            :class:`~torch.Tensor`.

    Return:
        A tuple of :class:`~torch.Tensor`s containing the model's embeddings and the normalized tokens IDF.
        When ``idf = False``, tokens IDF is not calculated, and a matrix of mean weights is returned instead.
        For a single sentence, ``mean_weight = 1/seq_len``, where ``seq_len`` is a sum over the corresponding
        ``attention_mask``.

    Raises:
        ValueError:
            If ``all_layers = True`` and a model, which is not from the ``transformers`` package, is used.
    	input_idsattention_maskT)output_hidden_statesN   zQThe option `all_layers=True` can be used only with default `transformers` models.c                 S   s   g | ]}| d qS )r.   )	unsqueeze).0or   r   r   
<listcomp>n   s    z1_get_embeddings_and_idf_scale.<locals>.<listcomp>dimzblsd, bs -> blsdinput_ids_idf)keepdim)r   torchno_gradr   hidden_statesr   r/   
ValueErrorcatnormr   r   einsumappendcputypedtypesum)r!   r"   r#   r$   r%   r&   r'   r(   r)   embeddings_listidf_scale_listbatchoutr+   processed_attention_maskr5   
embeddings	idf_scaler   r   r   _get_embeddings_and_idf_scale5   s@   %




rJ   cos_simmetricrI   c                 C   sF   |dkrdnd}| j |dj}td||d}|dd }|S )	zeHelper function that calculates precision or recall, transpose it and scale it with idf_scale factor.	precision      r3   zbls, bs -> blsr-   r   r.   )maxvaluesr7   r=   rB   	transposesqueeze)rK   rL   rI   r4   resr   r   r   _get_scaled_precision_or_recall   s
   rU   preds_embeddingstarget_embeddingspreds_idf_scaletarget_idf_scalec                 C   sV   t d| |}t|d|}t|d|}d| | ||  }|t |d}|||fS )a  Calculate precision, recall and F1 score over candidate and reference sentences.

    Args:
        preds_embeddings: Embeddings of candidate sentences.
        target_embeddings: Embeddings of reference sentences.
        preds_idf_scale: An IDF scale factor for candidate sentences.
        target_idf_scale: An IDF scale factor for reference sentences.

    Return:
        Tensors containing precision, recall and F1 score, respectively.
    zblpd, blrd -> blprrM   recallrO           )r7   r=   rU   masked_fillisnan)rV   rW   rX   rY   rK   rM   rZ   f1_scorer   r   r   _get_precision_recall_f1   s   
r_   model_name_or_pathc                 C   s   |  d| |r	dnd }|S )z+Compute `BERT_score`_ (copied and adjusted)_L_idfz_no-idfr   )r`   r%   r'   msgr   r   r   	_get_hash   s   rd   baseline_pathc                 C   sb   t | }t|}dd t|D }W d   n1 sw   Y  t|ddddf }|S )zHelper function which reads baseline the csv file from the local file.

    This method implemented to avoid `pandas` dependency.
    c                 S   s&   g | ]\}}|d krdd |D qS )r   c                 S      g | ]}t |qS r   floatr0   itemr   r   r   r2          z8_read_csv_from_local_file.<locals>.<listcomp>.<listcomp>r   r0   idxrowr   r   r   r2      s   & z-_read_csv_from_local_file.<locals>.<listcomp>Nr.   )opencsvreader	enumerater7   tensor)re   fnamecsv_filebaseline_listbaseliner   r   r   _read_csv_from_local_file   s   

rx   baseline_urlc                 C   s^   t j| }dd t|D }t|ddddf }W d   |S 1 s(w   Y  |S )z~Helper function which reads the baseline csv file from URL.

    This method is implemented to avoid `pandas` dependency.
    c                 S   s6   g | ]\}}|d krdd |  ddD qS )r   c                 S   rf   r   rg   ri   r   r   r   r2      rk   z1_read_csv_from_url.<locals>.<listcomp>.<listcomp>zutf-8,)stripdecodesplitrl   r   r   r   r2      s
    z&_read_csv_from_url.<locals>.<listcomp>Nr.   )urllibrequesturlopenrr   r7   rs   )ry   http_requestrv   rw   r   r   r   _read_csv_from_url   s   
r   enlangc                 C   s^   |rt |}|S |rt|}|S | r'|r'd}| d|  d| d}t|}|S d}td |S )z<Load a CSV file with the baseline values used for rescaling.zWhttps://raw.githubusercontent.com/Tiiiger/bert_score/master/bert_score/rescale_baseline/z.tsvNzFBaseline was not successfully loaded. No baseline is going to be used.)rx   r   r	   )r   r`   re   ry   rw   	_URL_BASEr   r   r   _load_baseline   s   
r   rM   rZ   r^   rw   c                 C   sd   |du r
|du r
d}t j| ||gdd}|r|dn|| }|| d|  }|d |d |d fS )	z<Rescale the computed metrics with the pre-computed baseline.NFr-   r3   r.   ).r   ).r.   ).rO   )r7   stackr/   )rM   rZ   r^   rw   r%   r&   all_metricsbaseline_scaler   r   r   _rescale_metrics_with_baseline   s   	r      @      predstargetuser_tokenizer
max_length
batch_sizenum_threadsreturn_hashrescale_with_baselinec           $   
   C   s  t | t |krtd|rtstd|du r9tstd|du r*tdt d t|p/t}t	|p6t}n|}|
  ||
 z|r\||jjkr\td| d| d	|jj W n tyj   td
 Y nw tdd | |fD }tdd | |fD }tdd | |fD }|rtd dgdgdgd}|r|dt|||	i |S |rt||||nd}|rt||||	d}t| |||	|jd}n|rtdi |d|	i}tdi | |	|jd}ntdt|||d}t|||d}t||j||
|||	||	\}}t||j||
|||	||	\}} t||| |\}!}"}#|dur0t|!|"|#|||\}!}"}#|! |" |# d}|rJ|dt|||	i |S )a  `Bert_score Evaluating Text Generation`_ leverages the pre-trained contextual embeddings from BERT and
    matches words in candidate and reference sentences by cosine similarity.

    It has been shown to correlate with human judgment on sentence-level and system-level evaluation.
    Moreover, BERTScore computes precision, recall, and F1 measure, which can be useful for evaluating different
    language generation tasks.

    This implemenation follows the original implementation from `BERT_score`_.

    Args:
        preds: Either an iterable of predicted sentences or a ``Dict[input_ids, attention_mask]``.
        target: Either an iterable of target sentences or a  ``Dict[input_ids, attention_mask]``.
        model_name_or_path: A name or a model path used to load ``transformers`` pretrained model.
        num_layers: A layer of representation to use.
        all_layers:
            An indication of whether the representation from all model's layers should be used.
            If ``all_layers = True``, the argument ``num_layers`` is ignored.
        model: A user's own model.
        user_tokenizer:
            A user's own tokenizer used with the own model. This must be an instance with the ``__call__`` method.
            This method must take an iterable of sentences (``List[str]``) and must return a python dictionary
            containing ``"input_ids"`` and ``"attention_mask"`` represented by :class:`~torch.Tensor`.
            It is up to the user's model of whether ``"input_ids"`` is a :class:`~torch.Tensor` of input ids
            or embedding vectors. his tokenizer must prepend an equivalent of ``[CLS]`` token and append an equivalent
            of ``[SEP]`` token as `transformers` tokenizer does.
        user_forward_fn:
            A user's own forward function used in a combination with ``user_model``.
            This function must take ``user_model`` and a python dictionary of containing ``"input_ids"``
            and ``"attention_mask"`` represented by :class:`~torch.Tensor` as an input and return the model's output
            represented by the single :class:`~torch.Tensor`.
        verbose: An indication of whether a progress bar to be displayed during the embeddings' calculation.
        idf: An indication of whether normalization using inverse document frequencies should be used.
        device: A device to be used for calculation.
        max_length: A maximum length of input sequences. Sequences longer than ``max_length`` are to be trimmed.
        batch_size: A batch size used for model processing.
        num_threads: A number of threads to use for a dataloader.
        return_hash: An indication of whether the correspodning ``hash_code`` should be returned.
        lang: A language of input sentences. It is used when the scores are rescaled with a baseline.
        rescale_with_baseline:
            An indication of whether bertscore should be rescaled with a pre-computed baseline.
            When a pretrained model from ``transformers`` model is used, the corresponding baseline is downloaded
            from the original ``bert-score`` package from `BERT_score`_ if available.
            In other cases, please specify a path to the baseline csv/tsv file, which must follow the formatting
            of the files from `BERT_score`_
        baseline_path: A path to the user's own local csv/tsv file with the baseline scale.
        baseline_url: A url path to the user's own  csv/tsv file with the baseline scale.

    Returns:
        Python dictionary containing the keys ``precision``, ``recall`` and ``f1`` with corresponding values.

    Raises:
        ValueError:
            If ``len(preds) != len(target)``.
        ModuleNotFoundError:
            If `tqdm` package is required and not installed.
        ModuleNotFoundError:
            If ``transformers`` package is required and not installed.
        ValueError:
            If ``num_layer`` is larger than the number of the model layers.
        ValueError:
            If invalid input is provided.

    Example:
        >>> from torchmetrics.functional.text.bert import bert_score
        >>> preds = ["hello there", "general kenobi"]
        >>> target = ["hello there", "master kenobi"]
        >>> score = bert_score(preds, target)
        >>> from pprint import pprint
        >>> rounded_score = {k: [round(v, 3) for v in vv] for k, vv in score.items()}
        >>> pprint(rounded_score)
        {'f1': [1.0, 0.996], 'precision': [1.0, 0.996], 'recall': [1.0, 0.996]}
    z=Number of predicted and reference sententes must be the same!zcAn argument `verbose = True` requires `tqdm` package be installed. Install with `pip install tqdm`.Nz`bert_score` metric with default models requires `transformers` package be installed. Either install with `pip install transformers>=4.0` or `pip install torchmetrics[text]`.zThe argument `model_name_or_path` was not specified while it is required when default `transformers` model are used.It is, therefore, used the default recommended model - .znum_layers=z is forbidden for z. Please use num_layers <= zXIt was not possible to retrieve the parameter `num_layers` from the model specification.c                 s   s&    | ]}t |tot|d kV  qdS r   N)
isinstancelistlenr0   textr   r   r   	<genexpr>s  s   $ zbert_score.<locals>.<genexpr>c                 s   s4    | ]}t |tot|d kot |d  tV  qdS r   )r   r   r   strr   r   r   r   r   t  s    $
c                 s   s(    | ]}t |tot |d  tV  qdS )r*   N)r   dictr
   r   r   r   r   r   w  s    
z%Predictions and references are empty.r[   )rM   rZ   f1hash)r'   )r'   
tokens_idfr'   zInvalid input provided.)r   num_workersr   )r   r:   r   ModuleNotFoundErrorr   r	   r   r   r   r   evaltoconfignum_hidden_layersAttributeErrorallupdaterd   r   r   r   r   r   rJ   r   r_   r   tolist)$r   r   r`   r%   r&   r#   r   r)   r(   r'   r$   r   r   r   r   r   r   re   ry   	tokenizer_are_empty_lists_are_valid_lists_are_valid_tensorsoutput_dictrw   target_datasetpreds_datasettarget_loaderpreds_loaderrW   rY   rV   rX   rM   rZ   r^   r   r   r   r       s   ]



)r   N)NNFFFN)NNF)r   NNN)NF)NNFNNNFFNr   r   r   Fr   FNN)4rp   r~   typingr   r   r   r   r   r   r   warningsr	   r7   r
   torch.nnr   torch.utils.datar   4torchmetrics.functional.text.helper_embedding_metricr   r   r   r   r   r   r   torchmetrics.utilities.checksr   r   torchmetrics.utilities.importsr   r   r   transformersr   r   r   __doctest_skip__intr   r$   boolrJ   rU   r_   rd   rx   r   r   r   rh   r    r   r   r   r   <module>   s.  $$	
	


O

$

	
