o
    .wi*V                  0   @   s  d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d d	lmZ d d
lmZm Z  d dl!m"Z"m#Z# eded fddZ$dZ%e#rd dl&m'Z'm(Z( dJddZ)ere e)sdgZ*ndgZ*						dKdede+dedeee,ej-f  dee+ de.de.de.dee
ee/e,ef gef  de0eef fddZ1d ed!e,d"edefd#d$Z2d%ed&ed'ed(ede0eeef f
d)d*Z3dLd+ee, dee+ de.de,fd,d-Z4d.e,defd/d0Z5d1e,defd2d3Z6	4			dMd5e,d+ee, d.ee, d1ee, dee f
d6d7Z7		dNd8ed9ed:ed;edee+ de.de0eeef fd<d=Z8										>	?	 		4				dOd@ee,ee, e/e,ef f dAee,ee, e/e,ef f d+ee, dee+ de.dee dBe	dee
ee/e,ef gef  de.de.deee,ej-f  dCe+dDe+dEe+dFe.d5e,dGe.d.ee, d1ee, dHe.de/e,eee9e: e,f f f*dIdZ;dS )P    N)IteratorSequence)contextmanager)AnyCallableListOptionalUnion)Tensor)Module)
DataLoader)TextDatasetTokenizedDataset_check_shape_of_model_output_get_progress_bar_input_data_collator_output_data_collator*_process_attention_mask_for_special_tokens)rank_zero_warn)_SKIP_SLOW_DOCTEST_try_proceed_with_timeout)_TQDM_AVAILABLE_TRANSFORMERS_GREATER_EQUAL_4_4returnc               	   c   sD    t d} |  }z| t j dV  W | | dS | | w )z]Ignore irrelevant fine-tuning warning from transformers when loading the model for BertScore.ztransformers.modeling_utilsN)logging	getLoggergetEffectiveLevelsetLevelERROR)loggeroriginal_level r!   ^/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/functional/text/bert.py_ignore_log_warning(   s   
r#   zroberta-large)	AutoModelAutoTokenizerc                   C   s@   t   tt tt W d   dS 1 sw   Y  dS )zDownload intensive operations.N)r#   r%   from_pretrained_DEFAULT_MODELr$   r!   r!   r!   r"   _download_model_for_bert_score:   s   
"r(   
bert_scoreF
dataloader
target_lenmodeldevice
num_layers
all_layersidfverboseuser_forward_fnc	              	   C   s  g }	g }
t | |D ]}t X t||}|sB|s0||d |d dd}|j|dur,|nd }n|||}t||d  |d}n|rHtd||d |d dd}tjd	d
 |jD dd}W d   n1 sjw   Y  ||j	ddd }t
||d |\}}t|}td||}|	|  |r|d | n||j}||jddd }|
|  q	t|	}t|
}||fS )a<  Calculate sentence embeddings and the inverse-document-frequency scaling factor.

    Args:
        dataloader: dataloader instance.
        target_len: A length of the longest sequence in the data. Used for padding the model output.
        model: BERT model.
        device: A device to be used for calculation.
        num_layers: The layer of representation to use.
        all_layers: An indication whether representation from all model layers should be used for BERTScore.
        idf: An Indication whether normalization using inverse document frequencies should be used.
        verbose: An indication of whether a progress bar to be displayed during the embeddings' calculation.
        user_forward_fn:
            A user's own forward function used in a combination with ``user_model``. This function must
            take ``user_model`` and a python dictionary of containing ``"input_ids"`` and ``"attention_mask"``
            represented by :class:`~torch.Tensor` as an input and return the model's output represented by the single
            :class:`~torch.Tensor`.

    Return:
        A tuple of :class:`~torch.Tensor`s containing the model's embeddings and the normalized tokens IDF.
        When ``idf = False``, tokens IDF is not calculated, and a matrix of mean weights is returned instead.
        For a single sentence, ``mean_weight = 1/seq_len``, where ``seq_len`` is a sum over the corresponding
        ``attention_mask``.

    Raises:
        ValueError:
            If ``all_layers = True`` and a model, which is not from the ``transformers`` package, is used.

    	input_idsattention_maskT)output_hidden_statesN   zQThe option `all_layers=True` can be used only with default `transformers` models.c                 S   s   g | ]}| d qS )r7   )	unsqueeze).0or!   r!   r"   
<listcomp>   s    z1_get_embeddings_and_idf_scale.<locals>.<listcomp>dimzblsd, bs -> blsdinput_ids_idf)keepdim)r   torchno_gradr   hidden_statesr   r8   
ValueErrorcatnormr   r   einsumappendcputypedtypesum)r*   r+   r,   r-   r.   r/   r0   r1   r2   embeddings_listidf_scale_listbatchoutr4   processed_attention_maskr>   
embeddings	idf_scaler!   r!   r"   _get_embeddings_and_idf_scaleF   s@   '




rS   cos_simmetricrR   c                 C   sB   |dkrdnd}| j |dj}td||d}|dd S )	zOCalculate precision or recall, transpose it and scale it with idf_scale factor.	precision      r<   zbls, bs -> blsr6   r   r7   )maxvaluesr@   rF   rK   	transposesqueeze)rT   rU   rR   r=   resr!   r!   r"   _get_scaled_precision_or_recall   s   r^   preds_embeddingstarget_embeddingspreds_idf_scaletarget_idf_scalec                 C   sV   t d| |}t|d|}t|d|}d| | ||  }|t |d}|||fS )a  Calculate precision, recall and F1 score over candidate and reference sentences.

    Args:
        preds_embeddings: Embeddings of candidate sentences.
        target_embeddings: Embeddings of reference sentences.
        preds_idf_scale: An IDF scale factor for candidate sentences.
        target_idf_scale: An IDF scale factor for reference sentences.

    Return:
        Tensors containing precision, recall and F1 score, respectively.

    zblpd, blrd -> blprrV   recallrX           )r@   rF   r^   masked_fillisnan)r_   r`   ra   rb   rT   rV   rc   f1_scorer!   r!   r"   _get_precision_recall_f1   s   
rh   model_name_or_pathc                 C   s   |  d| |rd S d S )z,Compute `BERT_score`_ (copied and adjusted)._L_idfz_no-idfr!   )ri   r.   r0   r!   r!   r"   	_get_hash   s   rl   baseline_pathc                 C   s^   t | }t|}dd t|D }W d   n1 sw   Y  t|ddddf S )zqRead baseline from csv file from the local file.

    This method implemented to avoid `pandas` dependency.

    c                 S   s&   g | ]\}}|d krdd |D qS )r   c                 S      g | ]}t |qS r!   floatr9   itemr!   r!   r"   r;          z8_read_csv_from_local_file.<locals>.<listcomp>.<listcomp>r!   r9   idxrowr!   r!   r"   r;      s   & z-_read_csv_from_local_file.<locals>.<listcomp>Nr7   )opencsvreader	enumerater@   tensor)rm   fnamecsv_filebaseline_listr!   r!   r"   _read_csv_from_local_file   s
   

r   baseline_urlc                 C   s\   t j| }dd t|D }t|ddddf W  d   S 1 s'w   Y  dS )ziRead baseline from csv file from URL.

    This method is implemented to avoid `pandas` dependency.

    c                 S   s6   g | ]\}}|d krdd |  ddD qS )r   c                 S   rn   r!   ro   rq   r!   r!   r"   r;      rs   z1_read_csv_from_url.<locals>.<listcomp>.<listcomp>zutf-8,)stripdecodesplitrt   r!   r!   r"   r;      s
    z&_read_csv_from_url.<locals>.<listcomp>Nr7   )urllibrequesturlopenrz   r@   r{   )r   http_requestr~   r!   r!   r"   _read_csv_from_url   s   $r   enlangc                 C   sZ   |rt |}|S |rt|}|S | r'|r'd}| d|  d| d}t|}|S td dS )z<Load a CSV file with the baseline values used for rescaling.zWhttps://raw.githubusercontent.com/Tiiiger/bert_score/master/bert_score/rescale_baseline/z.tsvzFBaseline was not successfully loaded. No baseline is going to be used.N)r   r   r   )r   ri   rm   r   baselineurl_baser!   r!   r"   _load_baseline   s   
r   rV   rc   rg   r   c                 C   sd   |du r
|du r
d}t j| ||gdd}|r|dn|| }|| d|  }|d |d |d fS )	z<Rescale the computed metrics with the pre-computed baseline.NFr6   r<   r7   ).r   ).r7   ).rX   )r@   stackr8   )rV   rc   rg   r   r.   r/   all_metricsbaseline_scaler!   r!   r"   _rescale_metrics_with_baseline   s   	r      @   predstargetuser_tokenizer
max_length
batch_sizenum_threadsreturn_hashrescale_with_baseline
truncationc           %   
   C   sn  t | t |krtdt |  dt | t| tttfs"t| } t|tttfs.t|}t|	ts;td|	 d|rCtsCtd|du r{t	sMtd|du rYt
dt d t  t|pbt}t|pit}W d   n1 suw   Y  n|}|  ||
 z+t|jd	rt|jjtr|r||jjkrtd
| d| d|jj nt
d W n ty   t
d Y nw tdd | |fD }tdd | |fD }tdd | |fD }|rt
d dgdgdgd}|r|dt|||	i |S |rt||||nd}|r"t||||	|d}t| |||	|j|d}n |r>tdi |d|	i}tdi | |	|jd}ntdt|||d}t|||d}t||j ||
|||	||	\}}t||j ||
|||	||	\} }!| |j!j" } ||j!j" }|!|j!j" }!||j!j" }t#| ||!|\}"}#}$|durt$|"|#|$|||\}"}#}$|"|#|$d}|r|dt|||	i |S )a  `Bert_score Evaluating Text Generation`_ for text similirity matching.

    This metric leverages the pre-trained contextual embeddings from BERT and matches words in candidate and reference
    sentences by cosine similarity. It has been shown to correlate with human judgment on sentence-level and
    system-level evaluation. Moreover, BERTScore computes precision, recall, and F1 measure, which can be useful for
    evaluating different language generation tasks.

    This implementation follows the original implementation from `BERT_score`_.

    Args:
        preds: Either an iterable of predicted sentences or a ``Dict[input_ids, attention_mask]``.
        target: Either an iterable of target sentences or a  ``Dict[input_ids, attention_mask]``.
        model_name_or_path: A name or a model path used to load ``transformers`` pretrained model.
        num_layers: A layer of representation to use.
        all_layers:
            An indication of whether the representation from all model's layers should be used.
            If ``all_layers = True``, the argument ``num_layers`` is ignored.
        model: A user's own model.
        user_tokenizer:
            A user's own tokenizer used with the own model. This must be an instance with the ``__call__`` method.
            This method must take an iterable of sentences (``List[str]``) and must return a python dictionary
            containing ``"input_ids"`` and ``"attention_mask"`` represented by :class:`~torch.Tensor`.
            It is up to the user's model of whether ``"input_ids"`` is a :class:`~torch.Tensor` of input ids
            or embedding vectors. his tokenizer must prepend an equivalent of ``[CLS]`` token and append an equivalent
            of ``[SEP]`` token as `transformers` tokenizer does.
        user_forward_fn:
            A user's own forward function used in a combination with ``user_model``.
            This function must take ``user_model`` and a python dictionary of containing ``"input_ids"``
            and ``"attention_mask"`` represented by :class:`~torch.Tensor` as an input and return the model's output
            represented by the single :class:`~torch.Tensor`.
        verbose: An indication of whether a progress bar to be displayed during the embeddings' calculation.
        idf: An indication of whether normalization using inverse document frequencies should be used.
        device: A device to be used for calculation.
        max_length: A maximum length of input sequences. Sequences longer than ``max_length`` are to be trimmed.
        batch_size: A batch size used for model processing.
        num_threads: A number of threads to use for a dataloader.
        return_hash: An indication of whether the correspodning ``hash_code`` should be returned.
        lang: A language of input sentences. It is used when the scores are rescaled with a baseline.
        rescale_with_baseline:
            An indication of whether bertscore should be rescaled with a pre-computed baseline.
            When a pretrained model from ``transformers`` model is used, the corresponding baseline is downloaded
            from the original ``bert-score`` package from `BERT_score`_ if available.
            In other cases, please specify a path to the baseline csv/tsv file, which must follow the formatting
            of the files from `BERT_score`_
        baseline_path: A path to the user's own local csv/tsv file with the baseline scale.
        baseline_url: A url path to the user's own  csv/tsv file with the baseline scale.
        truncation: An indication of whether the input sequences should be truncated to the maximum length.

    Returns:
        Python dictionary containing the keys ``precision``, ``recall`` and ``f1`` with corresponding values.

    Raises:
        ValueError:
            If ``len(preds) != len(target)``.
        ModuleNotFoundError:
            If `tqdm` package is required and not installed.
        ModuleNotFoundError:
            If ``transformers`` package is required and not installed.
        ValueError:
            If ``num_layer`` is larger than the number of the model layers.
        ValueError:
            If invalid input is provided.

    Example:
        >>> from pprint import pprint
        >>> from torchmetrics.functional.text.bert import bert_score
        >>> preds = ["hello there", "general kenobi"]
        >>> target = ["hello there", "master kenobi"]
        >>> pprint(bert_score(preds, target))
        {'f1': tensor([1.0000, 0.9961]), 'precision': tensor([1.0000, 0.9961]), 'recall': tensor([1.0000, 0.9961])}

    zLExpected number of predicted and reference sententes to be the same, but gotz and z1Expected argument `idf` to be a boolean, but got .zcAn argument `verbose = True` requires `tqdm` package be installed. Install with `pip install tqdm`.Nz`bert_score` metric with default models requires `transformers` package be installed. Either install with `pip install transformers>=4.4` or `pip install torchmetrics[text]`.zThe argument `model_name_or_path` was not specified while it is required when default `transformers` model are used.It is, therefore, used the default recommended model - num_hidden_layersznum_layers=z is forbidden for z. Please use num_layers <= zhModel config does not have `num_hidden_layers` as an integer attribute. Unable to validate `num_layers`.zXIt was not possible to retrieve the parameter `num_layers` from the model specification.c                 s   s&    | ]}t |tot|d kV  qdS r   N)
isinstancelistlenr9   textr!   r!   r"   	<genexpr>  s   $ zbert_score.<locals>.<genexpr>c                 s   s4    | ]}t |tot|d kot |d  tV  qdS r   )r   r   r   strr   r!   r!   r"   r     s    $
c                 s   s(    | ]}t |tot |d  tV  qdS )r3   N)r   dictr
   r   r!   r!   r"   r     s    
z%Predictions and references are empty.rd   )rV   rc   f1hash)r0   r   )r0   
tokens_idfr   r0   )r0   r   zInvalid input provided.)r   num_workersr!   )%r   rC   r   r   r   r   boolr   ModuleNotFoundErrorr   r   r'   r#   r%   r&   r$   evaltohasattrconfigr   intAttributeErrorallupdaterl   r   r   r   r   r   rS   r   datasetsorting_indicesrh   r   )%r   r   ri   r.   r/   r,   r   r2   r1   r0   r-   r   r   r   r   r   r   rm   r   r   	tokenizer_are_empty_lists_are_valid_lists_are_valid_tensorsoutput_dictr   target_datasetpreds_datasettarget_loaderpreds_loaderr`   rb   r_   ra   rV   rc   rg   r!   r!   r"   r)     s   ^




)r   N)NNFFFN)NNF)r   NNN)NF)NNFNNNFFNr   r   r   Fr   FNNF)<rx   r   r   collections.abcr   r   
contextlibr   typingr   r   r   r   r	   r@   r
   torch.nnr   torch.utils.datar   4torchmetrics.functional.text.helper_embedding_metricr   r   r   r   r   r   r   torchmetrics.utilitiesr   torchmetrics.utilities.checksr   r   torchmetrics.utilities.importsr   r   r#   r'   transformersr$   r%   r(   __doctest_skip__r   r   r-   r   r   tuplerS   r^   rh   rl   r   r   r   r   r   rp   r)   r!   r!   r!   r"   <module>   s>  $	
	


Q	
$

	
