o
    yii@                     @   s,  d dl mZmZmZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
mZmZ dededed	eeeef fd
dZ		d/dededeeef deded	efddZ		d0deded dee d	dfddZ	d1dededee d	dfddZdeded	efddZ				d2dedededed dee ded	efd d!Z		d0d"ededed dee d	df
d#d$Z	d1deded"edee d	df
d%d&Zdeded	efd'd(Z				d2deded"ededed dee ded	efd)d*Z						d3deded+ed, deded d"ee dee ded	efd-d.ZdS )4    )OptionalTupleUnionN)Tensor)Literal)_binary_confusion_matrix_format*_binary_confusion_matrix_tensor_validation#_multiclass_confusion_matrix_format._multiclass_confusion_matrix_tensor_validationconfidences
accuraciesbin_boundariesreturnc                 C   s   |j | jd}tjt|d | j| jd}tjt|d | j| jd}tjt|d | j| jd}t| |d }|jd|t| d |jd|| d t	|| }|jd||d t	|| }||
  }|||fS )a  Compute calibration bins using ``torch.bucketize``. Use for pytorch >= 1.6.

    Args:
        confidences: The confidence (i.e. predicted prob) of the top1 prediction.
        accuracies: 1.0 if the top-1 prediction was correct, 0.0 otherwise.
        bin_boundaries: Bin boundaries separating the ``linspace`` from 0 to 1.

    Returns:
        tuple with binned accuracy, binned confidence and binned probabilities
    )dtype   )devicer   r   )dimindexsrc)tor   torchzeroslenr   	bucketizescatter_add_	ones_like
nan_to_numsum)r   r   r   acc_binconf_bin	count_binindicesprop_bin r#   l/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/functional/classification/calibration_error.py_binning_bucketize   s   
r%   l1Fnormdebiasc           
      C   s:  t |trtjdd|d tj| jd}|dvrtd| dt  t| ||\}}}W d   n1 s7w   Y  |dkrNt	t
|| | }|S |d	kr^tt
|| }|S |d
krt	t|| d| }|r||d  | || d  d  }	|t	t|	7 }|dkrt|ntd}|S )a  Computes the calibration error given the provided bin boundaries and norm.

    Args:
        confidences: The confidence (i.e. predicted prob) of the top1 prediction.
        accuracies: 1.0 if the top-1 prediction was correct, 0.0 otherwise.
        bin_boundaries: Bin boundaries separating the ``linspace`` from 0 to 1.
        norm: Norm function to use when computing calibration error. Defaults to "l1".
        debias: Apply debiasing to L2 norm computation as in
            `Verified Uncertainty Calibration`_. Defaults to False.

    Raises:
        ValueError: If an unsupported norm function is provided.

    Returns:
        Tensor: Calibration error scalar.
    r   r   )r   r   >   r&   l2maxzNorm z6 is not supported. Please select from l1, l2, or max. Nr&   r*   r)      )
isinstanceintr   linspacefloatr   
ValueErrorno_gradr%   r   absr*   powsizer   sqrttensor)
r   r   r   r'   r(   r   r   r"   cedebias_binsr#   r#   r$   _ce_compute<   s(   


$r9   n_binsr&   r)   r*   ignore_indexc                 C   sj   t | tr	| dk rtd|  d}||vr!td| d| d|d ur1t |ts3td| d S d S )Nr   CExpected argument `n_bins` to be an integer larger than 0, but got r;   &Expected argument `norm` to be one of 
, but got .LExpected argument `ignore_index` to either be `None` or an integer, but got r,   r-   r0   )r:   r'   r<   allowed_normr#   r#   r$   (_binary_calibration_error_arg_validationl   s   rD   predstargetc                 C   s(   t | || |  std| j d S NzdExpected argument `preds` to be floating tensor with probabilities/logits but got tensor with dtype )r   is_floating_pointr0   r   )rE   rF   r<   r#   r#   r$   +_binary_calibration_error_tensor_validationz   s   rI   c                 C   s   | |}}||fS Nr#   )rE   rF   r   r   r#   r#   r$    _binary_calibration_error_update   s   
rK      Tvalidate_argsc                 C   sN   |rt ||| t| || t| |d|dd\} }t| |\}}t||||S )aY
  `Top-label Calibration Error`_ for binary tasks. The expected calibration error can be used to quantify how
    well a given model is calibrated e.g. how well the predicted output probabilities of the model matches the
    actual probabilities of the ground truth distribution.

    Three different norms are implemented, each corresponding to variations on the calibration error metric.

    .. math::
        \text{ECE} = \sum_i^N b_i \|(p_i - c_i)\|, \text{L1 norm (Expected Calibration Error)}

    .. math::
        \text{MCE} =  \max_{i} (p_i - c_i), \text{Infinity norm (Maximum Calibration Error)}

    .. math::
        \text{RMSCE} = \sqrt{\sum_i^N b_i(p_i - c_i)^2}, \text{L2 norm (Root Mean Square Calibration Error)}

    Where :math:`p_i` is the top-1 prediction accuracy in bin :math:`i`, :math:`c_i` is the average confidence of
    predictions in bin :math:`i`, and :math:`b_i` is the fraction of data points in bin :math:`i`. Bins are constructed
    in an uniform way in the [0,1] range.

    Accepts the following input tensors:

    - ``preds`` (float tensor): ``(N, ...)``. Preds should be a tensor containing probabilities or logits for each
      observation. If preds has values outside [0,1] range we consider the input to be logits and will auto apply
      sigmoid per element.
    - ``target`` (int tensor): ``(N, ...)``. Target should be a tensor containing ground truth labels, and therefore
      only contain {0,1} values (except if `ignore_index` is specified). The value 1 always encodes the positive class.

    Additional dimension ``...`` will be flattened into the batch dimension.

    Args:
        preds: Tensor with predictions
        target: Tensor with true labels
        n_bins: Number of bins to use when computing the metric.
        norm: Norm used to compare empirical and expected probability bins.
        ignore_index:
            Specifies a target value that is ignored and does not contribute to the metric calculation
        validate_args: bool indicating if input arguments and tensors should be validated for correctness.
            Set to ``False`` for faster computations.

    Example:
        >>> from torchmetrics.functional.classification import binary_calibration_error
        >>> preds = torch.tensor([0.25, 0.25, 0.55, 0.75, 0.75])
        >>> target = torch.tensor([0, 0, 1, 1, 1])
        >>> binary_calibration_error(preds, target, n_bins=2, norm='l1')
        tensor(0.2900)
        >>> binary_calibration_error(preds, target, n_bins=2, norm='l2')
        tensor(0.2918)
        >>> binary_calibration_error(preds, target, n_bins=2, norm='max')
        tensor(0.3167)
    g        F)	thresholdr<   convert_to_labels)rD   rI   r   rK   r9   )rE   rF   r:   r'   r<   rM   r   r   r#   r#   r$   binary_calibration_error   s   :

rP   num_classesc                 C   s   t | tr	| dk rtd|  t |tr|dk r td| d}||vr1td| d| d|d urAt |tsCtd	| d S d S )
Nr+   zHExpected argument `num_classes` to be an integer larger than 1, but got r   r=   r;   r>   r?   r@   rA   rB   )rQ   r:   r'   r<   rC   r#   r#   r$   ,_multiclass_calibration_error_arg_validation   s   rR   c                 C   s*   t | ||| |  std| j d S rG   )r
   rH   r0   r   )rE   rF   rQ   r<   r#   r#   r$   /_multiclass_calibration_error_tensor_validation   s   rS   c                 C   sJ   t d| k| dk s| d} | jdd\}}||}| | fS )Nr   r   )r   )r   allsoftmaxr*   eqr/   )rE   rF   r   predictionsr   r#   r#   r$   $_multiclass_calibration_error_update   s
   

rX   c           	      C   sP   |rt |||| t| ||| t| ||dd\} }t| |\}}t||||S )aW  `Top-label Calibration Error`_ for multiclass tasks. The expected calibration error can be used to quantify
    how well a given model is calibrated e.g. how well the predicted output probabilities of the model matches the
    actual probabilities of the ground truth distribution.

    Three different norms are implemented, each corresponding to variations on the calibration error metric.

    .. math::
        \text{ECE} = \sum_i^N b_i \|(p_i - c_i)\|, \text{L1 norm (Expected Calibration Error)}

    .. math::
        \text{MCE} =  \max_{i} (p_i - c_i), \text{Infinity norm (Maximum Calibration Error)}

    .. math::
        \text{RMSCE} = \sqrt{\sum_i^N b_i(p_i - c_i)^2}, \text{L2 norm (Root Mean Square Calibration Error)}

    Where :math:`p_i` is the top-1 prediction accuracy in bin :math:`i`, :math:`c_i` is the average confidence of
    predictions in bin :math:`i`, and :math:`b_i` is the fraction of data points in bin :math:`i`. Bins are constructed
    in an uniform way in the [0,1] range.

    Accepts the following input tensors:

    - ``preds`` (float tensor): ``(N, C, ...)``. Preds should be a tensor containing probabilities or logits for each
      observation. If preds has values outside [0,1] range we consider the input to be logits and will auto apply
      softmax per sample.
    - ``target`` (int tensor): ``(N, ...)``. Target should be a tensor containing ground truth labels, and therefore
      only contain values in the [0, n_classes-1] range (except if `ignore_index` is specified).

    Additional dimension ``...`` will be flattened into the batch dimension.

    Args:
        preds: Tensor with predictions
        target: Tensor with true labels
        num_classes: Integer specifing the number of classes
        n_bins: Number of bins to use when computing the metric.
        norm: Norm used to compare empirical and expected probability bins.
        ignore_index:
            Specifies a target value that is ignored and does not contribute to the metric calculation
        validate_args: bool indicating if input arguments and tensors should be validated for correctness.
            Set to ``False`` for faster computations.

    Example:
        >>> from torchmetrics.functional.classification import multiclass_calibration_error
        >>> preds = torch.tensor([[0.25, 0.20, 0.55],
        ...                       [0.55, 0.05, 0.40],
        ...                       [0.10, 0.30, 0.60],
        ...                       [0.90, 0.05, 0.05]])
        >>> target = torch.tensor([0, 1, 2, 0])
        >>> multiclass_calibration_error(preds, target, num_classes=3, n_bins=3, norm='l1')
        tensor(0.2000)
        >>> multiclass_calibration_error(preds, target, num_classes=3, n_bins=3, norm='l2')
        tensor(0.2082)
        >>> multiclass_calibration_error(preds, target, num_classes=3, n_bins=3, norm='max')
        tensor(0.2333)
    F)rO   )rR   rS   r	   rX   r9   )	rE   rF   rQ   r:   r'   r<   rM   r   r   r#   r#   r$   multiclass_calibration_error   s   ?rY   task)binary
multiclassc                 C   s^   |dusJ |dkrt | |||||S |dkr(t|tsJ t| ||||||S td| )aG  `Top-label Calibration Error`_. The expected calibration error can be used to quantify how well a given
    model is calibrated e.g. how well the predicted output probabilities of the model matches the actual
    probabilities of the ground truth distribution.

    Three different norms are implemented, each corresponding to variations on the calibration error metric.

    .. math::
        \text{ECE} = \sum_i^N b_i \|(p_i - c_i)\|, \text{L1 norm (Expected Calibration Error)}

    .. math::
        \text{MCE} =  \max_{i} (p_i - c_i), \text{Infinity norm (Maximum Calibration Error)}

    .. math::
        \text{RMSCE} = \sqrt{\sum_i^N b_i(p_i - c_i)^2}, \text{L2 norm (Root Mean Square Calibration Error)}

    Where :math:`p_i` is the top-1 prediction accuracy in bin :math:`i`, :math:`c_i` is the average confidence of
    predictions in bin :math:`i`, and :math:`b_i` is the fraction of data points in bin :math:`i`. Bins are constructed
    in an uniform way in the [0,1] range.

    This function is a simple wrapper to get the task specific versions of this metric, which is done by setting the
    ``task`` argument to either ``'binary'`` or ``'multiclass'``. See the documentation of
    :func:`binary_calibration_error` and :func:`multiclass_calibration_error` for the specific details of
    each argument influence and examples.
    Nr[   r\   zKExpected argument `task` to either be `'binary'` or `'multiclass'` but got )rP   r,   r-   rY   r0   )rE   rF   rZ   r:   r'   rQ   r<   rM   r#   r#   r$   calibration_error<  s   "r]   )r&   F)r&   NrJ   )rL   r&   NT)NrL   r&   NNT)typingr   r   r   r   r   typing_extensionsr   7torchmetrics.functional.classification.confusion_matrixr   r   r	   r
   r%   r-   strboolr9   rD   rI   rK   rP   rR   rS   rX   rY   r]   r#   r#   r#   r$   <module>   s(  
$

2


G



J	