o
    yiZ                     @   sb  d dl mZmZmZmZ d dlZd dlmZmZ d dlm	Z	 d dl
mZmZmZmZmZmZmZmZmZmZmZmZ d dlmZmZmZ d dlmZmZ d dlmZ d d	l m!Z! 	
	d6deeee f deeee f dee	d  dee def
ddZ"			d7dee# deee$ee# ef  dee$ ddfddZ%		d8deeeeef f dee dee# de$deeeeeef f f
ddZ&				d9dededee# deee$ee# ef  dee$ d e'deeeef fd!d"Z(	
		d:d#e$dee	d  deee$ee# ef  dee$ ddf
d$d%Z)	
	d6deeeeef f d#e$dee	d  dee def
d&d'Z*	
			d;deded#e$dee	d  deee$ee# ef  dee$ d e'defd(d)Z+		d<d*e$dee	d+  deee$ee# ef  dee$ ddf
d,d-Z,	d=deeeeef f d*e$dee	d+  dee dee$ deeeeef ef fd.d/Z-	
			d;deded*e$dee	d+  deee$ee# ef  dee$ d e'deeeeef eee ee ee f f fd0d1Z.				
			d>deded2e	d3 deee$ee# ef  d#ee$ d*ee$ dee	d  dee# dee$ d e'deeeeeef eee ee ee f f fd4d5Z/dS )?    )ListOptionalTupleUnionN)Tensortensor)Literal)-_binary_precision_recall_curve_arg_validation%_binary_precision_recall_curve_format0_binary_precision_recall_curve_tensor_validation%_binary_precision_recall_curve_update1_multiclass_precision_recall_curve_arg_validation)_multiclass_precision_recall_curve_format4_multiclass_precision_recall_curve_tensor_validation)_multiclass_precision_recall_curve_update1_multilabel_precision_recall_curve_arg_validation)_multilabel_precision_recall_curve_format4_multilabel_precision_recall_curve_tensor_validation)_multilabel_precision_recall_curve_update)_binary_roc_compute_multiclass_roc_compute_multilabel_roc_compute)_auc_compute_without_check_safe_divide)	_bincount)rank_zero_warnmacrofprtpraverage)r   weightednoneweightsreturnc                 C   s   t | trt| |ddd}ndd t| |D }t|}|du s%|dkr'|S t| r7td| d	t	 t| }|d
krG|| 
 S |dkrb|durbt|| ||  }|| |  S td)zOUtility function for reducing multiple average precision score into one number.      ?   )axisc                 S   s   g | ]
\}}t ||d qS )r$   )r   ).0xy r*   `/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/functional/classification/auroc.py
<listcomp>6   s    z!_reduce_auroc.<locals>.<listcomp>Nr!   zUAverage precision score for one or more classes was `nan`. Ignoring these classes in z-averager   r    zBReceived an incompatible combinations of inputs to make reduction.)
isinstancer   r   ziptorchstackisnananyr   UserWarningmeanr   sum
ValueError)r   r   r   r"   residxr*   r*   r+   _reduce_auroc,   s$   


r9   max_fpr
thresholdsignore_indexc                 C   sP   t || | d ur"t| ts$d|   k rdkr&n d S td|  d S d S d S )Nr   r%   z@Arguments `max_fpr` should be a float in range (0, 1], but got: )r	   r-   floatr6   )r:   r;   r<   r*   r*   r+   _binary_auroc_arg_validationI   s   
,r>   r%   state	pos_labelc                 C   s  t | ||\}}}|d u s|dkrt||dS t|tr|jn|d j}t||d}tj||ddd}	|||	d   ||	 ||	d    }
t||	d  ||	 |
}t	|d |	 |
dg}t	|d |	 |
dg}t||d}d|d  }dd|| ||    S )	Nr%   r$   r   )deviceT)	out_int32rightg      ?   )r   r   r-   r   rA   r   r/   	bucketizelerpcatview)r?   r;   r:   r@   r   r   __devicemax_areastopweight
interp_tprpartial_aucmin_arear*   r*   r+   _binary_auroc_computeS   s   $rQ   Tpredstargetvalidate_argsc                 C   sH   |rt ||| t| || t| |||\} }}t| ||}t|||S )a`  Compute Area Under the Receiver Operating Characteristic Curve (`ROC AUC`_) for binary tasks. The AUROC
    score summarizes the ROC curve into an single number that describes the performance of a model for multiple
    thresholds at the same time. Notably, an AUROC score of 1 is a perfect score and an AUROC score of 0.5
    corresponds to random guessing.

    Accepts the following input tensors:

    - ``preds`` (float tensor): ``(N, ...)``. Preds should be a tensor containing probabilities or logits for each
      observation. If preds has values outside [0,1] range we consider the input to be logits and will auto apply
      sigmoid per element.
    - ``target`` (int tensor): ``(N, ...)``. Target should be a tensor containing ground truth labels, and therefore
      only contain {0,1} values (except if `ignore_index` is specified). The value 1 always encodes the positive class.

    Additional dimension ``...`` will be flattened into the batch dimension.

    The implementation both supports calculating the metric in a non-binned but accurate version and a binned version
    that is less accurate but more memory efficient. Setting the `thresholds` argument to `None` will activate the
    non-binned  version that uses memory of size :math:`\mathcal{O}(n_{samples})` whereas setting the `thresholds`
    argument to either an integer, list or a 1d tensor will use a binned version that uses memory of
    size :math:`\mathcal{O}(n_{thresholds})` (constant memory).

    Args:
        preds: Tensor with predictions
        target: Tensor with true labels
        max_fpr: If not ``None``, calculates standardized partial AUC over the range ``[0, max_fpr]``.
        thresholds:
            Can be one of:

            - If set to `None`, will use a non-binned approach where thresholds are dynamically calculated from
              all the data. Most accurate but also most memory consuming approach.
            - If set to an `int` (larger than 1), will use that number of thresholds linearly spaced from
              0 to 1 as bins for the calculation.
            - If set to an `list` of floats, will use the indicated thresholds in the list as bins for the calculation
            - If set to an 1d `tensor` of floats, will use the indicated thresholds in the tensor as
              bins for the calculation.

        validate_args: bool indicating if input arguments and tensors should be validated for correctness.
            Set to ``False`` for faster computations.

    Returns:
        A single scalar with the auroc score

    Example:
        >>> from torchmetrics.functional.classification import binary_auroc
        >>> preds = torch.tensor([0, 0.5, 0.7, 0.8])
        >>> target = torch.tensor([0, 1, 1, 0])
        >>> binary_auroc(preds, target, thresholds=None)
        tensor(0.5000)
        >>> binary_auroc(preds, target, thresholds=5)
        tensor(0.5000)
    )r>   r   r
   r   rQ   )rR   rS   r:   r;   r<   rT   r?   r*   r*   r+   binary_aurocn   s   ;rU   num_classesc                 C   0   t | || d}||vrtd| d| d S )N)r   r    r!   N)Expected argument `average` to be one of 	 but got )r   r6   )rV   r   r;   r<   allowed_averager*   r*   r+    _multiclass_auroc_arg_validation   
   r[   c              	   C   s^   t | ||\}}}t||||d u rt| d |d dS | d d d dd d f ddS )Nr%   )	minlengthr   r"   )r   r9   r   r=   r5   )r?   rV   r   r;   r   r   rI   r*   r*   r+   _multiclass_auroc_compute   s   r`   c                 C   sR   |rt |||| t| ||| t| ||||\} }}t| |||}t||||S )a  Compute Area Under the Receiver Operating Characteristic Curve (`ROC AUC`_) for multiclass tasks. The AUROC
    score summarizes the ROC curve into an single number that describes the performance of a model for multiple
    thresholds at the same time. Notably, an AUROC score of 1 is a perfect score and an AUROC score of 0.5
    corresponds to random guessing.

    Accepts the following input tensors:

    - ``preds`` (float tensor): ``(N, C, ...)``. Preds should be a tensor containing probabilities or logits for each
      observation. If preds has values outside [0,1] range we consider the input to be logits and will auto apply
      softmax per sample.
    - ``target`` (int tensor): ``(N, ...)``. Target should be a tensor containing ground truth labels, and therefore
      only contain values in the [0, n_classes-1] range (except if `ignore_index` is specified).

    Additional dimension ``...`` will be flattened into the batch dimension.

    The implementation both supports calculating the metric in a non-binned but accurate version and a binned version
    that is less accurate but more memory efficient. Setting the `thresholds` argument to `None` will activate the
    non-binned  version that uses memory of size :math:`\mathcal{O}(n_{samples})` whereas setting the `thresholds`
    argument to either an integer, list or a 1d tensor will use a binned version that uses memory of
    size :math:`\mathcal{O}(n_{thresholds} \times n_{classes})` (constant memory).

    Args:
        preds: Tensor with predictions
        target: Tensor with true labels
        num_classes: Integer specifing the number of classes
        average:
            Defines the reduction that is applied over classes. Should be one of the following:

            - ``macro``: Calculate score for each class and average them
            - ``weighted``: Calculates score for each class and computes weighted average using their support
            - ``"none"`` or ``None``: Calculates score for each class and applies no reduction
        thresholds:
            Can be one of:

            - If set to `None`, will use a non-binned approach where thresholds are dynamically calculated from
              all the data. Most accurate but also most memory consuming approach.
            - If set to an `int` (larger than 1), will use that number of thresholds linearly spaced from
              0 to 1 as bins for the calculation.
            - If set to an `list` of floats, will use the indicated thresholds in the list as bins for the calculation
            - If set to an 1d `tensor` of floats, will use the indicated thresholds in the tensor as
              bins for the calculation.

        validate_args: bool indicating if input arguments and tensors should be validated for correctness.
            Set to ``False`` for faster computations.

    Returns:
        If `average=None|"none"` then a 1d tensor of shape (n_classes, ) will be returned with auroc score per class.
        If `average="macro"|"weighted"` then a single scalar is returned.

    Example:
        >>> from torchmetrics.functional.classification import multiclass_auroc
        >>> preds = torch.tensor([[0.75, 0.05, 0.05, 0.05, 0.05],
        ...                       [0.05, 0.75, 0.05, 0.05, 0.05],
        ...                       [0.05, 0.05, 0.75, 0.05, 0.05],
        ...                       [0.05, 0.05, 0.05, 0.75, 0.05]])
        >>> target = torch.tensor([0, 1, 3, 2])
        >>> multiclass_auroc(preds, target, num_classes=5, average="macro", thresholds=None)
        tensor(0.5333)
        >>> multiclass_auroc(preds, target, num_classes=5, average=None, thresholds=None)
        tensor([1.0000, 1.0000, 0.3333, 0.3333, 0.0000])
        >>> multiclass_auroc(preds, target, num_classes=5, average="macro", thresholds=5)
        tensor(0.5333)
        >>> multiclass_auroc(preds, target, num_classes=5, average=None, thresholds=5)
        tensor([1.0000, 1.0000, 0.3333, 0.3333, 0.0000])
    )r[   r   r   r   r`   )rR   rS   rV   r   r;   r<   rT   r?   r*   r*   r+   multiclass_auroc   s   J

ra   
num_labels)micror   r    r!   c                 C   rW   )N)rc   r   r    r!   NrX   rY   )r   r6   )rb   r   r;   r<   rZ   r*   r*   r+    _multilabel_auroc_arg_validation   r\   rd   c              	   C   s   |dkr>t | tr|d urt| d|d dS | d  }| d  }|d ur5||k}||  }||  }t||f|d dS t| |||\}}	}
t||	||d u r^| d dkjdd dS | d d d dd d f ddS )Nrc   r%   )r:   r   )dimr^   r_   )r-   r   rQ   r5   flattenr   r9   r=   )r?   rb   r   r;   r<   rR   rS   r8   r   r   rI   r*   r*   r+   _multilabel_auroc_compute,  s&   

rg   c                 C   sT   |rt |||| t| ||| t| ||||\} }}t| |||}t|||||S )a  Compute Area Under the Receiver Operating Characteristic Curve (`ROC AUC`_) for multilabel tasks. The AUROC
    score summarizes the ROC curve into an single number that describes the performance of a model for multiple
    thresholds at the same time. Notably, an AUROC score of 1 is a perfect score and an AUROC score of 0.5
    corresponds to random guessing.

    Accepts the following input tensors:

    - ``preds`` (float tensor): ``(N, C, ...)``. Preds should be a tensor containing probabilities or logits for each
      observation. If preds has values outside [0,1] range we consider the input to be logits and will auto apply
      sigmoid per element.
    - ``target`` (int tensor): ``(N, C, ...)``. Target should be a tensor containing ground truth labels, and therefore
      only contain {0,1} values (except if `ignore_index` is specified).

    Additional dimension ``...`` will be flattened into the batch dimension.

    The implementation both supports calculating the metric in a non-binned but accurate version and a binned version
    that is less accurate but more memory efficient. Setting the `thresholds` argument to `None` will activate the
    non-binned  version that uses memory of size :math:`\mathcal{O}(n_{samples})` whereas setting the `thresholds`
    argument to either an integer, list or a 1d tensor will use a binned version that uses memory of
    size :math:`\mathcal{O}(n_{thresholds} \times n_{labels})` (constant memory).

    Args:
        preds: Tensor with predictions
        target: Tensor with true labels
        num_labels: Integer specifing the number of labels
        average:
            Defines the reduction that is applied over labels. Should be one of the following:

            - ``micro``: Sum score over all labels
            - ``macro``: Calculate score for each label and average them
            - ``weighted``: Calculates score for each label and computes weighted average using their support
            - ``"none"`` or ``None``: Calculates score for each label and applies no reduction
        thresholds:
            Can be one of:

            - If set to `None`, will use a non-binned approach where thresholds are dynamically calculated from
              all the data. Most accurate but also most memory consuming approach.
            - If set to an `int` (larger than 1), will use that number of thresholds linearly spaced from
              0 to 1 as bins for the calculation.
            - If set to an `list` of floats, will use the indicated thresholds in the list as bins for the calculation
            - If set to an 1d `tensor` of floats, will use the indicated thresholds in the tensor as
              bins for the calculation.

        validate_args: bool indicating if input arguments and tensors should be validated for correctness.
            Set to ``False`` for faster computations.

    Returns:
        If `average=None|"none"` then a 1d tensor of shape (n_classes, ) will be returned with auroc score per class.
        If `average="micro|macro"|"weighted"` then a single scalar is returned.

    Example:
        >>> from torchmetrics.functional.classification import multilabel_auroc
        >>> preds = torch.tensor([[0.75, 0.05, 0.35],
        ...                       [0.45, 0.75, 0.05],
        ...                       [0.05, 0.55, 0.75],
        ...                       [0.05, 0.65, 0.05]])
        >>> target = torch.tensor([[1, 0, 1],
        ...                        [0, 0, 0],
        ...                        [0, 1, 1],
        ...                        [1, 1, 1]])
        >>> multilabel_auroc(preds, target, num_labels=3, average="macro", thresholds=None)
        tensor(0.6528)
        >>> multilabel_auroc(preds, target, num_labels=3, average=None, thresholds=None)
        tensor([0.6250, 0.5000, 0.8333])
        >>> multilabel_auroc(preds, target, num_labels=3, average="macro", thresholds=5)
        tensor(0.6528)
        >>> multilabel_auroc(preds, target, num_labels=3, average=None, thresholds=5)
        tensor([0.6250, 0.5000, 0.8333])
    )rd   r   r   r   rg   )rR   rS   rb   r   r;   r<   rT   r?   r*   r*   r+   multilabel_aurocI  s   N

rh   task)binary
multiclass
multilabelc
           
      C   s|   |dkrt | |||||	S |dkr"t|tsJ t| ||||||	S |dkr7t|ts-J t| ||||||	S td| )a:  Compute Area Under the Receiver Operating Characteristic Curve (`ROC AUC`_). The AUROC score summarizes the
    ROC curve into an single number that describes the performance of a model for multiple thresholds at the same
    time. Notably, an AUROC score of 1 is a perfect score and an AUROC score of 0.5 corresponds to random guessing.

    This function is a simple wrapper to get the task specific versions of this metric, which is done by setting the
    ``task`` argument to either ``'binary'``, ``'multiclass'`` or ``multilabel``. See the documentation of
    :func:`binary_auroc`, :func:`multiclass_auroc` and :func:`multilabel_auroc` for the specific details of
    each argument influence and examples.

    Legacy Example:
        >>> preds = torch.tensor([0.13, 0.26, 0.08, 0.19, 0.34])
        >>> target = torch.tensor([0, 0, 1, 1, 1])
        >>> auroc(preds, target, task='binary')
        tensor(0.5000)

        >>> preds = torch.tensor([[0.90, 0.05, 0.05],
        ...                       [0.05, 0.90, 0.05],
        ...                       [0.05, 0.05, 0.90],
        ...                       [0.85, 0.05, 0.10],
        ...                       [0.10, 0.10, 0.80]])
        >>> target = torch.tensor([0, 1, 1, 2, 2])
        >>> auroc(preds, target, task='multiclass', num_classes=3)
        tensor(0.7778)
    rj   rk   rl   z[Expected argument `task` to either be `'binary'`, `'multiclass'` or `'multilabel'` but got )rU   r-   intra   rh   r6   )
rR   rS   ri   r;   rV   rb   r   r:   r<   rT   r*   r*   r+   auroc  s   $rn   )r   N)NNN)Nr%   )NNNT)r   NN)r   NNT)NN)N)NNNr   NNT)0typingr   r   r   r   r/   r   r   typing_extensionsr   =torchmetrics.functional.classification.precision_recall_curver	   r
   r   r   r   r   r   r   r   r   r   r   *torchmetrics.functional.classification.rocr   r   r   torchmetrics.utilities.computer   r   torchmetrics.utilities.datar   torchmetrics.utilities.printsr   r9   r=   rm   r>   rQ   boolrU   r[   r`   ra   rd   rg   rh   rn   r*   r*   r*   r+   <module>   s  8




E





W



!
*
\
	
,