o
    yi                     @   s  d dl Z d dlmZmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZmZ dedefd	d
Z		ddedededed deeeef  defddZdedefddZ		ddededed deeeef  def
ddZ		ddeded deeeef  defddZdS )    N)OptionalUnion)Tensor)Literal)#_multiclass_confusion_matrix_update)_drop_empty_rows_and_cols_handle_nan_in_data_nominal_input_validationconfmatreturnc                 C   sV   t | } |  }| | }| d| }|dd|jd }t|t||  S )a  Compute Conditional Entropy Statistic based on a pre-computed confusion matrix.

    .. math::
        H(X|Y) = \sum_{x, y ~ (X, Y)} p(x, y)\frac{p(y)}{p(x, y)}

    Args:
        confmat: Confusion matrix for observed data

    Returns:
        Conditional Entropy Value
       )r   sum	unsqueezerepeatshapetorchnansumlog)r
   total_occurrencesp_xy_mp_yp_y_m r   \/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/functional/nominal/theils_u.py_conditional_entropy_compute   s   r   replace        predstargetnum_classesnan_strategy)r   dropnan_replace_valuec                 C   sN   | j dkr
| dn| } |j dkr|dn|}t| |||\} }t| ||S )a  Computes the bins to update the confusion matrix with for Theil's U calculation.

    Args:
        preds: 1D or 2D tensor of categorical (nominal) data
        target: 1D or 2D tensor of categorical (nominal) data
        num_classes: Integer specifing the number of classes
        nan_strategy: Indication of whether to replace or drop ``NaN`` values
        nan_replace_value: Value to replace ``NaN`s when ``nan_strategy = 'replace```

    Returns:
        Non-reduced confusion matrix
       r   )ndimargmaxr   r   )r   r   r   r    r"   r   r   r   _theils_u_update6   s   r&   c                 C   s`   t | } t| }|  }| d| }t|t|  }|dkr*tjd| jdS || | S )zCompute Theil's U statistic based on a pre-computed confusion matrix.

    Args:
        confmat: Confusion matrix for observed data

    Returns:
        Theil's U statistic
    r   device)r   r   r   r   r   tensorr(   )r
   s_xyr   p_xs_xr   r   r   _theils_u_computeO   s   	r-   c                 C   s.   t t| |g }t| ||||}t|S )aW  Compute `Theil's U`_ statistic (Uncertainty Coefficient) measuring the association between two categorical
    (nominal) data series.

    .. math::
        U(X|Y) = \frac{H(X) - H(X|Y)}{H(X)}

    where :math:`H(X)` is entropy of variable :math:`X` while :math:`H(X|Y)` is the conditional entropy of :math:`X`
    given :math:`Y`.

    Theils's U is an asymmetric coefficient, i.e. :math:`TheilsU(preds, target) \neq TheilsU(target, preds)`.

    The output values lies in [0, 1]. 0 means y has no information about x while value 1 means y has complete
    information about x.

    Args:
        preds: 1D or 2D tensor of categorical (nominal) data
            - 1D shape: (batch_size,)
            - 2D shape: (batch_size, num_classes)
        target: 1D or 2D tensor of categorical (nominal) data
            - 1D shape: (batch_size,)
            - 2D shape: (batch_size, num_classes)
        nan_strategy: Indication of whether to replace or drop ``NaN`` values
        nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'``

    Returns:
        Theil's U Statistic: Tensor

    Example:
        >>> from torchmetrics.functional import theils_u
        >>> _ = torch.manual_seed(42)
        >>> preds = torch.randint(10, (10,))
        >>> target = torch.randint(10, (10,))
        >>> theils_u(preds, target)
        tensor(0.8530)
    )lenr   catuniquer&   r-   )r   r   r    r"   r   r
   r   r   r   theils_ui   s   )r1   matrixc                 C   s   t || | jd }tj||| jd}tt|dD ]9\}}| dd|f | dd|f }}tt	||g
 }	t|||	||}
t|
|||f< t|
j|||f< q|S )a  Compute `Theil's U`_ statistic between a set of multiple variables.

    This can serve as a convenient tool to compute Theil's U statistic for analyses of correlation between categorical
    variables in your dataset.

    Args:
        matrix: A tensor of categorical (nominal) data, where:
            - rows represent a number of data points
            - columns represent a number of categorical (nominal) features
        nan_strategy: Indication of whether to replace or drop ``NaN`` values
        nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'``

    Returns:
        Theil's U statistic for a dataset of categorical variables

    Example:
        >>> from torchmetrics.functional.nominal import theils_u_matrix
        >>> _ = torch.manual_seed(42)
        >>> matrix = torch.randint(0, 4, (200, 5))
        >>> theils_u_matrix(matrix)
        tensor([[1.0000, 0.0202, 0.0142, 0.0196, 0.0353],
                [0.0202, 1.0000, 0.0070, 0.0136, 0.0065],
                [0.0143, 0.0070, 1.0000, 0.0125, 0.0206],
                [0.0198, 0.0137, 0.0125, 1.0000, 0.0312],
                [0.0352, 0.0065, 0.0204, 0.0308, 1.0000]])
    r   r'   r#   N)r	   r   r   onesr(   	itertoolscombinationsranger.   r/   r0   r&   r-   T)r2   r    r"   num_variablestheils_u_matrix_valueijxyr   r
   r   r   r   theils_u_matrix   s   

"r>   )r   r   )r4   typingr   r   r   r   typing_extensionsr   7torchmetrics.functional.classification.confusion_matrixr   %torchmetrics.functional.nominal.utilsr   r   r	   r   intfloatr&   r-   r1   r>   r   r   r   r   <module>   s`   

0