o
    .wi                     @   s   d dl Z d dlmZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZ dedefd	d
Z		ddedededed dee defddZdedefddZ		ddededed dee def
ddZ		ddeded dee defddZdS )    N)Optional)Tensor)Literal)#_multiclass_confusion_matrix_update)_drop_empty_rows_and_cols_handle_nan_in_data_nominal_input_validationconfmatreturnc                 C   sV   t | } |  }| | }| d| }|dd|jd }t|t||  S )a  Compute Conditional Entropy Statistic based on a pre-computed confusion matrix.

    .. math::
        H(X|Y) = \sum_{x, y ~ (X, Y)} p(x, y)\frac{p(y)}{p(x, y)}

    Args:
        confmat: Confusion matrix for observed data

    Returns:
        Conditional Entropy Value

       )r   sum	unsqueezerepeatshapetorchnansumlog)r	   total_occurrencesp_xy_mp_yp_y_m r   e/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/torchmetrics/functional/nominal/theils_u.py_conditional_entropy_compute   s   r   replace        predstargetnum_classesnan_strategy)r   dropnan_replace_valuec                 C   sN   | j dkr
| dn| } |j dkr|dn|}t| |||\} }t| ||S )a  Compute the bins to update the confusion matrix with for Theil's U calculation.

    Args:
        preds: 1D or 2D tensor of categorical (nominal) data
        target: 1D or 2D tensor of categorical (nominal) data
        num_classes: Integer specifying the number of classes
        nan_strategy: Indication of whether to replace or drop ``NaN`` values
        nan_replace_value: Value to replace ``NaN`s when ``nan_strategy = 'replace```

    Returns:
        Non-reduced confusion matrix

       r   )ndimargmaxr   r   )r   r   r   r   r!   r   r   r   _theils_u_update7   s   r%   c                 C   s`   t | } t| }|  }| d| }t|t|  }|dkr*tjd| jdS || | S )zCompute Theil's U statistic based on a pre-computed confusion matrix.

    Args:
        confmat: Confusion matrix for observed data

    Returns:
        Theil's U statistic

    r   device)r   r   r   r   r   tensorr'   )r	   s_xyr   p_xs_xr   r   r   _theils_u_computeQ   s   
r,   c                 C   s.   t t| |g }t| ||||}t|S )aG  Compute `Theils Uncertainty coefficient`_ statistic measuring the association between two nominal data series.

    .. math::
        U(X|Y) = \frac{H(X) - H(X|Y)}{H(X)}

    where :math:`H(X)` is entropy of variable :math:`X` while :math:`H(X|Y)` is the conditional entropy of :math:`X`
    given :math:`Y`.

    Theils's U is an asymmetric coefficient, i.e. :math:`TheilsU(preds, target) \neq TheilsU(target, preds)`.

    The output values lies in [0, 1]. 0 means y has no information about x while value 1 means y has complete
    information about x.

    Args:
        preds: 1D or 2D tensor of categorical (nominal) data
            - 1D shape: (batch_size,)
            - 2D shape: (batch_size, num_classes)
        target: 1D or 2D tensor of categorical (nominal) data
            - 1D shape: (batch_size,)
            - 2D shape: (batch_size, num_classes)
        nan_strategy: Indication of whether to replace or drop ``NaN`` values
        nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'``

    Returns:
        Tensor containing Theil's U statistic

    Example:
        >>> from torch import randint
        >>> from torchmetrics.functional.nominal import theils_u
        >>> preds = randint(10, (10,))
        >>> target = randint(10, (10,))
        >>> theils_u(preds, target)
        tensor(0.8530)

    )lenr   catuniquer%   r,   )r   r   r   r!   r   r	   r   r   r   theils_ul   s   )r0   matrixc                 C   s   t || | jd }tj||| jd}tt|dD ]9\}}| dd|f | dd|f }}tt	||g
 }	t|||	||}
t|
|||f< t|
j|||f< q|S )a  Compute `Theil's U`_ statistic between a set of multiple variables.

    This can serve as a convenient tool to compute Theil's U statistic for analyses of correlation between categorical
    variables in your dataset.

    Args:
        matrix: A tensor of categorical (nominal) data, where:
            - rows represent a number of data points
            - columns represent a number of categorical (nominal) features
        nan_strategy: Indication of whether to replace or drop ``NaN`` values
        nan_replace_value: Value to replace ``NaN``s when ``nan_strategy = 'replace'``

    Returns:
        Theil's U statistic for a dataset of categorical variables

    Example:
        >>> from torch import randint
        >>> from torchmetrics.functional.nominal import theils_u_matrix
        >>> matrix = randint(0, 4, (200, 5))
        >>> theils_u_matrix(matrix)
        tensor([[1.0000, 0.0202, 0.0142, 0.0196, 0.0353],
                [0.0202, 1.0000, 0.0070, 0.0136, 0.0065],
                [0.0143, 0.0070, 1.0000, 0.0125, 0.0206],
                [0.0198, 0.0137, 0.0125, 1.0000, 0.0312],
                [0.0352, 0.0065, 0.0204, 0.0308, 1.0000]])

    r   r&   r"   N)r   r   r   onesr'   	itertoolscombinationsranger-   r.   r/   r%   r,   T)r1   r   r!   num_variablestheils_u_matrix_valueijxyr   r	   r   r   r   theils_u_matrix   s   
 
"r=   )r   r   )r3   typingr   r   r   typing_extensionsr   7torchmetrics.functional.classification.confusion_matrixr   %torchmetrics.functional.nominal.utilsr   r   r   r   intfloatr%   r,   r0   r=   r   r   r   r   <module>   s`   

0