o
    yi                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 deded	e
d
eeef fddZddededed d
efddZ	ddeded	e
ded d
ef
ddZdS )    )TupleN)Tensor)Literal)_check_same_shape)_safe_xlogypqlog_probreturnc                 C   s   t | | | jdks|jdkrtd| j d|j d| jd }|r4tj|  | |  dd}||fS | | jddd	 } ||jddd	 }t| | | jdd}||fS )
a  Updates and returns KL divergence scores for each observation and the total number of observations. Checks
    same shape and 2D nature of the input tensors else raises ValueError.

    Args:
        p: data distribution with shape ``[N, d]``
        q: prior or approximate distribution with shape ``[N, d]``
        log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities,
            will normalize to make sure the distributes sum to 1
       z4Expected both p and q distribution to be 2D but got z and z respectivelyr   )axisT)r   keepdim)r   ndim
ValueErrorshapetorchsumexpr   )r   r   r	   totalmeasures r   d/home/ubuntu/.local/lib/python3.10/site-packages/torchmetrics/functional/regression/kl_divergence.py_kld_update   s   


r   meanr   r   	reduction)r   r   noneNc                 C   s@   |dkr|   S |dkr|   | S |du s|dkr| S | | S )a  Computes the KL divergenece based on the type of reduction.

    Args:
        measures: Tensor of KL divergence scores for each observation
        total: Number of observations
        reduction:
            Determines how to reduce over the ``N``/batch dimension:

            - ``'mean'`` [default]: Averages score across samples
            - ``'sum'``: Sum score across samples
            - ``'none'`` or ``None``: Returns score per sample

    Example:
        >>> p = torch.tensor([[0.36, 0.48, 0.16]])
        >>> q = torch.tensor([[1/3, 1/3, 1/3]])
        >>> measures, total = _kld_update(p, q, log_prob=False)
        >>> _kld_compute(measures, total)
        tensor(0.0853)
    r   r   Nr   )r   )r   r   r   r   r   r   _kld_compute2   s   r   Fc                 C   s   t | ||\}}t|||S )a  Computes `KL divergence`_

    .. math::
        D_{KL}(P||Q) = \sum_{x\in\mathcal{X}} P(x) \log\frac{P(x)}{Q{x}}

    Where :math:`P` and :math:`Q` are probability distributions where :math:`P` usually represents a distribution
    over data and :math:`Q` is often a prior or approximation of :math:`P`. It should be noted that the KL divergence
    is a non-symetrical metric i.e. :math:`D_{KL}(P||Q) \neq D_{KL}(Q||P)`.

    Args:
        p: data distribution with shape ``[N, d]``
        q: prior or approximate distribution with shape ``[N, d]``
        log_prob: bool indicating if input is log-probabilities or probabilities. If given as probabilities,
            will normalize to make sure the distributes sum to 1
        reduction:
            Determines how to reduce over the ``N``/batch dimension:

            - ``'mean'`` [default]: Averages score across samples
            - ``'sum'``: Sum score across samples
            - ``'none'`` or ``None``: Returns score per sample

    Example:
        >>> import torch
        >>> p = torch.tensor([[0.36, 0.48, 0.16]])
        >>> q = torch.tensor([[1/3, 1/3, 1/3]])
        >>> kl_divergence(p, q)
        tensor(0.0853)
    )r   r   )r   r   r	   r   r   r   r   r   r   kl_divergenceP   s   r   )r   )Fr   )typingr   r   r   typing_extensionsr   torchmetrics.utilities.checksr   torchmetrics.utilities.computer   boolintr   r   r   r   r   r   r   <module>   s(   " 