o
    {i                     @   sr  d dl Zd dlZd dlmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZ d dlmZ d dlmZ ejdejdZejdejdZejd	ejdZeejjZeejjZ ed
d Z!ej"dddd Z#ej"dejejj$ejjddddejj$ejjddddgdejjejjejj%ejj&dddd Z'ej"ddefddZ(ej"dddd Z)ej"dddd Z*ej"dddddZ+ej"ddedfd d!Z,ej"ddefd"d#Z-ej"ddd$d% Z.ej"ddd&d' Z/ej"ddd(d) Z0ej"ddd*d+ Z1ej"dejejj$ejjddddejj$ejjddddgdejjejjejjejj2ejj2ejj%ejj&d,dd-d. Z3ej4ddd/d0 Z5ej"ddd1d2 Z6ej"ddd3d4 Z7ej"ddd5d6 Z8ej"ddd7d8 Z9ej"ddd9d: Z:ej"ddd;d< Z;ej"ddd=d> Z<ej"ddd?d@ Z=ej"dddAdB Z>ej"dddCdD Z?ej"dejejj$ejjddddejj$ejjddddgdejjejjejjejj%ejj&dEddFdG Z@ej"ddejjejj%ejj&dHddIdJ ZAej"dejejj$ejjddddejj$ejjddddgdejjejj%ejj&dHddKdL ZBej4dddMdN ZCej"ddejjejj%ejj&dHddOdP ZDej"dejejj$ejjddddejj$ejjddddgdejjejj%ejj&dHddQdR ZEej"dejejj$ejjddddejj$ejjddddgdejjejjejjejjejj%ejj&dSddTdU ZFej4dddVdW ZGej"dejejj$ejjddddejj$ejjddddgdejjejjejjejjejjejj%ejj&dXddYdZ ZHej"dejejj$ejjddddejj$ejjddddgdejjejjejjejjejjejjejjejj%ejj&d[	dd\d] ZIej"dejejj$ejjddddejj$ejjddddgdejjejjejjejjejjejjejj%ejj&d^dd_d` ZJej"dejejj$ejjddddejj$ejjddddgdejjejjejjejj%ejj&daddbdc ZKej"dejejj$ejjddddejj$ejjddddgdejjejjejjejjejjejjejjejj%ejj&dd	ddedf ZLej"dejejj$ejjddddejj$ejjddddgdejjejjejjejjejjejjejjejj%ejj&d[	ddgdh ZMej"dddidj ZNej"dddkdl ZOej4dddmdn ZPej"dddodp ZQej"dejejj$ejjddddejj$ejjddddgdejjejjejjejj%ejj&dqddrds ZRej"dejejj$ejjddddejj$ejjddddgdejjejjejjejj%ejj&dqddtdu ZSej4dddvdw ZTe" ddydzZUej"ddd{d| ZVej"dd}ed~fddZWej"ddedfddZXe" dd ZYe" dddZZe" dddZ[e" dd Z\ej"dejejj$ejj2ddddejj$ejj2ddddgdddejj]ejj2ejj%ejj&dddd Z^ej"dejejj$ejj2ddddejj$ejj2ddddgdddejj]ejj]ejj2ejj2ejj%ejj&dddd Z_ej"dejejj$ejjddddejj$ejj2ddddejj$ejjddddgdddejjejj%ejj&ejjdddd Z`ej"dejejj$ejjddddejj$ejj2ddddejj$ejjddddgdddejjejjejjejj%ejj&dEddd Zaej"dejejj$ejjddddejj$ejj2ddddejj$ejjddddgdejjejj%ejj&dHddd Zbej"dejejj$ejjddddejj$ejj2ddddejj$ejjddddgdejj2ejjejj%ejj&dddd Zcej"dejejj$ejjddddejj$ejj2ddddejj$ejjddddgdejj2ejjejj%ejj&dddd Zdej"dejejj$ejjddddejj$ejj2ddddejj$ejjddddgdejj2ejjejj%ejj&dddd Zei de#de#de'de)de)de)de*de*de*de*de+de(de(de,d!e,d#e-d'e/i dDe?dJeAdPeDdpeQd@e=de0d|eVdjeNdleOdseRdeWdeWdeZdeZdeZdeZde[i de[deXdeYdeYde\de\de\d%e.d+e1d4e7d2e6d6e8de9d:e:de<de;dBe>e^e_dZfe'ejgde'ejgde@eCdeBeCdeEeGde@ePdeSeTde3e5ddZheFeDdeHeZdeHeZdeIeWdeIeWdeJe[deJe[deKeYdeKeYdeLe\deLe\deMeXddZie^e^e_e_e^e_de`e`eaebdececedeeddZjdS )    N)allocate_graph_structuresinitialize_graph_structuresinitialize_supplyinitialize_costnetwork_simplex_core
total_costProblemStatussinkhorn_transport_plan)types)	intrinsic)cgutils)ir   dtype)r   r   c                 C   s   t t j}dd }||fS )z1Hardware popcount for uint8 using LLVM intrinsic.c           	      S   s<   |\}|j }t||g}t|j|d}|||g}|S )Nzllvm.ctpop.i8)typellvm_irFunctionTyper   get_or_insert_functionmodulecall)	contextbuildersigargsvalllvm_i8fnty
llvm_ctpopresult r    I/home/ubuntu/.local/lib/python3.10/site-packages/pynndescent/distances.pypopcnt_u8_impl$   s   z!popcnt_u8.<locals>.popcnt_u8_impl)r
   uint8)	typingctxr   r   r"   r    r    r!   	popcnt_u8   s   r%   T)fastmathc                 C   s:   d}t | jd D ]}|| | ||  d 7 }q	t|S )z_Standard euclidean distance.

    .. math::
        D(x, y) = \\sqrt{\sum_i (x_i - y_i)^2}
            r   r   rangeshapenpsqrtxyr   ir    r    r!   	euclidean2   s   
r1   zf4(f4[::1],f4[::1])   C)readonly)r   diffdimr0   )r&   localsc                 C   s<   d}| j d }t|D ]}| | ||  }||| 7 }q|S )zVSquared euclidean distance.

    .. math::
        D(x, y) = \sum_i (x_i - y_i)^2
    r'   r   r*   r)   )r.   r/   r   r6   r0   r5   r    r    r!   squared_euclidean?   s   
r9   c                 C   sB   d}t | jd D ]}|| | ||  d ||  7 }q	t|S )zEuclidean distance standardised against a vector of standard
    deviations per coordinate.

    .. math::
        D(x, y) = \sqrt{\sum_i \frac{(x_i - y_i)**2}{v_i}}
    r'   r   r   r(   )r.   r/   sigmar   r0   r    r    r!   standardised_euclidean^   s   "
r;   c                 C   s6   d}t | jd D ]}|t| | ||  7 }q	|S )z\Manhattan, taxicab, or l1 distance.

    .. math::
        D(x, y) = \sum_i |x_i - y_i|
    r'   r   r)   r*   r+   absr-   r    r    r!   	manhattanm   s   r>   c                 C   s8   d}t | jd D ]}t|t| | ||  }q	|S )zZChebyshev or l-infinity distance.

    .. math::
        D(x, y) = \max_i |x_i - y_i|
    r'   r   )r)   r*   maxr+   r=   r-   r    r    r!   	chebyshev{   s   r@   c                 C   sB   d}t | jd D ]}|t| | ||  | 7 }q	|d|  S )ah  Minkowski distance.

    .. math::
        D(x, y) = \left(\sum_i |x_i - y_i|^p\right)^{\frac{1}{p}}

    This is a general distance. For p=1 it is equivalent to
    manhattan distance, for p=2 it is Euclidean distance, and
    for p=infinity it is Chebyshev distance. In general it is better
    to use the more specialised functions for those distances.
    r'   r         ?r<   )r.   r/   pr   r0   r    r    r!   	minkowski   s    rC   c                 C   sJ   d}t | jd D ]}||| t| | ||  |  7 }q	|d|  S )aW  A weighted version of Minkowski distance.

    .. math::
        D(x, y) = \left(\sum_i w_i |x_i - y_i|^p\right)^{\frac{1}{p}}

    If weights w_i are inverse standard deviations of graph_data in each dimension
    then this represented a standardised Minkowski distance (and is
    equivalent to standardised Euclidean distance for p=1).
    r'   r   rA   r<   )r.   r/   wrB   r   r0   r    r    r!   weighted_minkowski   s   (rE   c                 C   s   d}t j| jd t jd}t| jd D ]}| | ||  ||< qt| jd D ]"}d}t| jd D ]}||||f ||  7 }q3||||  7 }q(t |S )zMahalanobis distance.

    .. math::
        D(x, y) = \sqrt{(x - y)^T V^{-1} (x - y)}

    where V is the covariance matrix. This is equivalent to Euclidean distance
    after transforming the space by the inverse square root of the covariance.
    r'   r   r   )r+   emptyr*   float32r)   r,   )r.   r/   vinvr   r5   r0   tmpjr    r    r!   mahalanobis   s   

rK   c                 C   sB   d}t | jd D ]}| | || kr|d7 }q	t|| jd  S )zHamming distance.

    The proportion of elements that differ between two vectors.

    .. math::
        D(x, y) = \frac{1}{n} \sum_i \mathbf{1}_{x_i \neq y_i}
    r'   r   rA   r)   r*   floatr-   r    r    r!   hamming   s   	rN   c                 C   s^   d}t | jd D ]#}t| | t||  }|dkr,|t| | ||  | 7 }q	|S )zCanberra distance.

    A weighted version of Manhattan distance where each term is divided
    by the sum of absolute values.

    .. math::
        D(x, y) = \sum_i \frac{|x_i - y_i|}{|x_i| + |y_i|}
    r'   r   r<   )r.   r/   r   r0   denominatorr    r    r!   canberra   s   
rP   c                 C   sh   d}d}t | jd D ]}|t| | ||  7 }|t| | ||  7 }q|dkr2t|| S dS )zBray-Curtis distance.

    A distance measure commonly used in ecology to quantify the compositional
    dissimilarity between two samples.

    .. math::
        D(x, y) = \frac{\sum_i |x_i - y_i|}{\sum_i |x_i + y_i|}
    r'   r   )r)   r*   r+   r=   rM   )r.   r/   	numeratorrO   r0   r    r    r!   bray_curtis   s   
rR   c                 C   sh   d}d}t | jd D ]}| | dk}|| dk}||p|7 }||o#|7 }q|dkr,dS t|| | S )a>  Jaccard distance.

    One minus the Jaccard similarity coefficient. For binary vectors this is
    the size of the symmetric difference divided by the size of the union.

    .. math::
        D(x, y) = 1 - \frac{|x \cap y|}{|x \cup y|}

    For continuous vectors, non-zero values are treated as set membership.
    r'   r   rL   )r.   r/   num_non_zero	num_equalr0   x_truey_truer    r    r!   jaccard     rW   )r   rS   rT   rU   rV   r6   r0   c                 C   sl   d}d}| j d }t|D ]}| | dk}|| dk}||p|7 }||o%|7 }q|dkr.dS t||  S )a  Alternative Jaccard distance using log transform.

    A transformed version of Jaccard distance suitable for the bounded-radius
    search algorithm. Uses negative log of the Jaccard similarity coefficient.

    .. math::
        D_{alt}(x, y) = -\log_2\left(\frac{|x \cap y|}{|x \cup y|}\right)

    Use `correct_alternative_jaccard` to convert back to standard Jaccard distance.
    r'   r   )r*   r)   r+   log2)r.   r/   rS   rT   r6   r0   rU   rV   r    r    r!   alternative_jaccard  s   
rZ   c                 C      dt d|   S )zConvert alternative Jaccard distance back to standard Jaccard distance.

    .. math::
        D(x, y) = 1 - 2^{-D_{alt}(x, y)}
    rA          @pow)vr    r    r!   correct_alternative_jaccardI     r`   c                 C   sN   d}t | jd D ]}| | dk}|| dk}|||k7 }q	t|| jd  S )a5  Matching distance (simple matching dissimilarity).

    The proportion of elements that differ in their boolean state.
    For binary vectors, counts positions where one is non-zero and
    the other is zero.

    .. math::
        D(x, y) = \frac{1}{n} \sum_i \mathbf{1}_{(x_i \neq 0) \neq (y_i \neq 0)}
    r'   r   rL   r.   r/   num_not_equalr0   rU   rV   r    r    r!   matchingS  s   rd   c                 C   h   d}d}t | jd D ]}| | dk}|| dk}||o|7 }|||k7 }q|dkr,dS |d| |  S )uE  Dice distance (Sørensen-Dice dissimilarity).

    One minus twice the intersection divided by the sum of cardinalities.
    Commonly used for comparing the similarity of two samples.

    .. math::
        D(x, y) = \frac{|x \oplus y|}{2|x \cap y| + |x \oplus y|}

    where :math:`\oplus` denotes symmetric difference.
    r'   r   r\   r)   r*   r.   r/   num_true_truerc   r0   rU   rV   r    r    r!   diceg  rX   ri   c                 C   s|   d}d}t | jd D ]}| | dk}|| dk}||o|7 }|||k7 }q|dkr,dS t|| | jd  || jd   S )a4  Kulsinski distance.

    A variant of Jaccard distance that includes a count of all dimensions.
    For binary vectors, gives more weight to dimensions where both are false.

    .. math::
        D(x, y) = \frac{|x \oplus y| - |x \cap y| + n}{|x \oplus y| + n}

    where n is the number of dimensions.
    r'   r   rL   rg   r    r    r!   	kulsinski  s   rj   c                 C   R   d}t | jd D ]}| | dk}|| dk}|||k7 }q	d| | jd |  S )zRogers-Tanimoto distance.

    A distance measure for binary vectors that gives double weight to
    disagreements.

    .. math::
        D(x, y) = \frac{2|x \oplus y|}{n + |x \oplus y|}

    where n is the number of dimensions.
    r'   r   r\   rf   rb   r    r    r!   rogers_tanimoto     rl   c                 C   s   d}t | jd D ]}| | dk}|| dk}||o|7 }q	|t| dkkr2|t|dkkr2dS t| jd | | jd  S )zRussell-Rao distance.

    The proportion of dimensions where at least one vector has a false value.

    .. math::
        D(x, y) = \frac{n - |x \cap y|}{n}

    where n is the number of dimensions.
    r'   r   )r)   r*   r+   sumrM   )r.   r/   rh   r0   rU   rV   r    r    r!   
russellrao  s   $ro   c                 C   rk   )a  Sokal-Michener distance.

    Equivalent to Rogers-Tanimoto distance. A distance measure for binary
    vectors that gives double weight to disagreements.

    .. math::
        D(x, y) = \frac{2|x \oplus y|}{n + |x \oplus y|}

    where n is the number of dimensions.
    r'   r   r\   rf   rb   r    r    r!   sokal_michener  rm   rp   c                 C   re   )zSokal-Sneath distance.

    A binary distance that gives double weight to agreements (both true).

    .. math::
        D(x, y) = \frac{|x \oplus y|}{0.5|x \cap y| + |x \oplus y|}

    where :math:`\oplus` denotes symmetric difference.
    r'   r         ?rf   rg   r    r    r!   sokal_sneath  s   rr   c                 C   s   | j d dkrtdtd| d |d   }td| d |d   }t|d t| d t|d  |d   }dt| S )a  Haversine (great circle) distance.

    The angular distance between two points on a sphere, given their
    latitudes and longitudes in radians. Only valid for 2D data where
    x[0], y[0] are latitudes and x[1], y[1] are longitudes.

    .. math::
        D(x, y) = 2 \arcsin\left(\sqrt{\sin^2\left(\frac{\phi_1 - \phi_2}{2}\right) + \cos(\phi_1)\cos(\phi_2)\sin^2\left(\frac{\lambda_1 - \lambda_2}{2}\right)}\right)

    where :math:`\phi` is latitude and :math:`\lambda` is longitude.
    r   r   z6haversine is only defined for 2 dimensional graph_datarq   r2   r\   )r*   
ValueErrorr+   sinr,   cosarcsin)r.   r/   sin_latsin_longr   r    r    r!   	haversine  s   2ry   c           	      C   s   d}d}d}t | jd D ]"}| | dk}|| dk}||o|7 }||o&| 7 }|| o-|7 }q| jd | | | }|dksC|dkrEdS d| | || ||   S )a"  Yule distance.

    A binary distance based on the Yule Q coefficient of association.

    .. math::
        D(x, y) = \frac{2 \cdot n_{TF} \cdot n_{FT}}{n_{TT} \cdot n_{FF} + n_{TF} \cdot n_{FT}}

    where :math:`n_{TF}` is the count of positions where x is true and y is false, etc.
    r'   r   r\   rf   )	r.   r/   rh   num_true_falsenum_false_truer0   rU   rV   num_false_falser    r    r!   yule  s   
r}   c                 C   s   d}d}d}t | jd D ]}|| | ||  7 }|| | d 7 }||| d 7 }q|dkr4|dkr4dS |dks<|dkr>dS d|t||   S )a0  Cosine distance.

    One minus the cosine of the angle between two vectors. Measures the
    angular difference between vectors, independent of their magnitudes.

    .. math::
        D(x, y) = 1 - \frac{\langle x, y \rangle}{\|x\| \|y\|}

    Returns 0 if both vectors are zero, 1 if one is zero.
    r'   r   r   rA   r(   )r.   r/   r   norm_xnorm_yr0   r    r    r!   cosine+  s   r   )r   r~   r   r6   r0   c                 C   s   d}d}d}| j d }t|D ] }|| | ||  7 }|| | | |  7 }||| ||  7 }q|dkr:|dkr:dS |dksB|dkrDtS |dkrJtS t|| | }t|S )a  Alternative cosine distance using log transform.

    A transformed version of cosine distance suitable for the bounded-radius
    search algorithm. Uses negative log of the cosine similarity.

    .. math::
        D_{alt}(x, y) = \log_2\left(\frac{\|x\| \|y\|}{\langle x, y \rangle}\right)

    Returns FLOAT32_MAX for non-positive cosine similarities (treating them
    as infinitely far). Use `correct_alternative_cosine` to convert back
    to standard cosine distance.
    r'   r   r*   r)   FLOAT32_MAXr+   r,   rY   r.   r/   r   r~   r   r6   r0   r    r    r!   alternative_cosineG  s    

r   )r   r6   r0   c                 C   sD   d}| j d }t|D ]}|| | ||  7 }q|dkrdS d| S )aU  Dot product distance for normalized vectors.

    One minus the dot product. This is equivalent to cosine distance when
    vectors are normalized to unit length. For unnormalized vectors, use
    `inner_product` distance instead.

    .. math::
        D(x, y) = 1 - \langle x, y \rangle

    Returns 1.0 for non-positive dot products.
    r'   r   rA   r8   r.   r/   r   r6   r0   r    r    r!   doty  s   
r   c                 C   sH   d}| j d }t|D ]}|| | ||  7 }q|dkrtS t| S )a  Alternative dot product distance using log transform.

    A transformed version of dot product distance suitable for the bounded-radius
    search algorithm. Uses negative log of the dot product.

    .. math::
        D_{alt}(x, y) = -\log_2(\langle x, y \rangle)

    Returns FLOAT32_MAX for non-positive dot products (treating them as
    infinitely far). Use `correct_alternative_cosine` to convert back
    to standard dot distance.
    r'   r   )r*   r)   r   r+   rY   r   r    r    r!   alternative_dot  s   
r   c                 C   r[   )z{Convert alternative cosine/dot distance back to standard form.

    .. math::
        D(x, y) = 1 - 2^{-D_{alt}(x, y)}
    rA   r\   r]   dr    r    r!   correct_alternative_cosine  ra   r   c                 C   s6   d}| j d }t|D ]}|| | ||  7 }q| S )a  Inner product distance (negative inner product).

    This is useful for retrieval tasks where the inner product represents
    similarity (higher = more similar). The distance is simply the negation
    of the inner product, so that higher similarity becomes lower distance.

    Note: Unlike dot product distance, this does NOT assume normalized vectors.
    For normalized vectors, use the `dot` distance instead which is bounded [0, 1].

    .. math::
        D(x, y) = -\sum_i x_i y_i
    r'   r   r8   r   r    r    r!   inner_product  s
   
r   c                 C   sD   d}| j d }t|D ]}|| | ||  7 }q|dkrtS d| S )u  Alternative inner product distance using reciprocal transform.

    This transforms the inner product into a positive distance suitable for
    the bounded-radius search algorithm. The transform is:

    .. math::
        D_{alt}(x, y) = \frac{1}{\langle x, y \rangle}

    This maps positive inner products to positive distances:
    - High inner product → small positive distance
    - Low positive inner product → large positive distance
    - Non-positive inner product → FLOAT32_MAX (treated as infinitely far)

    In high-dimensional nearest neighbor search, we expect true neighbors
    to have positive inner products. Pairs with non-positive inner products
    are treated as maximally distant, similar to how alternative_cosine
    handles negative cosine similarities.

    The correction function `correct_alternative_inner_product` converts
    back to the negative inner product.
    r'   r   rA   r*   r)   r   r   r    r    r!   alternative_inner_product  s   %
r   )r   	ip_resultr~   r   r6   r0   c                 C   s   d}d}d}| j d }t|D ] }|| | ||  7 }|| | | |  7 }||| ||  7 }q|dks8|dkr:tS t|t||   }|dkrT|dt|  S tS )a  A proxy for inner product distance (negative inner product).

    Inner product distance has undesirable properties for nearest neighbor
    graph based search, and NNDescent in general. This is a proxy function
    that behaves similarly to inner product distance for ranking neighbors,
    but avoids some of the pitfalls.

    This is to be used internally, and results should use reranking with true
    inner product distance.
    r'   r   rA   r*   r)   r   r+   rY   r,   )r.   r/   r   r~   r   r6   r0   cosine_resultr    r    r!   proxy_inner_product  s   
r   c                 C   s   | t krdS d|  S )a8  Convert alternative inner product distance back to negative inner product.

    .. math::
        D(x, y) = -\langle x, y \rangle = -\frac{1}{D_{alt}(x, y)}

    For d = FLOAT32_MAX (non-positive inner products), returns 0.0 as the
    negative inner product (representing orthogonal or dissimilar vectors).
    r'   g      )r   r   r    r    r!   !correct_alternative_inner_productI  s   
r   )r   	l1_norm_x	l1_norm_ycdf_xcdf_yr6   r0   c           	      C   s   d}d}| j d }t|D ]}|| | 7 }||| 7 }q|dks$|dkr&tS d}d}d}t|D ]}|| | | 7 }||| | 7 }|t|| 7 }q0|S )aA  A proxy for 1D Wasserstein distance.

    Uses L1 distance on the cumulative distribution functions, which is
    exactly equal to Wasserstein-1 distance for 1D distributions. This
    avoids the more expensive Minkowski computation with allocation.

    For Wasserstein-p with p > 1, this is a lower bound and correlates
    well for nearest neighbor search.

    .. math::
        D_{proxy}(x, y) = \sum_i |F_x(i) - F_y(i)|

    where :math:`F_x, F_y` are the cumulative distribution functions.

    Results should be reranked with true wasserstein_1d distance if p > 1.
    r'   r   r*   r)   r   r+   r=   )	r.   r/   r   r   r6   r0   r   r   r   r    r    r!   proxy_wasserstein_1dX  s    $
r   )	r   r   r   	tv_resulthellinger_resultpxpyr6   r0   c           
      C      d}d}| j d }t|D ]}|| | 7 }||| 7 }q|dks$|dkr&tS d}d}t|D ] }| | | }|| | }	|t||	 7 }|t||	 7 }q.d| d|  S )a  A proxy for Kantorovich (Earth Mover's) distance.

    The full Kantorovich distance requires solving an optimal transport
    problem via network simplex, which is expensive. This proxy uses a
    combination of:
    1. Total variation distance (L1 on normalized distributions)
    2. Hellinger-like term for better correlation

    This is much cheaper to compute and correlates reasonably well with
    true optimal transport distance for nearest neighbor search.

    Results should be reranked with true kantorovich distance.
    r'   r   rq   rA   r*   r)   r   r+   r=   r,   
r.   r/   r   r   r6   r0   r   r   r   r   r    r    r!   proxy_kantorovich  s    #
r   )r   r   r   r   r   mur6   r0   c           
      C   s   d}d}| j d }t|D ]}|| | 7 }||| 7 }q|dks$|dkr&tS d}d}d}t|D ]}|| | | 7 }||| | 7 }||| 7 }q0|| }d}d}d}	t|D ]}|| | | 7 }||| | 7 }|	t|| | 7 }	qW|	S )ae  A proxy for circular Kantorovich distance.

    Uses mean-shifted CDF L1 distance instead of the more expensive
    median-shifted Minkowski distance. The mean is a reasonable
    approximation to the median for most distributions and avoids
    the expensive median computation.

    Results should be reranked with true circular_kantorovich distance.
    r'   r   r   )
r.   r/   r   r   r6   r0   r   r   r   r   r    r    r!   proxy_circular_kantorovich  s0   
r   )bcr   r   r6   r0   c                 C   s   d}d}| j d }t|D ]}|| | 7 }||| 7 }q|dks$|dkr&tS d}t|D ]}|t| | | || |  7 }q,d||  S )a  A proxy for Jensen-Shannon divergence.

    Jensen-Shannon requires computing logs and the mixture distribution,
    which is expensive. This proxy uses squared Hellinger distance, which
    is also a proper divergence on probability distributions and much
    cheaper to compute (no logs required).

    .. math::
        D_{proxy}(x, y) = 1 - \left(\sum_i \sqrt{p_i q_i}\right)^2

    where p, q are the normalized distributions.

    Results should be reranked with true jensen_shannon_divergence.
    r'   r   rA   )r*   r)   r   r+   r,   )r.   r/   r   r   r6   r0   r   r    r    r!   proxy_jensen_shannon  s    
$r   )	r   r   r   r   r   denomr5   r6   r0   c                 C   s   d}d}| j d }t|D ]}|| | 7 }||| 7 }q|dks$|dkr&tS d}t|D ]"}| | | }|| | }|| }	|	dkrN|| }
||
|
 |	 7 }q,|S )a  A proxy for symmetric KL divergence.

    Symmetric KL requires computing logs which is expensive. This proxy
    uses triangular discrimination (symmetric chi-squared divergence),
    which is a second-order approximation to KL divergence and much cheaper.

    .. math::
        D_{proxy}(x, y) = \sum_i \frac{(p_i - q_i)^2}{p_i + q_i}

    Results should be reranked with true symmetric_kl_divergence.
    r'   r   r   )r.   r/   r   r   r6   r0   r   r   r   r   r5   r    r    r!   proxy_symmetric_klH  s$   !
r   c           
      C   r   )a@  A proxy for Sinkhorn (entropy-regularized optimal transport) distance.

    Sinkhorn distance requires iterative matrix scaling which is expensive.
    This proxy uses the same combination as proxy_kantorovich since Sinkhorn
    approximates Kantorovich.

    Results should be reranked with true sinkhorn distance.
    r'   r   rq   rA   r   r   r    r    r!   proxy_sinkhorn  s    
r   c                 C   s   d}d}d}d}| j d }t|D ].}| | ||  }||| 7 }|| | ||  7 }|| | | |  7 }||| ||  7 }qt|}t|}t|| }	|||  }t|td }
t||	 d |
 }|| t|
 d }|| S )ak  Triangle Area Similarity - Sector Area Similarity (TS-SS) distance.

    A distance metric that combines both magnitude and angular information.
    It multiplies a triangle area (capturing angular difference) by a sector
    area (capturing both angular and magnitude differences).

    Useful when both the direction and magnitude of vectors are important.
    r'   r   
   r   r\   )r*   r)   r+   r,   r=   arccosradiansrt   )r.   r/   d_euc_squaredd_cosr~   r   r6   r0   r5   magnitude_differencethetasectortriangler    r    r!   tsss  s&   



r   c                 C   s   d}d}d}| j d }t|D ] }|| | ||  7 }|| | | |  7 }||| ||  7 }q|dkr:|dkr:dS |dksB|dkrDtS |dkrJtS |t||  }dt|tj  S )ud  True angular distance.

    The actual angle between two vectors, normalized to [0, 1].
    Unlike cosine distance which uses 1 - cos(θ), this returns 1 - θ/π.

    .. math::
        D(x, y) = 1 - \frac{\arccos\left(\frac{\langle x, y \rangle}{\|x\| \|y\|}\right)}{\pi}

    Returns 0 for identical directions, approaches 1 for opposite directions.
    r'   r   rA   )r*   r)   r   r+   r,   r   pir   r    r    r!   true_angular  s    
r   c                 C   s   dt td|  t j  S )zConvert alternative cosine distance to true angular distance.

    .. math::
        D_{angular}(x, y) = 1 - \frac{\arccos(2^{-D_{alt}})}{\pi}
    rA   r\   )r+   r   r^   r   r   r    r    r!   true_angular_from_alt_cosine  s   r   c           
      C   s   d}d}d}d}d}t | jd D ]}|| | 7 }||| 7 }q|| jd  }|| jd  }t | jd D ] }| | | }|| | }	||d 7 }||	d 7 }|||	 7 }q5|dkr`|dkr`dS |dkrfdS d|t||   S )a[  Correlation distance.

    One minus the Pearson correlation coefficient. Measures how linearly
    related two vectors are after centering (subtracting their means).

    .. math::
        D(x, y) = 1 - \frac{\langle x - \bar{x}, y - \bar{y} \rangle}{\|x - \bar{x}\| \|y - \bar{y}\|}

    Equivalent to cosine distance on mean-centered data.
    r'   r   r   rA   r(   )
r.   r/   mu_xmu_yr~   r   dot_productr0   	shifted_x	shifted_yr    r    r!   correlation  s*   r   )r   r   r   r6   r0   c                 C   s   d}d}d}| j d }t|D ]}|t| | ||  7 }|| | 7 }||| 7 }q|dkr5|dkr5dS |dks=|dkr?dS td|t||   S )aJ  Hellinger distance.

    A distance for probability distributions, based on the Bhattacharyya
    coefficient. Input vectors are treated as (unnormalized) probability
    distributions.

    .. math::
        D(x, y) = \sqrt{1 - \frac{\sum_i \sqrt{x_i y_i}}{\sqrt{\sum_i x_i \cdot \sum_i y_i}}}

    Returns values in [0, 1].
    r'   r   rA   r2   )r*   r)   r+   r,   r.   r/   r   r   r   r6   r0   r    r    r!   	hellinger+  s   
r   c                 C   s   d}d}d}| j d }t|D ]}|t| | ||  7 }|| | 7 }||| 7 }q|dkr5|dkr5dS |dks=|dkr?tS |dkrEtS t|| | }t|S )as  Alternative Hellinger distance using log transform.

    A transformed version of Hellinger distance suitable for the bounded-radius
    search algorithm.

    .. math::
        D_{alt}(x, y) = \log_2\left(\frac{\sqrt{\sum_i x_i \cdot \sum_i y_i}}{\sum_i \sqrt{x_i y_i}}\right)

    Use `correct_alternative_hellinger` to convert back to standard Hellinger distance.
    r'   r   )r*   r)   r+   r,   r   rY   r   r    r    r!   alternative_hellingerZ  s    

r   c                 C   s   t dtd|   S )zConvert alternative Hellinger distance back to standard Hellinger distance.

    .. math::
        D(x, y) = \sqrt{1 - 2^{-D_{alt}(x, y)}}
    rA   r\   )r+   r,   r^   r   r    r    r!   correct_alternative_hellinger  s   r   averagec           	      C   sD  t t | }|dkr|jdd}n|jdd}t j|jt jd}t |j||< |dkr6|d t j	S || }t 
|jt j}|dd  |d d k|dd < | | }|dkrb|t j	S t |d	 }t |t t|g|jf}|d
kr|| t j	S |dkr||d  d t j	S d|| ||d   d  S )Nordinal	mergesort)kind	quicksortr   r2   denser   r?   minrq   )r+   ravelasarrayargsortrF   sizeintparangeastypefloat64onesbool_cumsumnonzeroconcatenatearraylenr   )	amethodarrsorterinvobsr   r   countr    r    r!   rankdata  s*    r   c                 C   s   t | }t |}t||S )aA  Spearman rank correlation distance.

    One minus the Spearman rank correlation coefficient. Measures the monotonic
    relationship between two vectors by computing correlation on their ranks.

    .. math::
        D(x, y) = 1 - \rho(\text{rank}(x), \text{rank}(y))

    where :math:`\rho` is Pearson correlation.
    )r   r   )r.   r/   x_ranky_rankr    r    r!   	spearmanr  s   
r   )nogili c                 C   s
  | dk}|dk}| |  tj}||  tj}| }| }	|| }||	 }||ddf dd|f }
t|jd |jd d\}}}t|| ||j t|
||j	 t
|||}|dkrctdt||||}|tjkrstd|tjkr|tdt|j|j	}|S )a  Kantorovich distance (Earth Mover's Distance / Wasserstein distance).

    The optimal transport distance between two probability distributions.
    Computes the minimum cost to transform one distribution into another,
    given a cost matrix.

    Parameters
    ----------
    x, y : array-like
        Input vectors treated as probability distributions (will be normalized).
    cost : array-like
        Cost matrix where cost[i,j] is the cost of moving mass from bin i to bin j.
    max_iter : int
        Maximum number of iterations for the network simplex algorithm.

    Returns
    -------
    float
        The optimal transport distance.
    r   NFzDKantorovich distance inputs must be valid probability distributions.z>Optimal transport problem was INFEASIBLE. Please check inputs.z=Optimal transport problem was UNBOUNDED. Please check inputs.)r   r+   r   rn   r   r*   r   supplyr   costr   rs   r   r   
INFEASIBLE	UNBOUNDEDr   flow)r.   r/   r   max_iterrow_maskcol_maskr   ba_sumb_sumsub_costnode_arc_dataspanning_treegraphinit_statussolve_statusr   r    r    r!   kantorovich  s<   


r   rA   c                 C   s   | dk}|dk}| |  tj}||  tj}| }| }	|| }||	 }||ddf dd|f }
t| ||
|d}|jd }|jd }d}t|D ]}t|D ]}||||f |||f  7 }qTqN|S )a  Sinkhorn distance (entropy-regularized optimal transport).

    An approximation to the Kantorovich distance using entropy regularization.
    Faster to compute than exact optimal transport for large distributions.

    Parameters
    ----------
    x, y : array-like
        Input vectors treated as probability distributions (will be normalized).
    cost : array-like
        Cost matrix where cost[i,j] is the cost of moving mass from bin i to bin j.
    regularization : float
        Entropy regularization parameter. Smaller values give results closer
        to exact Kantorovich distance but may be less stable.

    Returns
    -------
    float
        The entropy-regularized optimal transport distance.
    r   N)r   regularizationr2   r'   )r   r+   r   rn   r	   r*   r)   )r.   r/   r   r   r   r   r   r   r   r   r   transport_plandim_idim_jr   r0   rJ   r    r    r!   sinkhorn  s(   

r   c           
   
   C   s   d}d}d}| j d }t|D ]}|| | 7 }||| 7 }q|t| 7 }|t| 7 }| t | }|t | }d||  }	t|D ]$}|d|| t|| |	|   || t|| |	|     7 }q@|S )a  Jensen-Shannon divergence.

    A symmetrized and smoothed version of KL divergence. Measures the
    similarity between two probability distributions.

    .. math::
        D(x, y) = \frac{1}{2} \left( D_{KL}(x \| m) + D_{KL}(y \| m) \right)

    where :math:`m = \frac{1}{2}(x + y)` and :math:`D_{KL}` is KL divergence.
    Input vectors are normalized to probability distributions.
    r'   r   rq   r*   r)   FLOAT32_EPSr+   log)
r.   r/   r   r   r   r6   r0   pdf_xpdf_ymr    r    r!   jensen_shannon_divergenceA  s"   
:r   c                 C   s   d}d}t | jd D ]}|| | 7 }||| 7 }q| | }|| }t d|jd D ]}||  ||d  7  < ||  ||d  7  < q*t|||S )a&  1-dimensional Wasserstein distance.

    The p-Wasserstein distance for 1D distributions, computed efficiently
    via the CDF. Input vectors are treated as histograms over ordered bins.

    .. math::
        W_p(x, y) = \left( \sum_i |F_x(i) - F_y(i)|^p \right)^{1/p}

    where :math:`F_x, F_y` are the cumulative distribution functions.

    Parameters
    ----------
    x, y : array-like
        Input vectors treated as probability distributions (will be normalized).
    p : int
        The order of the Wasserstein distance (default 1).
    r'   r   r2   )r)   r*   rC   )r.   r/   rB   x_sumy_sumr0   x_cdfy_cdfr    r    r!   wasserstein_1df  s   r  c                 C   sv  d}d}t | jd D ]}|| | 7 }||| 7 }q| | }|| }t d|jd D ]}||  ||d  7  < ||  ||d  7  < q*t|| | }d}	|dkrut |jd D ]}|	t|| ||  | | 7 }	q[|	d|  S |dkrt |jd D ]}|| ||  | }
|	|
|
 7 }	qt|	S |dkrt |jd D ]}|	t|| ||  | 7 }	q|	S td)a  Circular Kantorovich distance.

    The Wasserstein distance for distributions on a circle (periodic domain).
    Useful for cyclic data like angles, time of day, or periodic histograms.

    Parameters
    ----------
    x, y : array-like
        Input vectors treated as probability distributions (will be normalized).
    p : int
        The order of the Wasserstein distance (default 1).
    r'   r   r2   r   rA   z)Invalid p supplied to Kantorvich distance)r)   r*   r+   medianr=   r,   rs   )r.   r/   rB   r   r  r0   r  r  r   r   r   r    r    r!   circular_kantorovich  s4   $
 r  c           	   	   C   s   d}d}d}| j d }t|D ]}|| | 7 }||| 7 }q|t| 7 }|t| 7 }| t | }|t | }t|D ]"}||| t|| ||   || t|| ||    7 }q:|S )a3  Symmetric Kullback-Leibler divergence.

    The sum of KL divergences in both directions, making it symmetric.

    .. math::
        D(x, y) = D_{KL}(x \| y) + D_{KL}(y \| x)

    where :math:`D_{KL}(p \| q) = \sum_i p_i \log(p_i / q_i)`.
    Input vectors are normalized to probability distributions.
    r'   r   r   )	r.   r/   r   r   r   r6   r0   r   r   r    r    r!   symmetric_kl_divergence  s    
(r  zf4(u1[::1],u1[::1])F)r   intersectionr6   r0   )r&   r   boundscheckr7   c                 C   sB   d}| j d }t|D ]}| | || A }|t|7 }qt|S )aq  Hamming distance for bit-packed binary vectors.

    Counts the number of differing bits between two uint8 arrays, where each
    byte contains 8 packed binary features. More efficient than standard
    Hamming for binary data.

    .. math::
        D(x, y) = \sum_i \text{popcount}(x_i \oplus y_i)

    Returns the total count of differing bits (not normalized).
    r   )r*   r)   r%   r+   rG   )r.   r/   r   r6   r0   r  r    r    r!   bit_hamming  s   

r
  )r   r   and_or_r6   r0   c                 C   s   d}d}| j d }t|D ]}| | || @ }| | || B }|t|7 }|t|7 }q|dkr2dS tt|t|  S )a  Jaccard distance for bit-packed binary vectors.

    Computes Jaccard distance for uint8 arrays where each byte contains
    8 packed binary features. Uses negative log transform for compatibility
    with the bounded-radius search algorithm.

    .. math::
        D(x, y) = -\log\left(\frac{\text{popcount}(x \land y)}{\text{popcount}(x \lor y)}\right)

    More efficient than standard Jaccard for binary data.
    r   r'   )r*   r)   r%   r+   r   rG   )r.   r/   r   r   r6   r0   r  r  r    r    r!   bit_jaccard
  s    
r  zf4(f4[::1],u1[::1],f4[::1]))r   r6   r0   y_ic                 C   sD   d}| j d }t|D ]}|||  }| | | }||| 7 }q|S )zSquared Euclidean distance between a float vector ``x`` and
    a quantized uint8 vector ``y``. The uint8 values in ``y`` are mapped
    back to floats using the provided ``quantized_values`` array.
    r'   r   r8   )r.   r/   quantized_valuesr   r6   r0   r  r5   r    r    r!   quantized_uint8_sq_euclidean:  s   
r  c           	      C   s   d}d}d}| j d }t|D ] }|||  }|| | | 7 }|| | | |  7 }||| 7 }q|dkr:|dkr:dS |dksB|dkrDtS |dkrJtS |t||  }t|d d  S )zAlternative cosine distance between a float vector ``x`` and
    a quantized uint8 vector ``y``. The uint8 values in ``y`` are mapped
    back to floats using the provided ``quantized_values`` array.
    r'   r   rA   r\   r   )	r.   r/   r  r   r~   r   r6   r0   qyr    r    r!   "quantized_uint8_alternative_cosine]  s"   
r  c                 C   sj   d}| j d }d}t|D ]}|||  }|| | | 7 }||| 7 }q|dkr*tS t|t|  S )zAlternative dot product distance between a float vector ``x`` and
    a quantized uint8 vector ``y``. The uint8 values in ``y`` are mapped
    back to floats using the provided ``quantized_values`` array. x and y
    are assumed to be normalized.
    r'   r   r   )r.   r/   r  r   r6   r   r0   r  r    r    r!   quantized_uint8_alternative_dot  s   
r  )quantized_indexr   r6   r0   c           	      C   sj   d}| j d }t|D ]'}||d  }|d dkr|d@ }n|d? d@ }| | ||  }||| 7 }q|S )zSquared Euclidean distance between a float vector ``x`` and
    a quantized uint8 vector ``y``. The uint8 values in ``y`` are mapped
    back to floats using upper and lower nibbles and via the provided
    ``quantized_values`` array.
    r'   r   r         r8   )	r.   r/   r  r   r6   r0   byter  r5   r    r    r!   quantized_uint4_sq_euclidean  s   

r  c                 C   s   d}d}d}| j d }t|D ]5}||d  }|d dkr"|d@ }	n|d? d@ }	||	 }
|| | |
 7 }|| | | |  7 }||
|
 7 }q|dkrO|dkrOdS |dksW|dkrYtS |dkr_tS |t||  }t|d d  S )zAlternative cosine distance between a float vector ``x`` and
    a quantized uint8 vector ``y``. The uint8 values in ``y`` are mapped
    back to floats using upper and lower nibbles and via the provided
    ``quantized_values`` array.
    r'   r   r   r  r  rA   r\   r   )r.   r/   r  r   r~   r   r6   r0   r  r  r  r    r    r!   "quantized_uint4_alternative_cosine  s*   

r  c           
      C   s   d}| j d }d}t|D ]+}||d  }|d dkr |d@ }n|d? d@ }|| }	|| | |	 7 }||	|	 7 }q|dkr?tS t|t|  S )a  Alternative dot product distance between a float vector ``x`` and
    a quantized uint8 vector ``y``. The uint8 values in ``y`` are mapped
    back to floats using upper and lower nibbles and via the provided
    ``quantized_values`` array. x and y are assumed to be normalized.
    r'   r   r   r  r  r   )
r.   r/   r  r   r6   r   r0   r  r  r  r    r    r!   quantized_uint4_alternative_dot  s   

r  l2sqeuclideantaxicabl1	linfinitylinftylinf
seuclidean
wminkowski
braycurtiswassersteinzwasserstein-1dzkantorovich-1dkantorovich_1dcircular_wassersteinzjensen-shannonjensen_shannonzsymmetric-klsymmetric_klsymmetric_kullback_lieblerrogerstanimotosokalsneathsokalmichener)r
  r  )dist
correction)r1   r  r   r   r   r   r   rW   )
proxy_dist	true_dist)r   r   zproxy_wasserstein-1dr   proxy_wassersteinr   proxy_circular_wassersteinr   zproxy_jensen-shannonr   zproxy_symmetric-klr   )r1   r  r   r   rN   rW   )r1   r  r   r   )binaryr#   uint4)r   )r   )r2   )knumpyr+   numbapynndescent.optimal_transportr   r   r   r   r   r   r   r	   r
   numba.extendingr   
numba.corer   llvmliter   r   eyerG   _mock_identityr   
_mock_oneszerosr   _dummy_costfinfoepsr   r?   r   r%   njitr1   Arrayr   uint16r9   r;   r>   r@   rC   rE   rK   rN   rP   rR   rW   r#   rZ   	vectorizer`   rd   ri   rj   rl   ro   rp   rr   ry   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  int32r
  r  r  r  r  r  r  r  named_distancesr,   fast_distance_alternativesproxy_distancesquantized_distancesr    r    r    r!   <module>   s"  (























	



















!
	


	
	
!



)
'
.
%
$

!

!



	
'


 
	
#


D0
$"5
"






"
	
 !"#$%&'()*+,-/0123456789
D
5	
