o
    {i}                     @   s  d dl mZ d dlZd dlZd dlmZmZ d dlm	Z	m
Z
mZ eejjZeejjZejdddd	d
Zejdddd Zejdddd Zejdddd Zejdejejjejjddddejjejjddddgejejdddd Zejejejejjddejejjddfejjejjddddejjejjddddejjejjddddejjejjddddgdejjddd ejjddd ejjejjejjejjejjddddd Zejdddd Zejejejejjejejjfejjejjddddejjejjddddejjejjddddejjejjddddgdejjejjejjejjejjdddd d! Z ejejejjejjddddejjejjddddejjejjddddejjejjddddgdejjejjejjejjejjejjd"ddd#d$ Z!e d%d& Z"ejdd'd(d) Z#ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdejjddd ejjejjejj$ejjd+d,d-d. Z%e d/d0 Z&e d1d2 Z'e dd4d5Z(e d6d7 Z)e d8d9 Z*ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdd'd:d; Z+e d<d= Z,ejd*ejejjejjddddejjejjddddejjejjddddejjejjddddgdejj$ejj$d>d,d?d@ Z-ej.dd'dAdB Z/e dCdD Z0e dEdF Z1e dGdH Z2e dIdJ Z3e dKdL Z4e dMdN Z5e dOdP Z6e dQdR Z7ejdejjejjejjejj$ejjdSd,dTdU Z8ej.dddVdWdX Z9e dYdZ Z:ejdd[ejjid,d\d] Z;e d^d_ Z<e d`da Z=ejdejjejjejjejj$ejjdbd,dcdd Z>ej.dddVdedf Z?e dgdh Z@didj ZAe e@fdkdlZBe ddmdnZCe dodp ZDe dqdr ZEejddsdt	uddvdwZFejddsdt	uddxdyZGi dze#d{e#d|e%d}e&d~e&de&de'de'de'de'de(de*de+de)de,de1de0i de2de3de4de5de6de7de<deBdeBdeCdeCdeCdeCde=deDdeDdeEeEeEdZHdZIe%ejJde%ejJde8e9de;e9de>e?de-e/ddZKdS )    )print_functionN)normtau_rand)kantorovichjensen_shannon_divergencesymmetric_kl_divergenceT)cacheh㈵>:0yE>c                 C   s$   t | | }|||t |  kS N)npabs)abrtolatoldiff r   F/home/ubuntu/.local/lib/python3.10/site-packages/pynndescent/sparse.pyisclose   s   r   c                 C   s@   t | }t t jdt jd|dd  |d d kf}|| S )N   dtype)r   sortconcatenateonesbool_)arrauxflagr   r   r   
arr_unique   s   
.r!   c                 C   s6   | j d dkr	|S |j d dkr| S tt| |fS Nr   )shaper!   r   r   )ar1ar2r   r   r   	arr_union$   s
   r&   c                 C   s:   t | |f}|  |d d |dd  |d d k S )Nr   r   )r   r   r   )r$   r%   r   r   r   r   arr_intersect0   s   $r'   zi4(i4[:],i4[:])r   C)readonly)i1i2)localsc           	      C   s   | j d dks|j d dkrdS d}d}| j d d }|j d d }| | }|| }d}	 ||krU|d7 }||k rB|d7 }| | }n	 |S ||k rR|d7 }|| }n(	 |S ||k rf||k rf|d7 }| | }n||k rw||k rw|d7 }|| }n	 |S q-Nr   r   r#   )	r$   r%   r*   r+   limit1limit2j1j2resultr   r   r   fast_intersection_size8   s>   



r4   )
result_indresult_datavalr*   r+   r1   r2   )fastmathr,   r   c                 C   s  | j d |j d  }tj|tjd}tj|tjd}d}d}d}	|| j d k r||j d k r| | }
|| }|
|kr[|| ||  }|dkrR|
||	< |||	< |	d7 }	|d7 }|d7 }n5|
|k rx|| }|dkrs|
||	< |||	< |	d7 }	|d7 }n|| }|dkr|||	< |||	< |	d7 }	|d7 }|| j d k r||j d k s.|| j d k r| | }
|| }|dkr|
||	< |||	< |	d7 }	|d7 }|| j d k s||j d k r|| }|| }|dkr|||	< |||	< |	d7 }	|d7 }||j d k s|d |	 }|d |	 }||fS Nr   r   r   )r#   r   zerosint32float32)ind1data1ind2data2result_sizer5   r6   r*   r+   nnzr1   r2   r7   r   r   r   
sparse_sumn   sh   

	
rC   c                 C   s   t | ||| S r   )rC   )r=   r>   r?   r@   r   r   r   sparse_diff      rD   )r7   r*   r+   r1   r2   c                 C   s   t jjt jj}t jjt jj}d}d}|| jd k rj||jd k rj| | }|| }	||	krO|| ||  }
|
dkrF|| ||
 |d7 }|d7 }n||	k rX|d7 }n|d7 }|| jd k rj||jd k s$||fS r-   )	numbatypedList
empty_listtypesr;   r<   r#   append)r=   r>   r?   r@   r5   r6   r*   r+   r1   r2   r7   r   r   r   
sparse_mul   s&   



rL   )r3   r7   r*   r+   r1   r2   c                 C   s   | j d }|j d }d}d}d}| | }	|| }
	 |	|
krF|| ||  }||7 }|d7 }||kr3|S | | }	|d7 }||krA|S || }
n!|	|
k rY|d7 }||krT|S | | }	n|d7 }||krc|S || }
q)Nr           Tr   r.   )r=   r>   r?   r@   dim1dim2r3   r*   r+   r1   r2   r7   r   r   r   sparse_dot_product   s:   



rP   c                 C   s  t | |}tj|jd tjd}tj|jd tjd}d}d}d}	|| jd k r||jd k r| | }
|| }|
|kr`|| ||  }|dkrW|| ||	< || ||	< |	d7 }	|d7 }|d7 }n1|
|k r{|| }|dkrv|| ||	< |	d7 }	|d7 }n|| }|dkr|| ||	< |	d7 }	|d7 }|| jd k r||jd k s/|| jd k r|| }|dkr|| ||	< |	d7 }	|d7 }|| jd k s||jd k r|| }|dkr|| ||	< |	d7 }	|d7 }||jd k s|d |	 }|d |	 }||fS r9   )r&   r   r:   r#   r<   )r=   r>   r?   r@   r5   result_data1result_data2r*   r+   rB   r1   r2   r7   r   r   r   dense_union<  s\   


rS   )r8   c                 C   sD   t | |||\}}d}t|jd D ]
}||| d 7 }qt|S )NrM   r      )rD   ranger#   r   sqrtr=   r>   r?   r@   _aux_datar3   ir   r   r   sparse_euclideanv  s
   
r[   z#f4(i4[::1],f4[::1],i4[::1],f4[::1]))rY   r3   r   dimrZ   )r8   r,   c           	      C   sD   t | |||\}}d}t|}t|D ]}||| ||  7 }q|S NrM   )rD   lenrU   )	r=   r>   r?   r@   rX   rY   r3   r\   rZ   r   r   r   sparse_squared_euclidean  s   r_   c                 C   s@   t | |||\}}d}t|jd D ]}|t|| 7 }q|S NrM   r   rD   rU   r#   r   r   rW   r   r   r   sparse_manhattan  s
   rb   c                 C   sB   t | |||\}}d}t|jd D ]}t|t|| }q|S r`   )rD   rU   r#   maxr   r   rW   r   r   r   sparse_chebyshev  s
   rd          @c           	      C   sL   t | |||\}}d}t|jd D ]}|t|| | 7 }q|d|  S )NrM   r         ?ra   )	r=   r>   r?   r@   prX   rY   r3   rZ   r   r   r   sparse_minkowski  s
   rh   c                 C   s$   t | |||d jd }t|| S r"   )rD   r#   float)r=   r>   r?   r@   
n_featuresnum_not_equalr   r   r   sparse_hamming  s   rl   c                 C   s~   t |}t |}t| |||\}}d| t j}t| |||\}}	t |	}	t||	||\}
}d}|D ]}||7 }q6|S )Nrf   rM   )r   r   rC   astyper<   rD   rL   )r=   r>   r?   r@   	abs_data1	abs_data2
denom_inds
denom_data
numer_inds
numer_datarX   val_datar3   r7   r   r   r   sparse_canberra  s   



ru   c           	      C   sv   t | |||\}}t|}|jd dkrdS t|}|dkr"dS t| |||\}}t|}t|}t|| S Nr   rM   )rC   r   r   r#   sumrD   ri   )	r=   r>   r?   r@   rX   rq   denominatorrs   	numeratorr   r   r   sparse_bray_curtis  s   



rz   c                 C   s>   t | |}| jd |jd  | }|dkrdS t|| | S rv   r4   r#   ri   r=   r>   r?   r@   	num_equalnum_non_zeror   r   r   sparse_jaccard  s
   
r   )r~   r}   c                 C   sJ   t | |}| jd |jd  | }|dkrdS |dkrtS t||  S rv   )r4   r#   FLOAT32_MAXr   log2r|   r   r   r   sparse_alternative_jaccard  s   
r   c                 C   s   dt d|   S )Nrf   re   )pow)vr   r   r   correct_alternative_jaccard  rE   r   c                 C   s6   t | |}| jd |jd  | }|| }t|| S r"   r{   r=   r>   r?   r@   rj   num_true_truer~   rk   r   r   r   sparse_matching  s   
r   c                 C   F   t | |}| jd |jd  | }|| }|dkrdS |d| |  S )Nr   rM   re   r4   r#   r=   r>   r?   r@   r   r~   rk   r   r   r   sparse_dice!     
r   c                 C   sN   t | |}| jd |jd  | }|| }|dkrdS t|| | ||  S rv   r{   r   r   r   r   sparse_kulsinski-  s   
r   c                 C   :   t | |}| jd |jd  | }|| }d| ||  S Nr   re   r   r   r   r   r   sparse_rogers_tanimoto;     
r   c                 C   sh   | j d |j d krt| |krdS t| |}|t|dkkr,|t|dkkr,dS t|| | S rv   )r#   r   allr4   rw   ri   )r=   r>   r?   r@   rj   r   r   r   r   sparse_russellraoD  s   "
$r   c                 C   r   r   r   r   r   r   r   sparse_sokal_michenerQ  r   r   c                 C   r   )Nr   rM   g      ?r   r   r   r   r   sparse_sokal_sneathZ  r   r   c           
      C   sp   t | |||\}}d}t|}t|}|D ]}	||	7 }q|dkr&|dkr&dS |dks.|dkr0dS d|||   S NrM   rf   )rL   r   )
r=   r>   r?   r@   rX   rY   r3   norm1norm2r7   r   r   r   sparse_cosinef  s   
r   )r3   norm_xnorm_yr\   rZ   c                 C   s   t | |||\}}d}t|}t|}t|}	t|	D ]}
|||
 7 }q|dkr.|dkr.dS |dks6|dkr8tS |dkr>tS || | }t|S r]   )rL   r   r^   rU   r   r   r   )r=   r>   r?   r@   rX   rY   r3   r   r   r\   rZ   r   r   r   sparse_alternative_cosinex  s   
r   )r8   r   c                 C   s.   t dt| dds| dk rdS dtd|   S NrM   gHz>)r   rf   re   )r   r   r   dr   r   r   !sparse_correct_alternative_cosine  s   r   c                 C   s   t | |||}d| S )Nrf   )rP   r=   r>   r?   r@   r3   r   r   r   
sparse_dot  s   r   r3   c                 C   s&   t | |||}|dkrtS t| S r]   )rP   r   r   r   r   r   r   r   sparse_alternative_dot  s   r   c                 C   sF  d}d}d}| j d dkr|j d dkrdS | j d dks$|j d dkr&dS t|j d D ]}||| 7 }q-t|j d D ]}||| 7 }q=|| }|| }tj|j d tjd}	tj|j d tjd}
t|j d D ]
}|| | |	|< qkt|j d D ]
}|| | |
|< q}tt|	d || j d  |d   }tt|
d ||j d  |d   }t| |	||
\}}t|}|D ]}||7 }qt| j d D ]}| | |vr||	| | 8 }qt|j d D ]}|| |vr||
| | 8 }qt	| |}||| ||j d   7 }|dkr|dkrdS |dkrdS d|||   S )NrM   r   rf   r   rT   )
r#   rU   r   emptyr<   rV   r   rL   setr&   )r=   r>   r?   r@   rj   mu_xmu_ydot_productrZ   shifted_data1shifted_data2r   r   dot_prod_indsdot_prod_datacommon_indicesr7   all_indicesr   r   r   sparse_correlation  sX     


r   c                 C   s   t | |||\}}d}t|}t|}t|| }	|D ]	}
|t|
7 }q|dkr2|dkr2dS |dks:|dkr<dS ||	krBdS td||	  S r   )rL   r   rw   rV   )r=   r>   r?   r@   aux_indsrY   r3   r   r   sqrt_norm_prodr7   r   r   r   sparse_hellinger  s   

r   )r3   	l1_norm_x	l1_norm_yr\   rZ   c                 C   s   t | |||\}}d}t|}t|}t|}	t|	D ]}
|t||
 7 }q|dkr3|dkr3dS |dks;|dkr=tS |dkrCtS t|| | }t|S r`   )rL   r   rw   r^   rU   rV   r   r   )r=   r>   r?   r@   r   rY   r3   r   r   r\   rZ   r   r   r   sparse_alternative_hellinger  s   


r   c                 C   s4   t dt| dds| dk rdS tdtd|   S r   )r   r   r   rV   r   r   r   r   r   $sparse_correct_alternative_hellinger'  s   r   c                 C   s   t | |k S r   )r   r<   )xyr   r   r   dummy_ground_metric/  rE   r   c                    s   t   fdd}|S )a  Generate a "ground_metric" suitable for passing to a ``sparse_kantorovich``
    distance function. This should be a metric that, given indices of the data,
    should produce the ground distance between the corresponding vectors. This
    allows the construction of a cost_matrix or ground_distance_matrix between
    sparse samples on the fly -- without having to compute an all pairs distance.
    This is particularly useful for things like word-mover-distance.

    For example, to create a suitable ground_metric for word-mover distance one
    would use:

    ``wmd_ground_metric = create_ground_metric(word_vectors, cosine)``

    Parameters
    ----------
    ground_vectors: array of shape (n_features, d)
        The set of vectors between which ground_distances are measured. That is,
        there should be a vector for each feature of the space one wishes to compute
        Kantorovich distance over.

    metric: callable (numba jitted)
        The underlying metric used to cpmpute distances between feature vectors.

    Returns
    -------
    ground_metric: callable (numba jitted)
        A ground metric suitable for passing to ``sparse_kantorovich``.
    c                    s    |   | S r   r   )index1index2ground_vectorsmetricr   r   ground_metricQ  s   z+create_ground_metric.<locals>.ground_metric)rF   njit)r   r   r   r   r   r   create_ground_metric4  s   r   c                 C   sh   t | jd |jd f}t| jd D ]}t|jd D ]}|| | || |||f< qqt|||S r"   )r   r   r#   rU   r   )r=   r>   r?   r@   r   cost_matrixrZ   jr   r   r   sparse_kantorovichX  s   r   c                 C   s(  d}d}d}d}d}	d}
d}t |}t |}dd }|| jd k r|	|jd k r| | }||	 }||kr`||||  7 }|
|| | 7 }
|||	 | 7 }||
| |}|}|d7 }|	d7 }	n?||k r||||  7 }|
|| | 7 }
||
| |}|}|d7 }n||||  7 }|||	 | 7 }||
| |}|}|	d7 }	|| jd k r|	|jd k s*|| jd k r| | }||||  7 }|
|| | 7 }
||
| |}|}|d7 }|| jd k s|	|jd k r||	 }||||  7 }|||	 | 7 }||
| |}|}|	d7 }	|	|jd k st |d| S )NrM   r   c                 S   s   t t | |S r   )r   powerr   )r   rg   r   r   r   <lambda>o  s    z'sparse_wasserstein_1d.<locals>.<lambda>r   rf   )r   rw   r#   r   )r=   r>   r?   r@   rg   r3   old_inddeltar*   r+   cdf1cdf2r   r   r   r1   r2   r   r   r   sparse_wasserstein_1dc  sd   



r   c                 C      t | |||\}}t||S r   )rS   r   r=   r>   r?   r@   dense_data1dense_data2r   r   r    sparse_jensen_shannon_divergence     
r   c                 C   r   r   )rS   r   r   r   r   r   sparse_symmetric_kl_divergence  r   r   F)parallelr   rf   c              	   C   s  t | jd D ]}| |df g}	||df g}
td| jd D ]}| ||f dk r, nd}tt|	D ]e}|	| }||| ||f  || ||f d   }||| ||f  || ||f d   }||| ||d   }||| ||d   }|||||}|
| tkr||||f k rt||k rd} nq4|r|	| ||f  |
|||f  q t| jd D ]&}|t|	k r|	| | ||f< |
| |||f< qd| ||f< tj	|||f< qq| |fS )Nr   r   TFr   )
rF   pranger#   rU   r^   FLOAT32_EPSr   rK   r   inf)indices	distancesdata_indicesdata_indptr	data_datadist	rng_stateprune_probabilityrZ   new_indicesnew_distancesr   r    kcfrom_ind	from_datato_indto_datar   r   r   r   	diversify  sF   ""r   c	                 C   s  | j d d }	t|	D ]}
|| |
 | |
d   }|| |
 | |
d   }t|}tj|j d tjd}td|j d D ]n}|| }t|D ]c}|| }|| dkr|| }|| }||| ||d   }||| ||d   }||| ||d   }||| ||d   }|||||}|| tkr||| k rt	||k rd||<  nqHq>t|j d D ]}|| }|| dkrd|| |
 | < qqd S )Nr   r   r   )
r#   rF   r   r   argsortr   int8rU   r   r   )graph_indptrgraph_indices
graph_datar   r   r   r   r   r   n_nodesrZ   current_indicescurrent_dataorderretainedidxr   r   lrg   q	from_indsr   to_indsr   r   r   r   r   diversify_csr  s<   
r   	euclideanl2sqeuclidean	manhattanl1taxicab	chebyshevlinflinfty	linfinity	minkowskicanberra
braycurtishammingjaccarddicematching	kulsinskirogerstanimoto
russellraosokalmichenersokalsneathcosinecorrelationr   wassersteinwasserstein_1dzwasserstein-1dzkantorovich-1d	hellingerzjensen-shannonjensen_shannonzsymmetric-kl)symmetric_klsymmetric_kullback_liebler)r  r	  r
  r  r  r  r  )r   
correction)r   r   r  dotr  r  )r	   r
   )re   )r   )rf   )L
__future__r   numpyr   rF   pynndescent.utilsr   r   pynndescent.distancesr   r   r   finfor<   epsr   rc   r   r   r   r!   r&   r'   rJ   r;   Arrayuint16r4   TuplerC   rD   ListTyperL   rP   rS   r[   intpr_   rb   rd   rh   rl   ru   rz   r   r   	vectorizer   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   sparse_named_distancessparse_need_n_featuresrV   !sparse_fast_distance_alternativesr   r   r   r   <module>   sp  







)	

?

	
'

9
	
	



	


	














	
<



$
?

	6
6	
 !"#$%&'(,


