o
    {i>                     @   sz  d Z ddlZddlZddlmZmZ ddlmZm	Z	m
Z
mZ ddlmZmZ ddlmZ ddlmZ ddlmZmZ ejd	ed
 g dZg dZeg deedd\ZZedddd\ZZed7 Zeee7 ZeeefZ e!eefZ"ee e"ddd\Z Z#Z"Z$e Z%e%&e Z e%'e#Z#dd Z(dd Z)dd Z*dd Z+dd  Z,d!d" Z-d#d$ Z.d%d& Z/d'd( Z0d)d* Z1d+d, Z2dS )-z9
Simple tests for flat clustering over HDBSCAN hierarchy
    N)HDBSCANapproximate_predict)HDBSCAN_flatapproximate_predict_flatmembership_vector_flat"all_points_membership_vectors_flat)
make_blobs
make_moons)StandardScaler)train_test_split)assert_array_equalassert_array_lessignorecategory))r      )gɿr   )皙?r   )g      ?r   )g       @g      ?)g      @g        )g      ?g{Gz?gQ?ffffff?r   r   )F      P   d   (         )	n_samplescenterscluster_stdrandom_statei,  gQ?*   )r   noiser   g      @r   )	test_sizer   c                 C   s   t | d S )Nr   )npamax)labels_ r%   K/home/ubuntu/.local/lib/python3.10/site-packages/hdbscan/tests/test_flat.pyn_clusters_from_labels+   s   r'   c                  C   s   t ddt} t| j}tt|dd}t|j| j t|j| j t ddt} t| j}tt|dd}t|j| j t|j| j dS )zE
    Verify that the default clustering of HDBSCAN is preserved.
    eom)cluster_selection_method
n_clustersr)   leafN)r   fitXr'   r$   r   r   probabilities_)	clustererr+   clusterer_flatr%   r%   r&   test_flat_base_default/   s&   

r2   c                  C   s   d} t t| dd}|j}td|dt}t|j|j t|j|j d} t t| dd}|j}td|dt}t|j|j t|j|j dS )zj
    Verify that a clustering of HDBSCAN specified by
        cluster_selection_epsilon is preserved.
       r(   r*   r)   cluster_selection_epsilon   r,   N)r   r.   r5   r   r-   r   r$   r/   )r+   r1   epsilonr0   r%   r%   r&   test_flat_base_epsilonO   s:   r8   c                  C   s   t dddt} t| j}tjdd+}ttd|d d}t|dks&J t	|d j
ts8t	|d j
ts8J W d	   n1 sBw   Y  |jd
ksPJ d|j}t d
|dt}t|j|j t|j|j d	S )z
    Verify that when we request more clusters than 'eom' can handle,
        method switches to 'leaf' and the results match 'leaf'.
    r(   r   r4   Trecordr   r)   r+   Nr,   z3cluster selection method has not switched to 'leaf')r   r-   r.   r'   r$   warningscatch_warningsr   len
issubclassr   UserWarningDeprecationWarningr)   r5   r   r/   )r0   max_clusterswr1   r7   clusterer_leafr%   r%   r&   test_switch_to_leafv   s8   
$
rF   c                  C   sL   t ddddt} t| tdd\}}t| t\}}t|| t|| dS )zO
    Verify that approximate_predict_flat produces same results as default
    r(   r   T)r)   r5   prediction_dataNr+   )r   r-   r.   r   X_testr   r   )r0   labels_flat
proba_flatlabels_base
proba_baser%   r%   r&   test_approx_predict_default   s   


rN   c                  C   sT   d} t td| d}t|tdd\}}t|}|| ksJ t|tt|d  dS )zU
    Verify that approximate_predict_flat produces as many clusters as clusterer
       r(   r;   NrH   +=)	r   r.   r   rI   r'   r   r"   onesr?   )r+   r0   rJ   rK   n_clusters_outr%   r%   r&   !test_approx_predict_same_clusters   s   
rS   c                  C   s  d} t td| dd}d}t|t|d\}}t|}||ksJ t|tt|d  d}t	j
dd	.}t|t|d\}}t|d
ksEJ t|d jtsOJ dt|d jv sZJ W d   n1 sdw   Y  t|}||kssJ t|tt|d  dS )zQ
    Verify that approximate_predict_flat produces as many clusters as asked
    rO   r(   T)r)   r+   rG      rH   rP      r9   r   r<   zCannot predictN)r   r.   r   rI   r'   r   r"   rQ   r?   r=   r>   r@   r   rA   strmessage)n_clusters_fitr0   n_clusters_predictrJ   rK   rR   rD   r%   r%   r&   !test_approx_predict_diff_clusters   s2   

rZ   c                  C   s   d} t t| d}t|t}|jd t|jksJ t|ttks#J t|t	
|jd  t|jd } t t| d}t|t}|jd | ksIJ t|ttksSJ t|t	
|jd  dS )zH
    Verify membership vector produces same n_clusters as clusterer
    NrH   r   rP   r   )r   r.   r   rI   shaper'   r$   r?   r   r"   rQ   rX   r0   membershipsr%   r%   r&   test_mem_vec_same_clusters   s   

r^   c                  C   s   t jdtd d} tt| d}t|j}|d }t|t|d}|j	d |ks(J t
|t
tks2J t|t|j	d  t|jd } tt| d}| d }t|t|d}|j	d |ks^J t
|t
tkshJ t|t|j	d  dS 	zI
    Verify membership vector produces as many clusters as requested
    r   r   NrH   rT   r   rP   r   )r=   filterwarningsrA   r   r.   r'   r$   r   rI   r[   r?   r   r"   rQ   rX   r0   n_clusters_fittedrY   r]   r%   r%   r&   test_mem_vec_diff_clusters
  s*   
rc   c                  C   s   d} t t| d}t|}|jd t|jksJ t|ttks"J t|t	|jd  t|jd } t t| d}t|}|jd t|jksJJ t|ttksTJ t|t	|jd  dS )za
    Verify membership vector for training set produces same n_clusters
        as clusterer
    NrH   r   rP   r   )
r   r.   r   r[   r'   r$   r?   r   r"   rQ   r\   r%   r%   r&   %test_all_points_mem_vec_same_clusters5  s   rd   c                  C   s   t jdtd d} tt| d}t|j}|d }t||d}|jd |ks'J t	|t	tks1J t
|t|jd  t|jd } tt| d}|d }t||d}|jd |ks\J t	|t	tksfJ t
|t|jd  dS r_   )r=   r`   rA   r   r.   r'   r$   r   r[   r?   r   r"   rQ   ra   r%   r%   r&   %test_all_points_mem_vec_diff_clustersY  s*   
re   )3__doc__r=   numpyr"   hdbscanr   r   hdbscan.flatr   r   r   r   sklearn.datasetsr   r	   sklearn.preprocessingr
   sklearn.model_selectionr   sklearn.utils._testingr   r   r`   FutureWarningr   stdX0y0X1y1r?   vstackr.   concatenateyrI   y_testscalerfit_transform	transformr'   r2   r8   rF   rN   rS   rZ   r^   rc   rd   re   r%   r%   r%   r&   <module>   sN    


 '!'#+$