o
    {iB                     @   s  d dl Zd dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
mZmZmZ d dlmZmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlZd dlZd dl Z dd Z!dMddZ"dd Z#e# \Z$Z%e &e$Z$e$' Z(ej)dge(d < ej)ej)ge(d< dd Z*dd Z+ej,j-dddd Z.dd  Z/d!d" Z0dNd&d'Z1d(d) Z2d*d+ Z3d,d- Z4d.d/ Z5d0d1 Z6d2d3 Z7d4d5 Z8d6d7 Z9d8d9 Z:d:d; Z;d<d= Z<d>d? Z=d@dA Z>dBdC Z?dDdE Z@dFdG ZAdHdI ZBej,j-dJddKdL ZCdS )O    N)stats)sparse)distance)check_estimator)HDBSCANBranchDetectordetect_branches_in_clustersapproximate_predict_branch)if_matplotlibif_networkx	if_pandas)check_random_stateshuffle)
make_blobs)StandardScaler)mkdtempwrapsc                    s   t   fdd}|S )zJTest decorator that skips test if networkx or pygraphviz is not installed.c                     sB   z
dd l }dd l}W n ty   td Y d S w  | i |S )Nr   z%NetworkX or pygraphviz not available.)networkx
pygraphvizImportErrorpytestskip)argskwargsr   r   func O/home/ubuntu/.local/lib/python3.10/site-packages/hdbscan/tests/test_branches.pyrun_test!   s   zif_pygraphviz.<locals>.run_testr   )r   r   r   r   r   if_pygraphviz   s   	r    d   Tc              
   C   sB  t | tjr| d }| | }nz| \}}W n ty& } ztd|d }~ww t|}tttjd tj|}t	ttjd tj|d }	ttdtj|}
dt	tdtj| }t
t||
t|	|gj}ttj|tjdtj|tjdg}|rt|||d\}}|d ur||j||jd7 }||fS )	N   z8`n_samples` can be either an int or a two-element tuple.      r   )dtype)random_state)scalesize)
isinstancenumbersIntegral
ValueErrorr   npcoslinspacepisinvstackappendThstackzerosintponesutil_shufflenormalshape)	n_samplesr   noiser&   n_samples_outn_samples_ine	generatorouter_circ_xouter_circ_yinner_circ_xinner_circ_yXyr   r   r   make_branches.   sF   
 

rH   c                  C   s   t dddgddd\} }tdddd	\}}t|jd
 d}tjd tjddd}tdd}t| ||ft	|||ffS )N2   )g      g      @)       @      g?r"   )r<   centerscluster_stdr&      gQ?)r<   r=   r&   r   r#                  @)rI   r#   )
r   rH   r-   fullr;   randomseeduniformr2   concatenate)blobsyBlobsmoons_yMoonsr=   yNoiser   r   r   generate_noisy_dataW   s   
r^   r$   rO   c                  C   s   t dddt} | j}| jdusJ |jdksJ |jjd tjd ks'J |jjd tjd ks4J |jjd | j	ksB| j
sBJ |jdu sIJ dS )z=Check that the flag generates internal branch_detection_data.rO   Tmin_cluster_sizebranch_detection_dataNr   r$   )r   fitrF   branch_detection_data_minimum_spanning_tree_
all_finitecore_distancesr;   	neighborsmin_samplesr`   finite_indexcbranch_datar   r   r   test_branch_detection_datas   s   rm   c                  C   s   t dddt} | j}| jdusJ |jdksJ |jjd tjd d ks)J |j	jd tjd d ks8J |j	jd | j
ksF| jsFJ |jdusMJ dS )	z=Check internal branch_detection_data recognizes missing data.rO   Tr_   NFr   r#   r$   )r   rb   X_missing_datarc   rd   re   rf   r;   rF   rg   rh   r`   ri   rj   r   r   r   'test_branch_detection_data_with_missing   s   ro   z)Unreachable code-branch cannot be tested.)reasonc               	   C   s   t jddE} tddddt}dt| d jv sJ |jdus#J t	t
 |j W d   n1 s6w   Y  W d   dS W d   dS 1 sNw   Y  dS )	z$Check warning on unsupported metric.TrecordrO   cosine)r`   ra   metricz1Metric cosine not supported for branch detection!rR   N)warningscatch_warningsr   rb   rF   strmessagerd   r   raisesAttributeErrorra   )wrk   r   r   r   /test_branch_detection_data_with_non_tree_metric   s   	"r|   c                  C   s   t t t} tjdd}tdddd| }dt|d j	v s$J W d   n1 s.w   Y  | t
|  } t|  d	}d
| | |k< t| } |   tjdd}tdddd| }dt|d j	v smJ W d   dS 1 sxw   Y  dS )z$Check warning on unsupported inputs.Trq   rO   precomputed)r`   rt   ra   zFBranch detection for non-vector space inputs is not (yet) implemented.rR   NrI           )r   
squareformpdistrF   ru   rv   r   rb   rw   rx   r-   maxr   scoreatpercentileflattenr   
csr_matrixeliminate_zeros)Dr{   rk   	thresholdr   r   r   1test_branch_detection_data_with_unsupported_input   s4   



"r   c                  C   s\   t ddt} |   | jdusJ tt | j W d   dS 1 s'w   Y  dS )zAGenerate branch detection data function does not re-generate MST.rO   r`   N)	r   rb   rF   generate_branch_detection_datarc   r   ry   rz   rd   )rk   r   r   r   #test_generate_branch_detection_data   s   "r   r"      Fc                 C   s   t t| jtd| jv  |ksJ t t| jtd| jv  |ks&J | jdk}| j| dk s6J | j| dk sAJ | j	| dk sLJ | j
| dk sWJ |skt | j|ksbJ t | j|kskJ tdd | jD || ks{J dS )z2Checks branch_detector output for main invariants.rR   r         ?r~   c                 s   s    | ]}t |V  qd S N)len).0psr   r   r   	<genexpr>   s    z(check_detected_groups.<locals>.<genexpr>N)r   r-   uniquelabels_intcluster_labels_branch_labels_allbranch_probabilities_probabilities_cluster_probabilities_cluster_points_branch_persistences_sum)rk   
n_clusters
n_branches
overridden
noise_maskr   r   r   check_detected_groups   s   &&
$r   c                  C   s   t dddt} tddd| }t|dd td	dd| }t| tdd
d| }t|dd td	d
d| }t| d S )NrO   Tr_   coreeom)branch_detection_methodcluster_selection_method   r   rS   leaf	   r   rb   rF   r   r   rk   br   r   r   test_branch_detector   s2   r   c                  C   sZ   t dddt} tdd| }tj|jdd\}}||dk dk s'J t| d S )NrO   Tr_   r   r   )return_countsr   )	r   rb   rF   r   r-   r   r   r   r   )rk   r   labelscountsr   r   r   test_min_cluster_size   s
   r   c                  C   s2   t dddt} tdd| }t|dd d S )NrO   Tr_   label_sides_as_branches   r   r   r   r   r   r   test_label_sides_as_branches   s   r   c                  C   s4   t dddt} tddd| }t|dd dS )	zSuppresses one branch.rO   Tr_   rI   )r   max_cluster_sizer   r   Nr   r   r   r   r   test_max_cluster_size   s   r   c                  C   s\   t td} d| tdk< d| tdk< tdddt}tdd|| }t|dddd	 d S )
NrR   r   r$   rO   Tr_   r   r#   )r   r   r   )r-   	full_likerG   r   rb   rF   r   r   )split_yrk   r   r   r   r   test_override_cluster_labels  s   r   c                  C   s  t jd t jdd} tddddd| }tdddd	|}t |j}t	|d
ks0J t 
|jdk}|dk|dkB sBJ tddddd|}t |j}t	|dks[J t 
|jdkdksgJ tddddd|}t |j}t	|dksJ t 
|jdkdksJ d S )Nr   rN   r#   rO   T)rh   r`   allow_single_clusterra   r   r   )r`   r   r   r   G   H   皙?)r`   r   r   cluster_selection_persistencer$   g4H4H@)r`   r   cluster_selection_epsilonr   )r-   rT   rU   randr   rb   r   r   r   r   r   r   )no_structurerk   r   unique_labels	num_noiser   r   r   &test_allow_single_cluster_with_filters
  sV   r   c                  C   sn  t dddt} t ddd}t dddt}t ddt}|  tt td W d    n1 s8w   Y  tt td  W d    n1 sQw   Y  tt td W d    n1 sjw   Y  tt t| W d    n1 sw   Y  tt t| W d    n1 sw   Y  tt t| W d    n1 sw   Y  tt t| dd W d    n1 sw   Y  tt t| dd W d    n1 sw   Y  tt t| d	d W d    n	1 sw   Y  tt t| d
d W d    n	1 s#w   Y  tt t| dd W d    n	1 s?w   Y  tt t| dd W d    n	1 s[w   Y  tt t| dd W d    n	1 sww   Y  tt t| dd W d    n	1 sw   Y  tt t| dd W d    d S 1 sw   Y  d S )NrO   Tr_   )r`   gen_min_span_treer   failrR   r   r$   rJ   g)r   )r   something_else)r   )r   )	r   rb   rF   r   r   ry   rz   r   r,   )rk   c_nofit
c_nobranchc_nomstr   r   r   test_badargs8  sp   





$r   c                  C   s~   t  } t| dddt}t |}tdd|}tt|jtd|jv  }tt|jtd|jv  }||ks=J d S )NrO   T)memoryrh   ra   )r   rR   )	r   r   rb   rF   r   r   setr   r   )cachedirrk   b1b2	n_groups1	n_groups2r   r   r   test_cachingh  s   r   c                     s>  t ddgddgddgddgddgg} td	d
dt}t | t  fddtd	D }t jt |dd}t |d d df |d d df f}t 	t 
||d d f |  dk  t  fddtd	D }t jt |dd}t |d d df |d d df f}t 	t 
||d d f |  dk  d S )NgrP   r   皙gffffff?rK   r~   g333333?rO   Tr_   c                       g | ]}  |qS r   )weighted_centroidr   ir   r   r   
<listcomp>z      z)test_centroid_medoids.<locals>.<listcomp>r$   )decimalsr   c                    r   r   )weighted_medoidr   r   r   r   r     r   )r-   asarrayr   rb   rF   r   rangearoundlexsortr   abs)branch_centersrk   	centroidsroundedcordermedoidsr   r   r   test_centroid_medoidsr  s    &$&(r   c                  C   sn   t dddt} t | }|j}|d d u sJ |d d u s"J t|d dks,J t|jdks5J d S )NrO   Tr_   r   r$   r#   r"   )r   rb   rF   r   
exemplars_r   )rk   r   branch_exemplarsr   r   r   test_exemplars  s   r   c                  C   sH  t ddddt} t | }t|tddgg\}}}}}}|d dks)J t|j|d  dks6J t|tdd	gg\}}}}}}|d |d ksQJ |d dksYJ |d d
ksaJ t|tddgg\}}}}}}|d dkszJ |d dksJ |d dksJ |d dksJ |d dksJ |d d
ksJ d S )NrO   T)r`   ra   prediction_datar   r~   r   rR   r#   rJ   r   r$   rQ   )	r   rb   rF   r   r	   r-   arrayr   r   )rk   r   lpclcpblbpr   r   r   test_approximate_predict  s(   """r   c                  C   sp   t dddt} t | }|j \}}|jd tjd ks"J |jD ]}|  q%|jD ]}|  q/d S )NrO   Tr_   r   )	r   rb   rF   r   approximation_graph_to_numpyr;   condensed_trees_linkage_trees_)rk   r   pointsedgestr   r   r   test_trees_numpy_output_formats  s   



r   c                  C   `   t dddt} t | }t|jj  |jD ]}t|j  q|jD ]}t|j  q%d S NrO   Tr_   )	r   rb   rF   r   r   r   	to_pandasr   r   rk   r   r   r   r   r    test_trees_pandas_output_formats     

r  c                  C   r   r  )	r   rb   rF   r   r   r   to_networkxr   r   r  r   r   r   "test_trees_networkx_output_formats  r  r  c                  C   sX   t dddt} t | }|jD ]}t|jddddd qt|jdddd	 d S )
NrO   Tr_   )rgr   Reds)select_clusterslabel_clustersselection_palettecmapFnone)log_sizecolorbarr  )r   rb   rF   r   r   r
   plotr  r   r   r   test_condensed_tree_plot  s   
r  c                  C   sV   t dddt} t | }|jD ]}t|jdd t|jdddd	dd
 qd S )NrO   Tr_   r
  )r  Flastp
   r  )vary_line_widthtruncate_moder   r  r  )r   rb   rF   r   r   r
   r  r  r   r   r   test_single_linkage_tree_plot  s   
r  c                  C   s   t dddt} t | }|j}t|jtd tt|jdddgd tt|jtd d df d	 tt|jd
dd tt|j|jd
 dd d S )NrO   Tr_   )	positionsxrG   )
node_colorfeature_namesr   )r  
centrality)
edge_color
node_alpha)	r   rb   rF   r   r   r
   r  r    _edges)rk   r   r	  r   r   r   test_approximation_graph_plot  s    

r!  z(need to refactor to meet newer standardsc                   C   s   t t d S r   )r   r   r   r   r   r   )test_branch_detector_is_sklearn_estimator  s   r"  )r!   TNN)r"   r   F)Dnumpyr-   scipyr   r   scipy.spatialr   sklearn.utils.estimator_checksr   hdbscanr   r   r   r	   hdbscan.tests.test_hdbscanr
   r   r   sklearn.utilsr   r   r9   sklearn.datasetsr   sklearn.preprocessingr   tempfiler   	functoolsr   r*   r   ru   r    rH   r^   rF   rG   fit_transformcopyrn   nanrm   ro   markr   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r  r!  r"  r   r   r   r   <module>   sb    
)


	.0

