o
    {iM                     @   sj  d dl Zd dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dl
mZmZ d dlmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZ 										d4ddZ		d5ddZ		d6ddZdd Zdd Z dd Z!dd Z"dd  Z#		d5d!d"Z$	d7d#d$Z%d%d& Z&d'd( Z'd8d*d+Z(d,d- Z)G d.d/ d/eeZ*d0d1 Z+G d2d3 d3Z,dS )9    N)BaseEstimatorClusterMixin)	coo_array)minimum_spanning_treeconnected_components)Memory)Paralleldelayed)	cpu_count   )label)CondensedTreeSingleLinkageTreeApproximationGraph)approximate_predict)recurse_leaf_dfs)_tree_to_labelsfullF        c                 C   sD  | j du r	td| jdu rtd|du r| j}|du r | j}|du r'| j}|du r.| j}t|	}	t|
}
t	t
|tjrC|dksKtd| dt	t
|
tjrX|
dks`td|
 dt	t
|	tjrm|	dksutd	|	 d|d
vrtd| d|dvrtd| d| j}t|trt|dd}|du rd}t| j}t| j| j| j| j| j| jd\}}|du r|}nd}| d }|du rt|dkdd}| jjs| jj}|| }|| }|dk}| j}|dk rtt d | d}|rt nt|dd}|j t!dgd||| j | jj"| jj#| jj$| jj%||||d\}}}}|j t&dgd||||||	|
|d\}}}}}| t'||||||||d\}}}}}| jjs| jj(}t)|| t*|| t| j}t+|||}t,|||}t+|||}t,|||}t+|||d}t,|||}t,|||}||||||||||||fS )a(  
    Performs a flare-detection post-processing step to detect branches within
    clusters [1]_.

    For each cluster, a graph is constructed connecting the data points based on
    their mutual reachability distances. Each edge is given a centrality value
    based on how far it lies from the cluster's center. Then, the edges are
    clustered as if that centrality was a distance, progressively removing the
    'center' of each cluster and seeing how many branches remain.

    Parameters
    ----------

    clusterer : hdbscan.HDBSCAN
        The clusterer object that has been fit to the data with branch detection
        data generated.

    cluster_labels : np.ndarray, shape (n_samples, ), optional (default=None)
        The cluster labels for each point in the data set. If not provided, the
        clusterer's labels will be used.

    cluster_probabilities : np.ndarray, shape (n_samples, ), optional (default=None)
        The cluster probabilities for each point in the data set. If not provided,
        the clusterer's probabilities will be used, or all points will be given
        1.0 probability if labels are overridden.

    branch_detection_method : str, optional (default=``full``)
        Determines which graph is constructed to detect branches with. Valid
        values are, ordered by increasing computation cost and decreasing
        sensitivity to noise:
        - ``core``: Contains the edges that connect each point to all other
          points within a mutual reachability distance lower than or equal to
          the point's core distance. This is the cluster's subgraph of the
          k-NN graph over the entire data set (with k = ``min_samples``).
        - ``full``: Contains all edges between points in each cluster with a
          mutual reachability distance lower than or equal to the distance of
          the most-distance point in each cluster. These graphs represent the
          0-dimensional simplicial complex of each cluster at the first point in
          the filtration where they contain all their points.

    label_sides_as_branches : bool, optional (default=False),
        When this flag is False, branches are only labelled for clusters with at
        least three branches (i.e., at least y-shapes). Clusters with only two
        branches represent l-shapes. The two branches describe the cluster's
        outsides growing towards each other. Enabling this flag separates these
        branches from each other in the produced labelling.

    min_cluster_size : int, optional (default=None)
        The minimum number of samples in a group for that group to be
        considered a branch; groupings smaller than this size will seen as
        points falling out of a branch. Defaults to the clusterer's min_cluster_size.

    allow_single_cluster : bool, optional (default=None)
        Analogous to HDBSCAN's ``allow_single_cluster``.

    cluster_selection_method : str, optional (default=None)
        The method used to select branches from the cluster's condensed tree.
        The standard approach for FLASC is to use the ``eom`` approach.
        Options are:
          * ``eom``
          * ``leaf``

    cluster_selection_epsilon: float, optional (default=0.0)
        A lower epsilon threshold. Only branches with a death above this value
        will be considered. See [3]_ for more information. Note that this
        should not be used if we want to predict the cluster labels for new
        points in future (e.g. using approximate_predict), as the
        :func:`~hdbscan.branches.approximate_predict` function is not aware of
        this argument.

    cluster_selection_persistence: float, optional (default=0.0)
        An eccentricity persistence threshold. Branches with a persistence below
        this value will be merged. See [3]_ for more information. Note that this
        should not be used if we want to predict the cluster labels for new
        points in future (e.g. using approximate_predict), as the
        :func:`~hdbscan.branches.approximate_predict` function is not aware of
        this argument.

    max_cluster_size : int, optional (default=0)
        A limit to the size of clusters returned by the ``eom`` algorithm.
        Has no effect when using ``leaf`` clustering (where clusters are
        usually small regardless). Note that this should not be used if we
        want to predict the cluster labels for new points in future (e.g. using
        :func:`~hdbscan.branches.approximate_predict`), as that function is
        not aware of this argument.

    Returns
    -------
    labels : np.ndarray, shape (n_samples, )
        Labels that differentiate all subgroups (clusters and branches). Noisy
        samples are given the label -1.

    probabilities : np.ndarray, shape (n_samples, )
        Probabilities considering both cluster and branch membership. Noisy
        samples are assigned 0.

    cluster_labels : np.ndarray, shape (n_samples, )
        The cluster labels for each point in the data set. Noisy samples are
        given the label -1.

    cluster_probabilities : np.ndarray, shape (n_samples, )
        The cluster probabilities for each point in the data set. Noisy samples
        are assigned 1.0.

    branch_labels : np.ndarray, shape (n_samples, )
        Branch labels for each point. Noisy samples are given the label -1.

    branch_probabilities : np.ndarray, shape (n_samples, )
        Branch membership strengths for each point. Noisy samples are
        assigned 0.

    branch_persistences : tuple (n_clusters)
        A branch persistence (eccentricity range) for each detected branch.

    approximation_graphs : tuple (n_clusters)
        The graphs used to detect branches in each cluster stored as a numpy
        array with four columns: source, target, centrality, mutual reachability
        distance. Points are labelled by their row-index into the input data.
        The edges contained in the graphs depend on the ``branch_detection_method``:
        - ``core``: Contains the edges that connect each point to all other
          points in a cluster within a mutual reachability distance lower than
          or equal to the point's core distance. This is an extension of the
          minimum spanning tree introducing only edges with equal distances. The
          reachability distance introduces ``num_points`` * ``min_samples`` of
          such edges.
        - ``full``: Contains all edges between points in each cluster with a
          mutual reachability distance lower than or equal to the distance of
          the most-distance point in each cluster. These graphs represent the
          0-dimensional simplicial complex of each cluster at the first point in
          the filtration where they contain all their points.

    condensed_trees : tuple (n_clusters)
        A condensed branch hierarchy for each cluster produced during the
        branch detection step. Data points are numbered with in-cluster ids.

    linkage_trees : tuple (n_clusters)
        A single linkage tree for each cluster produced during the branch
        detection step, in the scipy hierarchical clustering format.
        (see http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html).
        Data points are numbered with in-cluster ids.

    centralities : np.ndarray, shape (n_samples, )
        Centrality values for each point in a cluster. Overemphasizes points'
        eccentricity within the cluster as the values are based on minimum
        spanning trees that do not contain the equally distanced edges resulting
        from the mutual reachability distance.

    cluster_points : list (n_clusters)
        The data point row indices for each cluster.

    References
    ----------
    .. [1] Bot D.M., Peeters J., Liesenborgs J., Aerts J. 2025. FLASC: a
    flare-sensitive clustering algorithm. PeerJ Computer Science 11:e2792
    https://doi.org/10.7717/peerj-cs.2792.
    NzClusterer does not have an explicit minimum spanning tree! Try fitting with branch_detection_data=True or gen_min_span_tree=True set.zClusterer does not have branch detection data! Try fitting with branch_detection_data=True set, or run generate_branch_detection_data on the clusterer   z<min_cluster_size must be an integer greater or equal to 2,  z given.r   zGcluster_selection_persistence must be a float greater or equal to 0.0, zCcluster_selection_epsilon must be a float greater or equal to 0.0, )eomleafz"Invalid cluster_selection_method: z!
Should be one of: "eom", "leaf"
)corer   z%Invalid ``branch_detection_method``: z"
Should be one of: "core", "full"
r   )verboseFallow_single_clustercluster_selection_epsilonTr   g      ?r   )n_jobs
max_nbytesthread_pool)ignore)run_coreoverridden_labels)min_cluster_sizer   cluster_selection_methodr   cluster_selection_persistencemax_cluster_size)label_sides_as_branches)-_min_spanning_tree
ValueErrorbranch_detection_data_r$   r'   r   r%   floatnp
issubdtypetypeintegerfloatingmemory
isinstancestrr   lencluster_persistence_update_single_cluster_labels_condensed_treelabels_probabilities_r   maxwhere
all_finitefinite_indexcore_dist_n_jobsr
   SequentialPoolr   cachecompute_branch_linkagetree	neighborscore_distancesdist_metriccompute_branch_segmentationupdate_labellinginternal_to_raw_remap_point_lists_remap_edge_lists_remap_labels_remap_probabilities)	clusterercluster_labelscluster_probabilitiesbranch_detection_methodr(   r$   r'   r   r%   r   r&   r2   r#   num_clusters_cluster_probabilitiesr>   r"   num_jobsr    pointscentralitieslinkage_treesapproximation_graphsbranch_labelsbranch_probabilitiesbranch_persistencescondensed_treeslabelsprobabilitiesrI   
num_points r`   D/home/ubuntu/.local/lib/python3.10/site-packages/hdbscan/branches.pydetect_branches_in_clusters   sB   
+










rb   c           	      C   sp   |r4t |dkr4t|}t|}|dkr4| d dk}| d d| k }| d ||@  }d||< d||< ||fS )zhSets all points up to cluster_selection_epsilon to the zero-cluster if
    a single cluster is detected.r   r   
child_size
lambda_valchildr   )r5   r-   
zeros_like	ones_like)	condensed_treer]   r^   persistencesr   r   	size_masklambda_masknoise_pointsr`   r`   ra   r7   r  s   


r7   c              
      sB   | f	ddt |D }t|rtt| S dS )Nc                 3   s.    | ]}t t |
V  qd S N)r	   "_compute_branch_linkage_of_cluster).0
cluster_id	rO   rP   rE   rF   min_spanning_treerD   r#   r"   
space_treer`   ra   	<genexpr>  s     
z)compute_branch_linkage.<locals>.<genexpr>)r`   r`   r`   r`   )ranger5   tuplezip)rO   rP   rr   rs   rD   rE   rF   rR   r    r"   r#   resultr`   rq   ra   rB     s   rB   c
                 C   s  | |	k}
t |
d }t j| jd dt jd}t jt|t jd||< | |dddf t j |	k}| |dddf t j |	k}|||@  }||dddf t j |dddf< ||dddf t j |dddf< |j	j
| }t j|||
 dd}||d |dddf }t jdd d| }W d   n1 sw   Y  |rt|||| |}n|jd	  }t|||||}t||||S )
z#Detect branches within one cluster.r   r   dtypeNr   weightsaxisr!   )divider   )r-   r<   r   shapedoublearanger5   astypeintpdatabaseaveragepairwiseerrstateextract_core_cluster_graphTr;   extract_full_cluster_graph!compute_branch_linkage_from_graph)rO   rP   rr   rs   rD   rE   rF   r"   r#   rp   cluster_maskcluster_pointsin_cluster_idsparent_mask
child_maskcluster_mstrU   centroidrV   edgesmax_distr`   r`   ra   rn     s4     ((

rn   c                 C   sr  t ||d d df t j ||d d df t j |d d df  tt|d d df |d d df t j|d d df t jfft| t| fddd}| |d d df t j |d d df< | |d d df t j |d d df< |rt|dd\}}|dkr| |||fS |	 }t 
|j|j|jf}|t |d d df d d f }t|}| |||fS )	Nr   r   r   )r   T)	overwriteF)directed)r-   maximumr   r   r   r   int32r5   r   tocoocolumn_stackrowcolr   argsortr   )r   rV   r   r#   centrality_mstnum_componentsr]   linkage_treer`   r`   ra   r     s2   >(	("r   c                 C   s  |j d }|j d }| j d }tj|||  dftjd}| dddf tj}| dddf tj}	t||	|d|df  t||	|d|df  ttj	|tjd|}
||
  }t|
|||ddf  t|
|||ddf  | dddf |d|df< t|||ddf tj |||ddf tj ||ddf  tj||dddf dkddf dd	}|S )
zGCreate a graph connecting all points within each point's core distance.r   r      ry   Nr            r}   )r   r-   zerosr   r   r   minimumr   repeatr   flattenunique)cluster_spanning_treerE   rD   r   r_   num_neighborscountr   mst_parentsmst_childrencore_parentcore_childrenr`   r`   ra   r     s(   


*r   c              	   C   s@  | j | jj| |d dd\}}tjt|tjd}t|D ]\}}	||  t|	7  < qttj	t|tj
d|}
|t| }t|}|dk|
|k @ ||k@ }tj| dftj
d}|
| |d d df< || |d d df< tt||d d df tj ||d d df tj || |d d d	f  |S )
Ng:0yE>T)rreturn_distancery   r   r   r   r   r   )query_radiusr   r   r-   r   r5   r   	enumerater   r   r   concatenatesumr   r   )rs   rE   r   r   r   children_mapdistances_mapnum_childrenichildrenfull_parentsfull_childrenfull_distancesmaskr   r`   r`   ra   r   ,  s:   

r   c                    s.   | fdd| D }t |rtt| S dS )z/Extracts branches from the linkage hierarchies.c                 3   s$    | ]}t t|fi  V  qd S rm   )r	    segment_branch_linkage_hierarchy)ro   cluster_linkage_treekwargsr`   ra   rt   V  s
    
z.compute_branch_segmentation.<locals>.<genexpr>)r`   r`   r`   r`   r`   )r5   rv   rw   )cluster_linkage_treesr    r   resultsr`   r   ra   rG   T  s   rG   c           	      K   s   t | jdkr"| tj| jd tjddd t|  d D ddfS td| f||d|\}}}}}t||||||d\}}|||||fS )z#Select branches within one cluster.r   r   ry   c                 S   s   g | ]}d qS )r   r`   )ro   _r`   r`   ra   
<listcomp>k  s    z4segment_branch_linkage_hierarchy.<locals>.<listcomp>Nr   )	r5   r   r-   onesr   ru   r;   r   r7   )	single_linkage_treer   r   r   r]   r^   stabilitiesrh   r   r`   r`   ra   r   _  s4   
	
r   c                 C   s  t | }dtj|tjd }	|  }
tj|tjd}tj|tjd}tj|tjd}d}t||||||D ]Q\}}}}}}t |}|||< ||rJdndkrZ|durZ||	|< |d7 }q5td|v }|| | |	|< |||< |||< |
|  |7  < |
|  d  < ||| 7 }q5|	|
|||fS )z1Updates the labelling with the detected branches.r   ry   r   r   r   N)	r5   r-   r   r   copyr   r   rw   int)rP   	tree_listpoints_listcentrality_listbranch_label_listbranch_prob_listbranch_pers_listr(   r_   r]   r^   rY   rZ   branch_centralities
running_idrC   _points_labels_probs_centrs_persnum_branches	has_noiser`   r`   ra   rH     sB   
rH   c                 C   s8   | D ]}|D ]}||d  |d< ||d  |d< qqdS )aU  
    Takes a list of edge lists and replaces the internal indices to raw indices.

    Parameters
    ----------
    edge_lists : list[np.ndarray]
        A list of numpy edgelists with the first two columns indicating
        datapoints.
    internal_to_raw: dict
        A mapping from internal integer index to the raw integer index.
    r   r   Nr`   )
edge_listsrI   graphedger`   r`   ra   rK     s   rK   c                 C   s0   | D ]}t t|D ]
}|||  ||< q
qdS )a/  
    Takes a list of points lists and replaces the internal indices to raw indices.

    Parameters
    ----------
    point_lists : list[np.ndarray]
        A list of numpy arrays with point indices.
    internal_to_raw: dict
        A mapping from internal integer index to the raw integer index.
    N)ru   r5   )point_listsrI   rU   idxr`   r`   ra   rJ     s
   rJ   r   c                 C   s   t ||}| ||< |S )z7Creates new label array with infinite points set to -1.)r-   r   )
old_labelsr>   r_   
fill_value
new_labelsr`   r`   ra   rL     s   rL   c                 C   s   t |}| ||< |S )z<Creates new probability array with infinite points set to 0.)r-   r   )	old_probsr>   r_   	new_probsr`   r`   ra   rM     s   
rM   c                   @   s   e Zd ZdZ								dddZddd	Zdd
dZdddZdddZe	dd Z
e	dd Ze	dd Ze	dd ZdS )BranchDetectora4  Performs a flare-detection post-processing step to detect branches within
    clusters [1]_.

    For each cluster, a graph is constructed connecting the data points based on
    their mutual reachability distances. Each edge is given a centrality value
    based on how far it lies from the cluster's center. Then, the edges are
    clustered as if that centrality was a distance, progressively removing the
    'center' of each cluster and seeing how many branches remain.

    Parameters
    ----------
    branch_detection_method : str, optional (default=``full``)
        Determines which graph is constructed to detect branches with. Valid
        values are, ordered by increasing computation cost and decreasing
        sensitivity to noise:
        - ``core``: Contains the edges that connect each point to all other
          points within a mutual reachability distance lower than or equal to
          the point's core distance. This is the cluster's subgraph of the
          k-NN graph over the entire data set (with k = ``min_samples``).
        - ``full``: Contains all edges between points in each cluster with a
          mutual reachability distance lower than or equal to the distance of
          the most-distance point in each cluster. These graphs represent the
          0-dimensional simplicial complex of each cluster at the first point in
          the filtration where they contain all their points.

    label_sides_as_branches : bool, optional (default=False),
        When this flag is False, branches are only labelled for clusters with at
        least three branches (i.e., at least y-shapes). Clusters with only two
        branches represent l-shapes. The two branches describe the cluster's
        outsides growing towards each other. Enabling this flag separates these
        branches from each other in the produced labelling.

    min_cluster_size : int, optional (default=None)
        The minimum number of samples in a group for that group to be
        considered a branch; groupings smaller than this size will seen as
        points falling out of a branch. Defaults to the clusterer's min_cluster_size.

    allow_single_cluster : bool, optional (default=None)
        Analogous to ``allow_single_cluster``.

    cluster_selection_method : str, optional (default=None)
        The method used to select branches from the cluster's condensed tree.
        The standard approach for FLASC is to use the ``eom`` approach.
        Options are:
          * ``eom``
          * ``leaf``

    cluster_selection_epsilon: float, optional (default=0.0)
        A lower epsilon threshold. Only branches with a death above this value
        will be considered.

    cluster_selection_persistence: float, optional (default=0.0)
        An eccentricity persistence threshold. Branches with a persistence below
        this value will be merged.

    max_cluster_size : int, optional (default=None)
        A limit to the size of clusters returned by the ``eom`` algorithm. Has
        no effect when using ``leaf`` clustering (where clusters are usually
        small regardless). Note that this should not be used if we want to
        predict the cluster labels for new points in future because
        `approximate_predict` is not aware of this argument.

    Attributes
    ----------
    labels_ : np.ndarray, shape (n_samples, )
        Labels that differentiate all subgroups (clusters and branches). Noisy
        samples are given the label -1.

    probabilities_ : np.ndarray, shape (n_samples, )
        Probabilities considering both cluster and branch membership. Noisy
        samples are assigned 0.

    cluster_labels_ : np.ndarray, shape (n_samples, )
        The cluster labels for each point in the data set. Noisy samples are
        given the label -1.

    cluster_probabilities_ : np.ndarray, shape (n_samples, )
        The cluster probabilities for each point in the data set. Noisy samples
        are assigned 1.0.

    branch_labels_ : np.ndarray, shape (n_samples, )
        Branch labels for each point. Noisy samples are given the label -1.

    branch_probabilities_ : np.ndarray, shape (n_samples, )
        Branch membership strengths for each point. Noisy samples are
        assigned 0.

    branch_persistences_ : tuple (n_clusters)
        A branch persistence (eccentricity range) for each detected branch.

    approximation_graph_ : ApproximationGraph
        The graphs used to detect branches in each cluster stored as a numpy
        array with four columns: source, target, centrality, mutual reachability
        distance. Points are labelled by their row-index into the input data.
        The edges contained in the graphs depend on the ``branch_detection_method``:
        - ``core``: Contains the edges that connect each point to all other
          points in a cluster within a mutual reachability distance lower than
          or equal to the point's core distance. This is an extension of the
          minimum spanning tree introducing only edges with equal distances. The
          reachability distance introduces ``num_points`` * ``min_samples`` of
          such edges.
        - ``full``: Contains all edges between points in each cluster with a
          mutual reachability distance lower than or equal to the distance of
          the most-distance point in each cluster. These graphs represent the
          0-dimensional simplicial complex of each cluster at the first point in
          the filtration where they contain all their points.

    condensed_trees_ : tuple (n_clusters)
        A condensed branch hierarchy for each cluster produced during the
        branch detection step. Data points are numbered with in-cluster ids.

    linkage_trees_ : tuple (n_clusters)
        A single linkage tree for each cluster produced during the branch
        detection step, in the scipy hierarchical clustering format.
        (see http://docs.scipy.org/doc/scipy/reference/cluster.hierarchy.html).
        Data points are numbered with in-cluster ids.

    centralities_ : np.ndarray, shape (n_samples, )
        Centrality values for each point in a cluster. Overemphasizes points'
        eccentricity within the cluster as the values are based on minimum
        spanning trees that do not contain the equally distanced edges resulting
        from the mutual reachability distance.

    cluster_points_ : list (n_clusters)
        The data point row indices for each cluster.

    References
    ----------
    .. [1] Bot D.M., Peeters J., Liesenborgs J., Aerts J. 2025. FLASC: a
    flare-sensitive clustering algorithm. PeerJ Computer Science 11:e2792
    https://doi.org/10.7717/peerj-cs.2792.
    r   FNr   c	           	      C   sL   || _ || _|| _|| _|| _|| _|| _|| _d | _d | _	d | _
d | _d S rm   )rQ   r(   r$   r'   r   r%   r   r&   _approximation_graphs_condensed_trees_cluster_linkage_trees_branch_exemplars)	selfrQ   r(   r$   r'   r   r%   r   r&   r`   r`   ra   __init__p  s   
zBranchDetector.__init__c                 C   sV   || _ |  }t|||fi |\| _| _| _| _| _| _| _	| _
| _| _| _| _| S )aI  
        Perform a flare-detection post-processing step to detect branches within
        clusters.

        Parameters
        ----------
        clusterer : HDBSCAN
            A fitted HDBSCAN object with branch detection data generated.

        labels : np.ndarray, shape (n_samples, ), optional (default=None)
            The cluster labels for each point in the data set. If not provided, the
            clusterer's labels will be used.

        probabilities : np.ndarray, shape (n_samples, ), optional (default=None)
            The cluster probabilities for each point in the data set. If not provided,
            the clusterer's probabilities will be used, or all points will be given
            1.0 probability if labels are overridden.

        Returns
        -------
        self : object
            Returns self.
        )
_clusterer
get_paramsrb   r9   r:   cluster_labels_cluster_probabilities_branch_labels_branch_probabilities_branch_persistences_r   r   _linkage_treescentralities_cluster_points_)r   rN   r]   r^   r   r`   r`   ra   fit  s"   zBranchDetector.fitc                 C   s   |  ||| | jS )a  
        Perform a flare-detection post-processing step to detect branches within
        clusters [1]_.

        Parameters
        ----------
        clusterer : HDBSCAN
            A fitted HDBSCAN object with branch detection data generated.

        labels : np.ndarray, shape (n_samples, ), optional (default=None)
            The cluster labels for each point in the data set. If not provided, the
            clusterer's labels will be used.

        probabilities : np.ndarray, shape (n_samples, ), optional (default=None)
            The cluster probabilities for each point in the data set. If not provided,
            the clusterer's probabilities will be used, or all points will be given
            1.0 probability if labels are overridden.

        Returns
        -------
        labels : ndarray, shape (n_samples, )
            subgroup labels differentiated by cluster and branch.
        )r   r9   )r   rN   r]   r^   r`   r`   ra   fit_predict  s   zBranchDetector.fit_predictc                 C   sz   | j du r	td| jjdu r|du rtd|dkrtd|du r'| jj}| j |k}|| }| j| }tj||ddS )a  Provides an approximate representative point for a given branch.
        Note that this technique assumes a euclidean metric for speed of
        computation. For more general metrics use the ``weighted_medoid`` method
        which is slower, but can work with the metric the model trained with.

        Parameters
        ----------
        label_id: int
            The id of the cluster to compute a centroid for.

        data : np.ndarray (n_samples, n_features), optional (default=None)
            A dataset to use instead of the raw data that was clustered on.

        Returns
        -------
        centroid: array of shape (n_features,)
            A representative centroid for cluster ``label_id``.
        NModel has not been fit to dataRaw data not availabler   MCannot calculate weighted centroid for -1 cluster since it is a noise clusterr   r{   )r9   AttributeErrorr   	_raw_datar*   r:   r-   r   )r   label_idr   r   cluster_datacluster_membership_strengthsr`   r`   ra   weighted_centroid  s   


z BranchDetector.weighted_centroidc           	      C   s   | j du r	td| jjdu r|du rtd|dkrtd|du r'| jj}| j |k}|| }| j| }| jjj}||| }t	
|jdd}|| S )a  Provides an approximate representative point for a given branch.

        Note that this technique can be very slow and memory intensive for large
        clusters. For faster results use the ``weighted_centroid`` method which
        is faster, but assumes a euclidean metric.

        Parameters
        ----------
        label_id: int
            The id of the cluster to compute a medoid for.

        data : np.ndarray (n_samples, n_features), optional (default=None)
            A dataset to use instead of the raw data that was clustered on.

        Returns
        -------
        centroid: array of shape (n_features,)
            A representative medoid for cluster ``label_id``.
        Nr   r   r   r   r   r   )r9   r   r   r   r*   r:   r+   rF   r   r-   argminr   )	r   r   r   r   r   r  rF   dist_matmedoid_indexr`   r`   ra   weighted_medoid  s"   



zBranchDetector.weighted_medoidc                 C   sD   | j du r	tdt| j | j| j| j| j| j| j| j	dd| j
jdS )@See :class:`~hdbscan.branches.BranchDetector` for documentation.Nz<No approximation graph was generated; try running fit first.
centralitybranch)	lens_namesub_cluster_nameraw_data)r   r   r   r9   r:   r   r   r   r   r   r   r   r   r`   r`   ra   approximation_graph_  s"   
z#BranchDetector.approximation_graph_c                    s.    j du r	td fddt j  jD S )r  Nz9No condensed trees were generated; try running fit first.c                    s    g | ]\}}t | j| qS r`   )r   r   )ro   rC   rU   r  r`   ra   r   8  s    z3BranchDetector.condensed_trees_.<locals>.<listcomp>)r   r   rw   r   r  r`   r  ra   condensed_trees_0  s   

zBranchDetector.condensed_trees_c                 C   s"   | j du r	tddd | j D S )r  Nz7No linkage trees were generated; try running fit first.c                 S   s    g | ]}|d urt |nd qS rm   )r   )ro   rC   r`   r`   ra   r   G  s    z1BranchDetector.linkage_trees_.<locals>.<listcomp>)r   r   r  r`   r`   ra   linkage_trees_@  s   
zBranchDetector.linkage_trees_c                 C   s\  | j dur| j S | jjdu rtd| jdu rtdt| j}dd | jD }dd | jD }dg| | _ t| jD ]o\}}|| }t|| j	rKdndkrOq;g | j |< | j| }|D ]N}t
jg t
jd	}	t|| t
|D ]%}
|d
 |d |
k  }|d |d |
k|d
 |k@  }t
|	|g}	qo||	 }| j | | jj|ddf  q[q;| j S )r  Nz:Branch exemplars not available with precomputed distances.z,No branches detected; try running fit first.c                 S   s   g | ]
}||d  dk qS )rc   r   r`   ro   branch_treer`   r`   ra   r   Y  s    z-BranchDetector.exemplars_.<locals>.<listcomp>c                 S   s   g | ]}t | qS r`   )sorted_select_clustersr  r`   r`   ra   r   ]  s    
r   r   ry   rd   parentre   )r   r   r   r   r   r5   r  r   r   r(   r-   arrayr   r   r;   hstackappend)r   rR   branch_cluster_treesselected_branch_idsr   rU   selected_branchesraw_condensed_treer	  r   r   leaf_max_lambda
candidatesidsr`   r`   ra   
exemplars_L  sP   







"zBranchDetector.exemplars_)r   FNNNNr   r   )NNrm   )__name__
__module____qualname____doc__r   r   r   r  r  propertyr  r  r  r   r`   r`   r`   ra   r     s0     


+

$(


r   c                 C   s  t | j|dd\}}}t|}tj|tjd}tj|tjd}tj|tjd}tj|tjd}	| j	s4dnd}
t
t|||D ]A\}\}}}|dk rNd||< q>t| j| |
kr`|||< |||< q>| j| ||< | j| ||< | j| |	|< ||	|  d ||< q>||||||	fS )aD  Predict the cluster and branch label of new points.

    Extends ``approximate_predict`` to also predict in which branch
    new points lie (if the cluster they are part of has branches).

    Parameters
    ----------
    branch_detector : BranchDetector
        A clustering object that has been fit to vector input data.

    points_to_predict : array, or array-like (n_samples, n_features)
        The new data points to predict cluster labels for. They should
        have the same dimensionality as the original dataset over which
        clusterer was fit.

    Returns
    -------
    labels : array (n_samples,)
        The predicted cluster and branch labels.

    probabilities : array (n_samples,)
        The soft cluster scores for each.

    cluster_labels : array (n_samples,)
        The predicted cluster labels.

    cluster_probabilities : array (n_samples,)
        The soft cluster scores for each.

    branch_labels : array (n_samples,)
        The predicted cluster labels.

    branch_probabilities : array (n_samples,)
        The soft cluster scores for each.
    T)return_connecting_pointsry   r   r   r   r   )r   r   r5   r-   emptyr   r   r   r   r(   r   rw   r   r9   r   r   )branch_detectorpoints_to_predictrO   rP   connecting_pointsnum_predictr]   r^   rY   rZ   min_num_branchesr   r   probconnecting_pointr`   r`   ra   approximate_predict_branch}  s<   %


r/  c                   @   s    e Zd ZdZdd Zdd ZdS )r@   z6API of a Joblib Parallel pool but sequential executionc                 C   s
   d| _ d S )Nr   )r   r  r`   r`   ra   r     s   
zSequentialPool.__init__c                 C   s   dd |D S )Nc                 S   s    g | ]\}}}||i |qS r`   r`   )ro   funargsr   r`   r`   ra   r     s     z+SequentialPool.__call__.<locals>.<listcomp>r`   )r   jobsr`   r`   ra   __call__  s   zSequentialPool.__call__N)r!  r"  r#  r$  r   r3  r`   r`   r`   ra   r@     s    r@   )
NNr   FNNNNr   r   )Fr   )FF)F)r   )-numpyr-   sklearn.baser   r   scipy.sparser   scipy.sparse.csgraphr   r   joblibr   r   r	   joblib.parallelr
   _hdbscan_linkager   plotsr   r   r   
predictionr   _hdbscan_treer   hdbscan_r   rb   r7   rB   rn   r   r   r   rG   r   rH   rK   rJ   rL   rM   r   r/  r@   r`   r`   r`   ra   <module>   sf   
  h
!
!2*&(
.
5
   I