o
    {i8}                    @   s^
  d dl mZ d dlZd dlZd dlZd dlmZm	Z	m
Z
mZmZ d dlmZmZ d dlZd dlmZ dZeejjd Zeejjd Zedg d	Zejddd Zejdddddf Zejddd Z ejZ!e"ed
ed
fZ#ejddd Z$ej%dd e&dD ejdZ'ej(ej)*ejddd ejddd ee!fejdddddf ejddd ej+ddd ej,ej,ejddd ejejej,ej,ej,ej,d	dddddd Z-ej(ej)*ejddd ejddd e e!fejdddddf ejddd ej+ddd ej,ej,ejddd ejejej,ej,ej,ej,d	dddddd Z.ej(ej)*ejddd ejddd ee!fejdddddf ejddd ej+ddd ej,ej,ejddd ejejej,ej,ej,ej,d	dddddd Z/ej(dddej)jddd ej)jddd ej)jej)j,dddd Z0ej(dddddd Z1dZ2ej(ddd dd!d" Z3ej(ddd dd#d$ Z4ej(ddd ddd&d'Z5d(Z6ej(ddd dd)d* Z7ej(ddd dd+d, Z8ej(dd ej)jej)jd-d.	/	0dd1d2Z9ej(dd ej)jej)jd-d.	/	0dd3d4Z:ej(dd d5	/	 	0dd6d7Z;ej(ddd dd8d9 Z<ej(ddd dd:d; Z=ej(dd ej)jej)jd-d.	/	0dd<d=Z>ej(dd ej)jej)jd-d.	/	0dd>d?Z?ej(dd d5	/	 	0dd@dAZ@ej(ddd dddCdDZAej(ddd ddEdF ZBej(dd ej)jej)jd-d.	/	0ddGdHZCej(dd d5	/	0ddIdJZDej(dej)jej)jd-dK	/	0ddLdMZEej(dej)Fe#ej)jej)jdNdK	/	0ddOdPZGej(dej)Fe#ej)jej)jdNdK	/	0ddQdRZHej(dej)jej)jd-dK	/	0ddSdTZIej(dej)jej)jd-dK	/	0ddUdVZJej(ddWddXdYZKej(ddW	/	 	0ddZd[ZLej(ddWdd\d]ZMej(d^ej)Nej)jOej)jdd_dd`ej)jej)jOej)jdd_dd`ej)jOej)j+dd_d d`gdej)jej)jPej)jQdaddbdcdd ZRej(deej)Nej)jOej)jdd_dd`ej)jej)jOej)jdd_dd`ej)jOej)j+dd_d d`gdej)jej)jPej)jQdaddbdfdg ZSej(dhej)jOej)jdd_dd`ej)jOej)jdd_dd`ej)jOej)jdid_dd`ej)jOej)jdd_dd`ej)jOej)jdid_dd`ej)jOej)jdd_dd`ej)jOej)j+dd_d d`gej)j,ej)jNdjddkdldm ZTej(dnej)jOej)jdd_dd`ej)jOej)jdd_dd`ej)jOej)jdid_dd`ej)jOej)jdd_dd`ej)jOej)jdid_dd`ej)jOej)jdd_dd`ej)jOej)j+dd_d d`gej)j,ej)jNdjddkdodp ZUej(dddqdrds ZVej(dtej)j,iddkdudv ZW		 	 	0ddwdxZXej(ddWdydz ZYd{d| ZZd}d~ Z[dd Z\e( dd Z]ej(dddd Z^dd Z_d Z`dZadiZbdBZcdZddd Zedd Zfej(dej+ddd ejej,dd ddd Zgej(dejejdd ddd ZhdS )    )warnN)
sparse_mulsparse_diff
sparse_sumarr_intersectsparse_dot_product)tau_rand_intnorm)
namedtupleg:0yE>   FlatTreehyperplanesoffsetschildrenindices	leaf_sizec                 C   s   g | ]	}t |d qS )1)bincount.0i r   H/home/ubuntu/.local/lib/python3.10/site-packages/pynndescent/rp_trees.py
<listcomp>&   s    r      dtype)	n_leftn_righthyperplane_vectorhyperplane_offsetmargindr   
left_indexright_indexT)localsfastmathnogilcachec                 C   s  | j d }t||j d  }t||j d  }|||k7 }||j d  }|| }|| }t| | }t| | }	t|tk r@d}t|	tk rHd}	tj|tjd}
t|D ]}| ||f | | ||f |	  |
|< qTt|
}t|tk rud}t|D ]
}|
| | |
|< qyd}d}t|j d tj	}t|j d D ]L}d}t|D ]}||
| | || |f  7 }qt|tk rt|d ||< || dkr|d7 }q|d7 }q|dkrd||< |d7 }qd||< |d7 }q|dks|dkrd}d}t|j d D ]}t|d ||< || dkr|d7 }q|d7 }qtj|tj
d}tj|tj
d}d}d}t|j d D ] }|| dkrE|| ||< |d7 }q0|| ||< |d7 }q0|||
dfS )M  Given a set of ``graph_indices`` for graph_data points from ``graph_data``, create
    a random hyperplane to split the graph_data, returning two arrays graph_indices
    that fall on either side of the hyperplane. This is the basis for a
    random projection tree, which simply uses this splitting recursively.
    This particular split uses cosine distance to determine the hyperplane
    and which side each graph_data sample falls on.
    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The original graph_data to be split
    indices: array of shape (tree_node_size,)
        The graph_indices of the elements in the ``graph_data`` array that are to
        be split in the current operation.
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    Returns
    -------
    indices_left: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    indices_right: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    r   r         ?r              )shaper   r	   absEPSnpemptyfloat32rangeint8int32)datar   	rng_statedimr&   r'   leftright	left_norm
right_normr"   r%   hyperplane_normr    r!   sider   r$   indices_leftindices_rightr   r   r   angular_random_projection_split)   sv   
,






rD   c                 C   s  | j d }t||j d  }t||j d  }|||k7 }||j d  }|| }|| }d}d}	tj|d tjd}
|
d| }|
|d }t|D ]"}| ||f | ||f A }|| ||f @ ||< || ||f @ ||< qJd}t|D ]}|t|
|  7 }|t| ||f  7 }|	t| ||f  7 }	qsd}d}t|j d tj}t|j d D ]^}d}t|D ]"}|t|| | || |f @  7 }|t|| | || |f @  8 }qt|t	k rt|d ||< || dkr|d7 }q|d7 }q|dkrd||< |d7 }qd||< |d7 }q|dks|dkr8d}d}t|j d D ]}t|d ||< || dkr2|d7 }q|d7 }qtj|tj
d}tj|tj
d}d}d}t|j d D ] }|| dkrh|| ||< |d7 }qS|| ||< |d7 }qS|||
dfS )r,   r   r   r.   r/   r   N)r0   r   r3   r4   uint8r6   popcntr7   r1   r2   r8   )r9   r   r:   r;   r&   r'   r<   r=   r>   r?   r"   positive_hyperplane_componentnegative_hyperplane_componentr%   
xor_vectorr@   r    r!   rA   r   r$   rB   rC   r   r   r   )angular_bitpacked_random_projection_split   st   
, "



rJ   c                 C   sd  | j d }t||j d  }t||j d  }|||k7 }||j d  }|| }|| }d}tj|tjd}	t|D ]$}
| ||
f | ||
f  |	|
< ||	|
 | ||
f | ||
f   d 8 }q:d}d}t|j d tj}t|j d D ]N}|}t|D ]}
||	|
 | || |
f  7 }q|t|tk rtt|d ||< || dkr|d7 }qt|d7 }qt|dkrd||< |d7 }qtd||< |d7 }qt|dks|dkrd}d}t|j d D ]}t|d ||< || dkr|d7 }q|d7 }qtj|tj	d}tj|tj	d}d}d}t|j d D ] }|| dkr || ||< |d7 }q|| ||< |d7 }q|||	|fS )aP  Given a set of ``graph_indices`` for graph_data points from ``graph_data``, create
    a random hyperplane to split the graph_data, returning two arrays graph_indices
    that fall on either side of the hyperplane. This is the basis for a
    random projection tree, which simply uses this splitting recursively.
    This particular split uses euclidean distance to determine the hyperplane
    and which side each graph_data sample falls on.
    Parameters
    ----------
    data: array of shape (n_samples, n_features)
        The original graph_data to be split
    indices: array of shape (tree_node_size,)
        The graph_indices of the elements in the ``graph_data`` array that are to
        be split in the current operation.
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    Returns
    -------
    indices_left: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    indices_right: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    r   r   r.   r          @r/   )
r0   r   r3   r4   r5   r6   r7   r1   r2   r8   )r9   r   r:   r;   r&   r'   r<   r=   r#   r"   r%   r    r!   rA   r   r$   rB   rC   r   r   r   !euclidean_random_projection_split0  sd   
,"





rL   )normalized_left_datanormalized_right_datar@   r   )r)   r*   r+   r(   c           "      C   sF  t ||jd  }t ||jd  }|||k7 }||jd  }|| }|| }| || ||d   }	||| ||d   }
| || ||d   }||| ||d   }t|
}t|}t|tk rgd}t|tk rod}|
| tj}|| tj}t|	|||\}}t|}t|tk rd}t	|jd D ]
}|| | ||< qd}d}t
|jd tj}t	|jd D ]l}d}| |||  ||| d   }||||  ||| d   }t||||\}}|D ]}||7 }qt|tk rt |d ||< || dkr|d7 }q|d7 }q|dkrd||< |d7 }qd||< |d7 }q|dks2|dkrZd}d}t	|jd D ]}t |d ||< || dkrT|d7 }q=|d7 }q=tj
|tjd}tj
|tjd} d}d}t	|jd D ] }|| dkr|| ||< |d7 }qu|| | |< |d7 }qut||f}!|| |!dfS )  Given a set of ``graph_indices`` for graph_data points from a sparse graph_data set
    presented in csr sparse format as inds, graph_indptr and graph_data, create
    a random hyperplane to split the graph_data, returning two arrays graph_indices
    that fall on either side of the hyperplane. This is the basis for a
    random projection tree, which simply uses this splitting recursively.
    This particular split uses cosine distance to determine the hyperplane
    and which side each graph_data sample falls on.
    Parameters
    ----------
    inds: array
        CSR format index array of the matrix
    indptr: array
        CSR format index pointer array of the matrix
    data: array
        CSR format graph_data array of the matrix
    indices: array of shape (tree_node_size,)
        The graph_indices of the elements in the ``graph_data`` array that are to
        be split in the current operation.
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    Returns
    -------
    indices_left: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    indices_right: array
        The elements of ``graph_indices`` that fall on the "left" side of the
        random hyperplane.
    r   r   r-   r.   r/   r   )r   r0   r	   r1   r2   astyper3   r5   r   r6   r4   r7   r   r8   vstack)"indsindptrr9   r   r:   r&   r'   r<   r=   	left_inds	left_data
right_inds
right_datar>   r?   rM   rN   hyperplane_indshyperplane_datar@   r%   r    r!   rA   r   r$   i_indsi_data_mul_datavalrB   rC   
hyperplaner   r   r   &sparse_angular_random_projection_split  s   *  





r`   )r)   r*   r+   c                 C   s  t t||jd  }t t||jd  }|||k7 }||jd  }|| }|| }| || ||d   }	||| ||d   }
| || ||d   }||| ||d   }d}t|	|
||\}}t|	|
||\}}|d }t||||t j\}}|D ]}||8 }qd}d}t 	|jd t j
}t|jd D ]l}|}| |||  ||| d   }||||  ||| d   }t||||\}}|D ]}||7 }qt|tk rtt|d ||< || dkr|d7 }q|d7 }q|dkrd||< |d7 }qd||< |d7 }q|dks|dkrAd}d}t|jd D ]}tt|d ||< || dkr;|d7 }q"|d7 }q"t j	|t jd}t j	|t jd}d}d}t|jd D ] }|| dkrq|| ||< |d7 }q\|| ||< |d7 }q\t ||f}||||fS )rO   r   r   r.   rK   r/   r   )r3   r1   r   r0   r   r   r   rP   r5   r4   r7   r6   r2   r8   rQ   )rR   rS   r9   r   r:   r&   r'   r<   r=   rT   rU   rV   rW   r#   rX   rY   offset_indsoffset_datar^   r    r!   rA   r   r$   rZ   r[   r\   r]   rB   rC   r_   r   r   r   (sparse_euclidean_random_projection_split1  sz    
  





rc   i  Fc                 C   s`   d}| j d d }||kr.|| d }| | |kr|S | | |k r&|d }n|d }||ksdS )z5Binary search returning index if found, -1 otherwise.r   r   r/   r   )r0   )
sorted_arrvaluelohimidr   r   r   binary_search  s   
ri   c                 C   sl   | j d }tj|tjd}t|D ]"}t| j d D ]}| ||f }|dkr2||k r2||  d7  < qq|S )a  Compute global in-degree for all points in the graph.

    In-degree of a point is how many times it appears as a neighbor of other points.
    This is computed once and reused throughout tree construction.

    Parameters
    ----------
    neighbor_indices : array of shape (n_samples, n_neighbors)
        The neighbor graph indices.

    Returns
    -------
    global_degrees : array of shape (n_samples,)
        The in-degree of each point.
    r   r   r   )r0   r3   zerosr8   r6   )neighbor_indicesn_pointsglobal_degreesr   jneighborr   r   r   compute_global_degrees  s   
rp      c                 C   s   | j d }t||}tj|tdtjd}tj|tjd}t|D ]U}|| |  }|||d  krw|d }	|	dkrR|||	d  krR|	d8 }	|	dkrR|||	d  ksBt|d |	dD ]}
||
d  ||
< ||
d  ||
< qZ|||	< | | ||	< q"|S )a.  Get the indices of the top k highest-degree points from a subset.

    Uses an efficient O(n) selection for small k by maintaining a min-heap of k elements.

    Parameters
    ----------
    indices : array of shape (n,)
        The point indices in the current split.
    global_degrees : array of shape (n_total,)
        Precomputed global degrees for all points.
    k : int
        Number of top hubs to return.

    Returns
    -------
    top_hubs : array of shape (min(k, n),)
        The actual point indices (not positions) of the top k hubs.
    r   r   r   r   r0   minr3   fullr8   r4   r6   )r   rm   krl   actual_ktop_degreestop_indicesr   deg
insert_posrn   r   r   r   get_top_k_hub_indices  s$   

r{   g?c              	   C   sJ  | j d }|j d }t||d}|j d }td}	td}
td}tj|tjd}td}tj|tjd}tj|tjd}t|D ]}t|d |D ]}|| }|| }td}tj|tjd}t|D ]$}| ||f | ||f  ||< ||| | ||f | ||f   d 8 }qitd}td}t|D ]I}|}t|D ]}||| | || |f  7 }q|t	krd||< |d7 }q|t	 k rd||< |d7 }q|d ||< || dkr|d7 }q|d7 }q|dks|dkrqNtt
||t| }||	kr%|}	|}
|}|}t|D ]	}|| ||< qt|D ]	}|| ||< qqNqE|
dks1|dkr_td}
td}t|D ]}tt|d ||< || dkrY|
d7 }
q?|d7 }q?tj|
tjd}tj|tjd}td}td}t|D ] }|| dkr|| ||< |d7 }q}|| ||< |d7 }q}|||||	fS )a  Hub-based graph-informed split using balance-based selection.

    Uses the top 3 highest-degree nodes to generate all 3 possible hyperplanes,
    then selects the one with the best balance (closest to 50/50 split).
    This is much faster than edge-cut counting while still producing good quality trees.

    Parameters
    ----------
    data : array of shape (n_samples, n_features)
        The data array.
    indices : array of shape (n,)
        Indices of points in this node.
    neighbor_indices : array of shape (n_samples, n_neighbors)
        The neighbor graph.
    global_degrees : array of shape (n_samples,)
        Precomputed global in-degrees.
    rng_state : array of int64, shape (3,)
        RNG state (only used for fallback).

    Returns
    -------
    indices_left, indices_right, hyperplane, offset, balance
        The balance is returned so the caller can decide whether to accept the split.
    r   r      r.   r   rK   r/   )r0   r{   r3   r5   uint32rj   r7   r4   r6   r2   rs   r1   r   r8   )r9   r   rk   rm   r:   r;   rl   top_hubsn_hubsbest_balancebest_n_leftbest_n_rightbest_hyperplanebest_offset	best_siderA   rg   hjr<   r=   r#   r"   r%   r    r!   r   r$   balancerB   rC   r   r   r   euclidean_hub_split!  s   







"







6



r   c              	   C   s  | j d }|j d }t||d}|j d }td}	td}
td}tj|tjd}tj|tjd}tj|tjd}t|D ]}t|d |D ]}|| }|| }t	| | }t	| | }t
|tk rhd}t
|tk rpd}tj|tjd}t|D ]}| ||f | | ||f |  ||< q|t	|}t
|tk rd}t|D ]
}|| | ||< qtd}td}t|D ]M}td}t|D ]}||| | || |f  7 }q|tkrd||< |d7 }q|t k rd||< |d7 }q|d ||< || dkr|d7 }q|d7 }q|dks|dkrqJtt||t| }||	krG|}	|}
|}t|D ]	}|| ||< q/t|D ]	}|| ||< q=qJq@|
dksS|dkrtd}
td}t|D ]}t
t|d ||< || dkr{|
d7 }
qa|d7 }qatj|
tjd}tj|tjd}td}td}t|D ] }|| dkr|| ||< |d7 }q|| ||< |d7 }q|||td|	fS )a  Angular hub-based split using balance-based selection.

    Uses the top 3 highest-degree nodes to generate all 3 possible hyperplanes,
    then selects the one with the best balance (closest to 50/50 split).

    Returns
    -------
    indices_left, indices_right, hyperplane, offset, balance
        The balance is returned so the caller can decide whether to accept the split.
    r   r   r|   r.   r   r-   r/   )r0   r{   r3   r5   r}   rj   r7   r4   r6   r	   r1   r2   rs   r   r8   )r9   r   rk   rm   r:   r;   rl   r~   r   r   r   r   r   r   rA   rg   r   r<   r=   r>   r?   r"   r%   r@   r    r!   r   r$   r   rB   rC   r   r   r   angular_hub_split  s   















@



r   )left_node_numright_node_num)r*   r+   r(         c                 C   v  |j d |	kr|
dkrt| ||||\}}}}}|tk rC|tjdgtjd |tj  |tdtdf || dS t	| |||||||||	|
d  t
|d }t	| |||||||||	|
d  t
|d }|| || |t|t|f |tjdgtjd dS |tjdgtjd |tj  |tdtdf || dS )zRecursive tree builder using hub-based splits.

    Stops splitting if:
    - Node size <= leaf_size
    - max_depth reached
    - Best split balance < MIN_SPLIT_BALANCE (creates larger leaf instead of bad split)
    r         r   r   Nr   )r0   r   MIN_SPLIT_BALANCEappendr3   arrayr5   infr8   make_hub_euclidean_treelenr9   r   rk   rm   r   r   r   point_indicesr:   r   	max_depthleft_indicesright_indicesr_   offsetr   r   r   r   r   r   r     l   




r   c                 C   r   )zRecursive tree builder using angular hub-based splits.

    Stops splitting if:
    - Node size <= leaf_size
    - max_depth reached
    - Best split balance < MIN_SPLIT_BALANCE (creates larger leaf instead of bad split)
    r   r   r   r   Nr   )r0   r   r   r   r3   r   r5   r   r8   make_hub_angular_treer   r   r   r   r   r   w  r   r   )r*   r+   c                 C   s   t |}t| jd tj}tjj	t
}tjj	t}	tjj	t}
tjj	t}|r>t| |||||	|
||||d nt| |||||	|
||||d |}|D ]}t||kr`tt|}qQt||	|
||}|S )a  Build an RP tree using simplified hub-based hyperplane selection.

    This version precomputes global degrees once and uses the top 3 highest-degree
    nodes at each split to generate all 3 possible hyperplanes. This is simpler
    and significantly faster than the random sampling approach while maintaining
    or improving tree quality.

    Parameters
    ----------
    data : array of shape (n_samples, n_features)
        The data to build the tree on.
    neighbor_indices : array of shape (n_samples, n_neighbors)
        The neighbor graph indices.
    rng_state : array of int64, shape (3,)
        The internal state of the rng.
    leaf_size : int
        The maximum size of a leaf node.
    angular : bool
        Whether to use angular (cosine) or euclidean distance.
    max_depth : int
        Maximum tree depth.

    Returns
    -------
    tree : FlatTree
        The constructed tree.
    r   r   )rp   r3   aranger0   rP   r8   numbatypedList
empty_listdense_hyperplane_typeoffset_typechildren_typepoint_indices_typer   r   r   r   )r9   rk   r:   r   angularr   rm   r   r   r   r   r   max_leaf_sizepointsresultr   r   r   make_hub_tree  sP   %r   c           2   	   C   s  |j d }t||d}|j d }	tj|j d dtjd}
t|D ]}||
|| < q ttdg}ttdg}td}tj	|tj
d}td}td}td}tj	|tj
d}t|	D ]b}t|d |	D ]W}|| }|| }| || ||d   }||| ||d   }| || ||d   }||| ||d   }td}t||||\}}t||||\}} | d	 } t|||| tj\}} | D ]}!||!8 }qtd}"td}#t|D ]j}|}$| |||  ||| d   }%||||  ||| d   }&t|||%|&\}'}(|(D ]}!|$|!7 }$q|$tkr,d||< |"d7 }"q|$t k r;d||< |#d7 }#q|d
 ||< || dkrM|"d7 }"q|#d7 }#q|"dks\|#dkr]qmtd})t|D ]7}|| }*|| }+t|j d D ]$},||*|,f }-|-dk r n|
|- }.|.dkr||. |+kr|)d7 })qwqf|)d
 })|)|k r|)}|"}|#}| }| }|}t|D ]	}|| ||< qqmqc|dks|dkrtd}td}t|D ]}tt|d
 ||< || dkr|d7 }q|d7 }qtj	|tjd}/tj	|tjd}0td}"td}#t|D ] }|| dkr2|| |/|"< |"d7 }"q|| |0|#< |#d7 }#qt||f}1|/|0|1|fS )zSimplified hub-based split for sparse euclidean data.

    Uses the top 3 highest-degree nodes to generate 3 possible hyperplanes,
    then selects the one that minimizes edge cuts.
    r   r|   r   r   r   r.       r   rK   r/   )r0   r{   r3   rt   r8   r6   r   r5   float64r4   r7   r}   r   r   r   rP   r2   copyr1   r   rQ   )2rR   rS   spdatar   rk   rm   r:   rl   r~   r   
idx_to_posr   best_hyperplane_indsbest_hyperplane_datar   r   best_edge_cutsr   r   rA   rg   r   r<   r=   rT   rU   rV   rW   r#   rX   rY   ra   rb   r^   r    r!   r$   rZ   r[   r\   r]   	edge_cuts	point_idx
point_sidej_nbro   neighbor_posrB   rC   r_   r   r   r   sparse_euclidean_hub_split(  s   










  








U



r   c           4      C   s  |j d }t||d}|j d }	tj|j d dtjd}
t|D ]}||
|| < q ttdg}ttdg}tj|tj	d}t
d}t
d}t
d}tj|tj	d}t|	D ]}t|d |	D ]z}|| }|| }| || ||d   }||| ||d   }| || ||d   }||| ||d   }t|}t|}t|tk rd}t|tk rd}|| tj}|| tj}t||||\}} t| }!t|!tk rd}!t| j d D ]
}"| |" |! | |"< qt
d}#t
d}$t|D ]q}td	}%| |||  ||| d   }&||||  ||| d   }'t|| |&|'\}(})|)D ]}*|%|*7 }%q2|%tkrId||< |#d7 }#q |%t k rYd||< |$d7 }$q |d
 ||< || dkrl|#d7 }#q |$d7 }$q |#dks||$dkr}qht
d}+t|D ]7}|| },|| }-t|j d D ]$}.||,|.f }/|/dk r n|
|/ }0|0dkr||0 |-kr|+d7 }+qq|+d
 }+|+|k r|+}|#}|$}| }|  }t|D ]	}|| ||< qqhq^|dks|dkrt
d}t
d}t|D ]}tt|d
 ||< || dkr|d7 }q|d7 }qtj|tjd}1tj|tjd}2t
d}#t
d}$t|D ] }|| dkrP|| |1|#< |#d7 }#q;|| |2|$< |$d7 }$q;t||f}3|1|2|3td	fS )zSimplified hub-based split for sparse angular data.

    Uses the top 3 highest-degree nodes to generate 3 possible hyperplanes,
    then selects the one that minimizes edge cuts.
    r   r|   r   r   r   r   r   r-   r.   r/   )r0   r{   r3   rt   r8   r6   r   r5   r4   r7   r}   r	   r1   r2   rP   r   r   r   r   r   rQ   )4rR   rS   r   r   rk   rm   r:   rl   r~   r   r   r   r   r   r   r   r   r   rA   rg   r   r<   r=   rT   rU   rV   rW   r>   r?   rM   rN   rX   rY   r@   r%   r    r!   r$   rZ   r[   r\   r]   r^   r   r   r   r   ro   r   rB   rC   r_   r   r   r   sparse_angular_hub_split  s   







  




X



r   c                 C   0  |j d |krn|dkrnt| ||||||
\}}}}t| |||||||||	|
||d  t|	d }t| |||||||||	|
||d  t|	d }|| || |t|t|f |	tjdgtjd dS |tjdgdggtjd |tj	  |tdtdf |	| dS )zDRecursive tree builder using simplified sparse euclidean hub splits.r   r   r   r   r   N)
r0   r   make_sparse_hub_euclidean_treer   r   r3   r8   r   r   r   rR   rS   r   r   rk   rm   r   r   r   r   r:   r   r   r   r   r_   r   r   r   r   r   r   r   R  f   


r   c                 C   r   )zBRecursive tree builder using simplified sparse angular hub splits.r   r   r   r   r   N)
r0   r   make_sparse_hub_angular_treer   r   r3   r8   r   r   r   r   r   r   r   r     r   r   c                 C   s   t |}t|jd d tj}	tjj	t
}
tjj	t}tjj	t}tjj	t}|rBt| |||	|||
||||||d nt| |||	|||
||||||d |}|D ]}t||krftt|}qWt|
||||}|S )a  Build a sparse RP tree using simplified hub-based hyperplane selection.

    This version precomputes global degrees once and uses the top 3 highest-degree
    nodes at each split to generate all 3 possible hyperplanes.

    Parameters
    ----------
    inds : array
        CSR format index array of the matrix.
    indptr : array
        CSR format index pointer array of the matrix.
    spdata : array
        CSR format data array of the matrix.
    neighbor_indices : array of shape (n_samples, n_neighbors)
        The neighbor graph indices.
    rng_state : array of int64, shape (3,)
        The internal state of the rng.
    leaf_size : int
        The maximum size of a leaf node.
    angular : bool
        Whether to use angular (cosine) or euclidean distance.
    max_depth : int
        Maximum tree depth.

    Returns
    -------
    tree : FlatTree
        The constructed tree.
    r   r   r   )rp   r3   r   r0   rP   r8   r   r   r   r   sparse_hyperplane_typer   r   r   r   r   r   r   )rR   rS   r   rk   r:   r   r   r   rm   r   r   r   r   r   r   r   r   r   r   r   make_sparse_hub_tree  sX   )r   r|   c                 C   s   | j d }t||}tj|tdtjd}tj|tjd}t|D ]U}|| |  }	|	||d  krw|d }
|
dkrR|	||
d  krR|
d8 }
|
dkrR|	||
d  ksBt|d |
dD ]}||d  ||< ||d  ||< qZ|	||
< | | ||
< q"|S )zGet the indices of the top k highest-degree points for bit data.

    Also returns the pair with maximum Hamming distance among the top k.
    r   r   r   r   rr   )r   r9   rm   ru   rl   rv   rw   rx   r   ry   rz   rn   r   r   r   get_top_k_hub_indices_bitR  s$   


r   c           %   
   C   s0  | j d }|j d }t|| |d}|j d }tj|j d dtjd}	t|D ]}
|
|	||
 < q&tj|d tjd}tj|tj	d}t
d}t
d}t
d}tj|tj	d}t|D ]:}t|d |D ]/}|| }|| }tj|d tjd}|d| }||d }t|D ]"}| ||f | ||f A }|| ||f @ ||< || ||f @ ||< qt
d}t
d}t|D ]`}
td	}t|D ]"}|t|| | ||
 |f @  7 }|t|| | ||
 |f @  8 }q|tkrd||
< |d7 }q|t k rd||
< |d7 }q|
d ||
< ||
 dkr|d7 }q|d7 }q|dks'|dkr(qft
d}t|D ]7}
||
 }||
 }t|j d D ]$} ||| f }!|!dk rQ n|	|! }"|"dkre||" |kre|d7 }qBq1|d }||k r|}|}|}t|d D ]	}|| ||< q~t|D ]	}
||
 ||
< qqfq\|dks|dkrt
d}t
d}t|D ]}
tt|d ||
< ||
 dkr|d7 }q|d7 }qtj|tjd}#tj|tjd}$t
d}t
d}t|D ] }
||
 dkr||
 |#|< |d7 }q||
 |$|< |d7 }q|#|$|td	fS )
zSimplified hub-based split for bit-packed data.

    Uses the top 3 highest-degree nodes to generate 3 possible hyperplanes,
    then selects the one that minimizes edge cuts.
    r   r   r|   r   r   r/   r   Nr.   )r0   r   r3   rt   r8   r6   rj   rE   r4   r7   r}   r5   rF   r2   r1   r   )%r9   r   rk   rm   r:   r;   rl   r~   r   r   r   r   r   r   r   r   rA   rg   r   r<   r=   r"   rG   rH   r%   rI   r    r!   r$   r   r   r   r   ro   r   rB   rC   r   r   r   bit_hub_splitu  s   
















F



r   c                 C   s$  |j d |	krh|
dkrht| ||||\}}}}t| |||||||||	|
d  t|d }t| |||||||||	|
d  t|d }|| || |t|t|f |tjdgtjd dS |tjtdgtjd |tj	  |tdtdf || dS )z7Recursive tree builder using simplified bit hub splits.r   r   r   r   N)
r0   r   make_bit_hub_tree_recursiver   r   r3   r8   r   rE   r   )r9   r   rk   rm   r   r   r   r   r:   r   r   r   r   r_   r   r   r   r   r   r   r     sZ   


r   c                 C   s   t |}t| jd tj}tjj	t
}tjj	t}tjj	t}	tjj	t}
t| ||||||	|
|||d |}|
D ]}t||krNtt|}q?t|||	|
|}|S )a  Build a bit-packed RP tree using simplified hub-based hyperplane selection.

    This version precomputes global degrees once and uses the top 3 highest-degree
    nodes at each split to generate all 3 possible hyperplanes.

    Parameters
    ----------
    data : array of shape (n_samples, n_features)
        The bit-packed data to build the tree on.
    neighbor_indices : array of shape (n_samples, n_neighbors)
        The neighbor graph indices.
    rng_state : array of int64, shape (3,)
        The internal state of the rng.
    leaf_size : int
        The maximum size of a leaf node.
    max_depth : int
        Maximum tree depth.

    Returns
    -------
    tree : FlatTree
        The constructed tree.
    r   r   )rp   r3   r   r0   rP   r8   r   r   r   r   bit_hyperplane_typer   r   r   r   r   r   )r9   rk   r:   r   r   rm   r   r   r   r   r   r   r   r   r   r   r   make_bit_hub_tree>  s4    r   )r*   r(   c	                 C     |j d |krb|dkrbt| ||\}	}
}}t| |	|||||||d 	 t|d }t| |
|||||||d 	 t|d }|| || |t|t|f |tjdgtjd d S |tjdgtjd |tj	  |tdtdf || d S Nr   r   r   r   r   )
r0   rL   make_euclidean_treer   r   r3   r8   r   r5   r   r9   r   r   r   r   r   r:   r   r   r   r   r_   r   r   r   r   r   r   r   }  sR   



r   )r   r   r   c	                 C   r   r   )
r0   rD   make_angular_treer   r   r3   r8   r   r5   r   r   r   r   r   r     R   



r   c	                 C   r   )Nr   r   r   r      )
r0   rJ   make_bit_treer   r   r3   r8   r   rE   r   r   r   r   r   r   	  r   r   c                 C   $  |j d |	krh|
dkrht| ||||\}}}}t| |||||||||	|
d  t|d }t| |||||||||	|
d  t|d }|| || |t|t|f |tjdgtjd d S |tjdgdggtjd |tj	  |tdtdf || d S r   )
r0   rc   make_sparse_euclidean_treer   r   r3   r8   r   r   r   rR   rS   r9   r   r   r   r   r   r:   r   r   r   r   r_   r   r   r   r   r   r   r   E	  s^   



r   c                 C   r   r   )
r0   r`   make_sparse_angular_treer   r   r3   r8   r   r   r   r   r   r   r   r   	  sZ   


r   )r*   c                 C   s   t | jd t j}tjjt	}tjjt
}tjjt}tjjt}	|r8t| |||||	|||d	 nt| |||||	|||d	 |}
|	D ]}t||
krXtt|}
qIt||||	|
}|S )Nr   r   )r3   r   r0   rP   r8   r   r   r   r   r   r   r   r   r   r   r   r   r9   r:   r   r   r   r   r   r   r   r   r   r   r   r   r   r   make_dense_tree	  sF   r   c                 C   s   t |jd d t j}tjjt	}tjjt
}	tjjt}
tjjt}|r<t| |||||	|
||||d nt| |||||	|
||||d |}|D ]}t||kr^tt|}qOt||	|
||S )Nr   r   r   )r3   r   r0   rP   r8   r   r   r   r   r   r   r   r   r   r   r   r   )rR   rS   r   r:   r   r   r   r   r   r   r   r   r   r   r   r   r   make_sparse_tree	  sL   
r   c                 C   s   t | jd t j}tjjt	}tjjt
}tjjt}tjjt}	|r8t| |||||	|||d	 ntd|}
|	D ]}t||
krOtt|}
q@t||||	|
}|S )Nr   r   z,Euclidean bit trees are not implemented yet.)r3   r   r0   rP   r8   r   r   r   r   r   r   r   r   r   NotImplementedErrorr   r   r   r   r   r   make_dense_bit_tree3
  s2   r   zb1(f4[::1],f4,f4[::1],i8[::1])C)readonly)r$   r;   r%   )r)   r(   r+   c                 C   sn   |}|j d }t|D ]}|| | ||  7 }qt|tk r/tt|d }|dkr-dS dS |dkr5dS dS Nr   r/   r   )r0   r6   r1   r2   r3   r   r_   r   pointr:   r$   r;   r%   rA   r   r   r   select_sideT
  s   
r   zb1(u1[::1],f4,u1[::1],i8[::1])c                 C   s   |}|j d }t|D ]}|t| | || @  7 }|t| ||  || @  8 }qt|tk r?tt|d }|dkr=dS dS |dkrEdS dS r   )r0   r6   rF   r1   r2   r3   r   r   r   r   r   select_side_bitx
  s   
r   z<i4[::1](f4[::1],f4[:,::1],f4[::1],i4[:,::1],i4[::1],i8[::1])r/   )noderA   )r(   r+   c                 C   |   d}||df dkr.t || || | |}|dkr ||df }n||df }||df dks
|||df  ||df   S Nr   r   )r   r   r   r   r   r   r:   r   rA   r   r   r   search_flat_tree
      r   z<i4[::1](u1[::1],u1[:,::1],f4[::1],i4[:,::1],i4[::1],i8[::1])c                 C   r   r   )r   r   r   r   r   search_flat_bit_tree
  r   r   )r)   r+   c           
      C   s   |}| j d }| d|d f dk r|d8 }| d|d f dk s| dd |f tj}| dd |f }|t||||7 }t|tk rPt|d }	|	dkrNdS dS |dkrVdS dS )Nr   r   r.   r/   )r0   rP   r3   r8   r   r1   r2   r   )
r_   r   
point_inds
point_datar:   r$   hyperplane_sizerX   rY   rA   r   r   r   sparse_select_side
  s$   
r   r   c           	      C   s~   d}||df dkr/t || || | ||}|dkr!||df }n||df }||df dks
|||df  ||df   S r   )r   )	r   r   r   r   r   r   r:   r   rA   r   r   r   search_sparse_flat_tree
  s    	r   c
              
      s,  g }
du rt dtddt| |du rd}|jtt|dfdtjz[t	j
rEtj|dd	 fd
dt|D }
n4|r^tj|dd	 fddt|D }
n tj|dd	 fddt|D }
W t|
S W t|
S W t|
S  tttfy   td Y t|
S w )zBuild a random projection forest with ``n_trees``.

    Parameters
    ----------
    data
    n_neighbors
    n_trees
    leaf_size
    rng_state
    angular

    Returns
    -------
    forest: list
        A list of random projection trees.
    N<   r   rq   r   r|   )size	sharedmemn_jobsrequirec              
   3   s6    | ]}t tjjj|  d V  qdS r   N)joblibdelayedr   r   rS   r9   r   r   r9   r   r   
rng_statesr   r   	<genexpr>'  s    

zmake_forest.<locals>.<genexpr>c                 3   ,    | ]}t t|  d V  qdS r   )r   r   r   r   r   r   r   r   4      
c                 3   r   r   )r   r   r   r   r   r   r   r   ;  r   zRandom Projection forest initialisation failed due to recursionlimit being reached. Something is a little strange with your graph_data, and this may take longer than normal to compute.)maxrs   r3   r8   randint	INT32_MIN	INT32_MAXrP   int64scipysparseisspmatrix_csrr   Parallelr6   RuntimeErrorRecursionErrorSystemErrorr   tuple)r9   n_neighborsn_treesr   r:   random_stater   r   bit_treer   r   r   r   r   make_forest
  s>   



!r  c                 C   s   d}t t| jD ]}| j| d dkr!| j| d dkr!|d7 }q	tj||fdtjd}d}t t| jD ]+}| j| d dksJ| j| d dkra| j| jd }| j| ||d |f< |d7 }q6|S )Nr   r   r   r   )r6   r   r   r3   rt   r8   r   r0   )treer   n_leavesr   r   
leaf_indexr   r   r   r   get_leaves_from_treeK  s   $$r  c                    s8   t dd | D  tjddd fdd| D }|S )Nc                 S   s   g | ]}|j qS r   )r   r   rp_treer   r   r   r   ^  s    z.rptree_leaf_array_parallel.<locals>.<listcomp>r   r   r   c                 3   s     | ]}t t| V  qd S N)r   r   r  r  r   r   r   r   _  s
    
z-rptree_leaf_array_parallel.<locals>.<genexpr>)r3   r   r   r  )	rp_forestr   r   r  r   rptree_leaf_array_parallel]  s
   r  c                 C   s(   t | dkrtt| S tdggS )Nr   r   )r   r3   rQ   r  r   )r  r   r   r   rptree_leaf_arrayf  s   r  c           
   
   C   s   | j | d dk r-|t| j|  }| ||df< | ||df< | j| |||< ||fS | j| ||< | j| ||< |d ||df< |}	t| |||||d || j | d \}}|d ||	df< t| |||||d || j | d \}}||fS r   )r   r   r   r   r   recursive_convert
r  r   r   r   r   node_num
leaf_start	tree_nodeleaf_endold_node_numr   r   r   r  n  s@   

r  c           
   
   C   s  | j | d dk r-|t| j|  }| ||df< | ||df< | j| |||< ||fS | j| ||d d d | j| jd f< | j| ||< |d ||df< |}	t| |||||d || j | d \}}|d ||	df< t| |||||d || j | d \}}||fS r   )r   r   r   r   r0   r   recursive_convert_sparser  r   r   r   r    sB    

r  )r+   c                 C   sP   d}d}t t| jD ]}| j| d dk r|d7 }|d7 }q|d7 }q||fS r   )r6   r   r   )r  n_nodesr  r   r   r   r   num_nodes_and_leaves  s   

r   c              
   C   s0  t | \}}d}| jd jdkr.| jd jtjkr|d }n|}tj||f| jd jd}nd}|}tj|d|ftjd}d|d d dd d f< tj|tjd}tdtj	|dftjd }	tdtj	|tjd }
|rt
| |||	|
ddt| jd  nt| |||	|
ddt| jd  t|||	|
| jS )NFr   r   r/   r   Tr   )r   r   ndimr   r3   rE   rj   r5   r8   onesr  r   r   r  r   r   )r  	data_sizedata_dimr  r  	is_sparsehyperplane_dimr   r   r   r   r   r   r   convert_tree_format  s0   
r'     c                 C   s   | j | j| j| j| jf}|S r  r   r  r   r   r   r   denumbaify_tree  s   r*  c                 C   s(   t | t | t | t | t | t }|S r  )r   FLAT_TREE_HYPERPLANESFLAT_TREE_OFFSETSFLAT_TREE_CHILDRENFLAT_TREE_INDICESFLAT_TREE_LEAF_SIZEr)  r   r   r   renumbaify_tree   s   r0  )intersectionr   r   )parallelr(   r+   c                 C   sr   d}t |jd D ]$}t|| | j| j| j| j|}t|| |}|t 	|jd dk7 }q
|t 	|jd  S )Nr.   r   r   )
r   pranger0   r   r   r   r   r   r   r5   )r  rk   r9   r:   r   r   leaf_indicesr1  r   r   r   
score_tree  s   
r5  )r   r   )r*   r(   r+   c                 C   s   |j d }|j d }d}t| j}t|D ]b}t|}| j| d }| j| d }	|dkrw|	dkrw| j| }
|
j d }t|D ]6}|
| }|| }d}t|D ]}|| }t|D ]}|
| |krh|d7 } nqZqP|t|t| 7 }q@q|t| S )a~  Score a tree by measuring how well leaves contain nearest neighbors.

    For each point, computes the fraction of its k nearest neighbors that
    are in the same leaf. Returns the average of this fraction across all points.

    A score of 1.0 means all neighbors are always in the same leaf (perfect).
    A score of 0.0 means no neighbors are ever in the same leaf (worst).
    r   r   r.   r   )r0   r   r   r6   r   r8   r   r5   )r  rk   rl   ru   total_scorer  r   r   
left_childright_childr4  r   rn   idx	neighborsr   niro   lir   r   r   score_linked_tree%  s4   





r=  )rq   )r   r   )r   Fr   )r|   )NFFr   )iwarningsr   numpyr3   r   scipy.sparser   pynndescent.sparser   r   r   r   r   pynndescent.utilsr   r	   r   collectionsr
   r2   iinfor8   rs   r   r   r   r   r5   r   r   r   rE   r   r   typeofr   r   r   r6   rF   njittypesTupler   r}   rD   rJ   rL   r`   rc   FAST_SPLIT_THRESHOLDri   rp   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   ListTyper   r   r   r   r   r   r   booleanArrayintpuint16r   r   r   r   r   r   r  r  r  r  r  r  r   r'  r+  r,  r-  r.  r/  r*  r0  r5  r=  r   r   r   r   <module>   s  "2
r"2
o"2
d

|

3
|
wTTW
 
 KK_
|C><
<
<D
B
)
5 	
	






L
	&

(
"
	