o
    Xεiʆ                  	   @   sF  d dl Z d dlZd dlmZ d dlmZ d dlm	Z	 e 
 dd Ze j
ddde jje jje jje jjdd	d
d Zdd Zdd Ze j
edddZe j
edddZd)defddZ									d*ddZdd ZdddejddddfddZdd Zdddejddddfd d!Zd"d# Z	$	%		&						d+d'd(ZdS ),    N)tqdm)tau_rand_intc                 C   s   | dkrdS | dk rdS | S )zStandard clamping of a value into a fixed range (in this case -4.0 to
    4.0)

    Parameters
    ----------
    val: float
        The value to be clamped.

    Returns
    -------
    The clamped value, now fixed to be in the range -4.0 to 4.0.
    g      @g       )valr   r   @/home/ubuntu/.local/lib/python3.10/site-packages/umap/layouts.pyclip	   s
   r   zf4(f4[::1],f4[::1])T)resultdiffdimi)fastmathcachelocalsc                 C   s<   d}| j d }t|D ]}| | ||  }||| 7 }q|S )zReduced Euclidean distance.

    Parameters
    ----------
    x: array of shape (embedding_dim,)
    y: array of shape (embedding_dim,)

    Returns
    -------
    The squared euclidean distance between x and y
            r   )shaperange)xyr   r
   r   r	   r   r   r   rdist   s   
r   c           0      C   s$  t |jd D ]}|| |kr|| }|| }| | }|| }t||} |rdd|t| |   }!|| t| |d  d|t| |   }"|!||  }#|!||  }$|#d|d|!   t||  |"  }%|$d|d|!   t||  |"  }&|| }'|| ||| |  |'  }(|| ||| |  |'  })|| |(|% |)|&   || |  | }*| dkrd| | t| |d  }+|+|t| | d  }+nd}+t|
D ]8},t|+||, ||,   }-|r|-td|* ||, ||,   7 }-||,  |-| 7  < |r	||,  |- | 7  < q||  || 7  < t	|||  ||  }.t|.D ]^}/t
|| | }|| }t||} | dkrRd|	 | }+|+d|  |t| | d   }+n	||krYq$d}+t|
D ]!},|+dkrst|+||, ||,   }-nd}-||,  |-| 7  < q_q$||  |.||  7  < qd S )	Nr         ?   r                    @MbP?)numbapranger   r   pownpexpr   r   intr   )0head_embeddingtail_embeddingheadtail
n_verticesepochs_per_sampleabrng_state_per_samplegammar
   
move_otheralphaepochs_per_negative_sampleepoch_of_next_negative_sampleepoch_of_next_samplendensmap_flagdens_phi_sumdens_re_sumdens_re_covdens_re_stddens_re_meandens_lambdadens_Rdens_mudens_mu_totr   jkcurrentotherdist_squaredphi	dphi_termq_jkq_kjdrkdrj	re_std_sqweight_kweight_jgrad_cor_coeff
grad_coeffdgrad_dn_neg_samplespr   r   r   '_optimize_layout_euclidean_single_epoch?   s   
&  
 




rO   c                 C   s   | d | d t|jD ]F}|| }	|| }
| |	 }||
 }t||}dd|t||   }||	  || 7  < ||
  || 7  < ||	  |7  < ||
  |7  < qd}t|jD ]}t||| ||   ||< q^d S )Nr   r   g:0yE>)	fillr   r   sizer   r   r   r   log)r!   r"   r#   r$   r'   r(   re_sumphi_sumr   r;   r<   r=   r>   r?   r@   epsilonr   r   r   -_optimize_layout_euclidean_densmap_epoch_init   s"   



 rV   Fr   parallelrX   c                 C   s   | rt S tS )N)3_nb_optimize_layout_euclidean_single_epoch_parallel*_nb_optimize_layout_euclidean_single_epoch)rX   r   r   r   ._get_optimize_layout_euclidean_single_epoch_fn   s   r[   r         @c           )      C   s  | j d }|}|| }| }| }t|}|du ri }|du r#i }|rWtjtd|d}t|d d }|d }|d }|d	 }tj|tj	d
}tj|tj	d
}|d } n$d}d}tjdtj	d
}tjdtj	d
}tjdtj	d
}tjdtj	d
}d}!g }"t
|tr|}!t|!}d|vr| |d< tj| j d t|	f|	tjd
| dddf tjtjdd }#tt|fi |D ]}$|o|d dko|$d t| d|d  k}%|%r|| ||||||| tt||  }&t|}'t|||d  }(nd}&d}'d}(|| ||||||||#|
|||||||$|%|||(|&|'|||| |dt|$t|   }|rG|$t|d  dkrGtd|$d|d |!durX|$|!v rX|"|   q|!dure|"|   |!du rl| S |"S )a^  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).
    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.
    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.
    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.
    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.
    n_epochs: int, or list of int
        The number of training epochs to use in optimization, or a list of
        epochs at which to save the embedding. In case of a list, the optimization
        will use the maximum number of epochs in the list, and will return a list
        of embedding in the order of increasing epoch, regardless of the order in
        the epoch list.
    n_vertices: int
        The number of vertices (0-simplices) in the dataset.
    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.
    a: float
        Parameter of differentiable approximation of right adjoint functor
    b: float
        Parameter of differentiable approximation of right adjoint functor
    rng_state: array of int64, shape (3,)
        The internal state of the rng
    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.
    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.
    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.
    parallel: bool (optional, default False)
        Whether to run the computation using numba parallel.
        Running in parallel is non-deterministic, and is not used
        if a random seed has been set, to ensure reproducibility.
    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.
    densmap: bool (optional, default False)
        Whether to use the density-augmented densMAP objective
    densmap_kwds: dict (optional, default None)
        Auxiliary data for densMAP
    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.
    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding
    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   NTrW   mu_sumr   lambdaRmudtype	var_shiftr   disablefracr   
   z	completed z / epochs)r   copyr[   r   njitrV   r   sumzerosfloat32
isinstancelistmaxfulllenint64astypefloat64viewreshaper   r   floatsqrtvarmeandotr    printappend))r!   r"   r#   r$   n_epochsr%   r&   r'   r(   	rng_stater*   initial_alphanegative_sample_raterX   verbosedensmapdensmap_kwds	tqdm_kwdsr+   r
   r,   r-   r.   r/   optimize_fndens_init_fnr:   r7   r8   r9   r2   r3   dens_var_shiftepochs_listembedding_listr)   r0   r1   r5   r6   r4   r   r   r   optimize_layout_euclidean   s   
R


&


r   c           "   	   C   s  t | jd D ]}|| |kr|| }|| }|| }|| }|||g|R  \}}|||g|R  \}}|dkrItd|t|d|   d}nd}d| |d  |d  }t |D ](}t|||  }||  ||	 7  < |
rt|||  }||  ||	 7  < q[||  | | 7  < t|||  ||  } t | D ]X}!t|| | }|| }|||g|R  \}}|dkrtd|t|d|   d}n||krqd}|d | | |d  }t |D ]}t|||  }||  ||	 7  < qq||  | ||  7  < q||fS )Nr   r   r   r   re   r   ư>)r   r   r   r   r    r   )"r&   r/   r#   r$   r!   r"   output_metricoutput_metric_kwdsr
   r,   r+   r0   r.   r-   r)   r%   r'   r(   r*   r   r;   r<   r=   r>   dist_outputgrad_dist_output_rev_grad_dist_outputw_lrJ   rK   rL   rM   rN   r   r   r   %_optimize_layout_generic_single_epoch  sd   
r   r   c                 C   s   | j d }|}|| }| }| }tjtdd}|du r i }d|vr)| |d< tj| j d t|	f|	tjd| dddf 	tj
tjdd }tt|fi |D ]$}|||||| ||||||||||||||
 |d	t|t|   }qW| S )
a	  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.

    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    n_epochs: int
        The number of training epochs to use in optimization.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.

    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   Tr   Nrd   r   ra   re   r   )r   ri   r   rj   r   r   rq   rr   rs   rt   ru   rv   rw   r   r   rx   )r!   r"   r#   r$   r   r%   r&   r'   r(   r   r*   r   r   r   r   r   r   r+   r
   r,   r-   r.   r/   r   r)   r0   r   r   r   optimize_layout_generic  sT   
V
&r   c           "      C   s  t | jd D ]}|| |kr|| }|| }|| }|| }|||g|R  \}}|| }d||	|  d   }t |
D ]!}t|||  }||  || 7  < |r^||  | | 7  < q=||  | | 7  < t|||  ||  }t |D ]T} t|| }|| }|||g|R  \}}tt|||  d |	| d  }!| d|! d|! |	|  d   }t |
D ]}t|||  }||  || 7  < qqy||  |||  7  < qd S )Nr   r   r   )r   r   r   r    r   r   r   rp   )"r&   r/   r#   r$   r!   r"   r   r   weightsigmasr
   r,   r+   r0   r.   r-   r   r%   rhosr*   r   r;   r<   r=   r>   r   r   r   rJ   rK   rL   rM   rN   w_hr   r   r   %_optimize_layout_inverse_single_epoch  sT   &"
r   c                 C   s   | j d }|}|	| }| }|	 }tjtdd}|du r i }d|vr)| |d< tt|fi |D ]%}||	|||| ||||||||||||||| |dt|t|   }q3| S )a
  Improve an embedding using stochastic gradient descent to minimize the
    fuzzy set cross entropy between the 1-skeletons of the high dimensional
    and low dimensional fuzzy simplicial sets. In practice this is done by
    sampling edges based on their membership strength (with the (1-p) terms
    coming from negative sampling similar to word2vec).

    Parameters
    ----------
    head_embedding: array of shape (n_samples, n_components)
        The initial embedding to be improved by SGD.

    tail_embedding: array of shape (source_samples, n_components)
        The reference embedding of embedded points. If not embedding new
        previously unseen points with respect to an existing embedding this
        is simply the head_embedding (again); otherwise it provides the
        existing embedding to embed with respect to.

    head: array of shape (n_1_simplices)
        The indices of the heads of 1-simplices with non-zero membership.

    tail: array of shape (n_1_simplices)
        The indices of the tails of 1-simplices with non-zero membership.

    weight: array of shape (n_1_simplices)
        The membership weights of the 1-simplices.

    sigmas:

    rhos:

    n_epochs: int
        The number of training epochs to use in optimization.

    n_vertices: int
        The number of vertices (0-simplices) in the dataset.

    epochs_per_sample: array of shape (n_1_simplices)
        A float value of the number of epochs per 1-simplex. 1-simplices with
        weaker membership strength will have more epochs between being sampled.

    a: float
        Parameter of differentiable approximation of right adjoint functor

    b: float
        Parameter of differentiable approximation of right adjoint functor

    rng_state: array of int64, shape (3,)
        The internal state of the rng

    gamma: float (optional, default 1.0)
        Weight to apply to negative samples.

    initial_alpha: float (optional, default 1.0)
        Initial learning rate for the SGD.

    negative_sample_rate: int (optional, default 5)
        Number of negative samples to use per positive sample.

    verbose: bool (optional, default False)
        Whether to report information on the current progress of the algorithm.

    tqdm_kwds: dict (optional, default None)
        Keyword arguments for tqdm progress bar.

    move_other: bool (optional, default False)
        Whether to adjust tail_embedding alongside head_embedding

    Returns
    -------
    embedding: array of shape (n_samples, n_components)
        The optimized embedding.
    r   Tr   Nrd   r   )r   ri   r   rj   r   r   r   rx   )r!   r"   r#   r$   r   r   r   r   r%   r&   r'   r(   r   r*   r   r   r   r   r   r   r+   r
   r,   r-   r.   r/   r   r0   r   r   r   optimize_layout_inverse  sL   
`
r   c           (      C   s  t |}|jd d d }d}|D ]}|jd |kr|jd }qt|tj}tjt|	d  tj	| t
|D ]4}|D ].}||| jd k rq|| | |krq|| | }|| | }| | | }|| | }t||}|dkrd| | t||d  }||t|| d  }nd}t
|D ]} t|||  ||    }!t
| |D ]J}"||" }#||#  krd  kr|"krn q|||"| |f }$|$dkr|!t|tt|"d   |||"| |f  ||  | |# |$| f   8 }!q||   t|!| 7  < |rut|||  ||    }%t
| |D ]P}"||" }#||#  kr0d  kr0|"krgn q|||"| |f }$|$dkrg|%t|tt|"d   |||"| |f  ||  | |# |$| f   8 }%q||   t|%| 7  < q|| |  || | 7  < || | dkrt||| |  || |  }&nd}&t
|&D ]}'t|	|| jd  }|| | }t||}|dkrd|
 | }|d| |t|| d   }n	||krސqd}t
|D ]z} |dkrt|||  ||    }!nd}!t
| |D ]P}"||" }#||#  krd  kr|"krOn q |||"| |f }$|$dkrO|!t|tt|"d   |||"| |f  ||  | |# |$| f   8 }!q ||   t|!| 7  < qq|| |  |&|| |  7  < qBq=d S )	Nr   r   r   r   r   r   r   r   )rr   r   r   arangert   int32randomseedabsshuffler   r   r   r   r   r    r   )(head_embeddingstail_embeddingsheadstailsr&   r'   r(   regularisation_weights	relationsr   r*   lambda_r
   r+   r,   r-   r.   r/   r0   n_embeddingswindow_sizemax_n_edgese_p_sembedding_orderr   mr;   r<   r=   r>   r?   rJ   rK   rL   offset
neighbor_midentified_indexother_grad_drM   rN   r   r   r   /_optimize_layout_aligned_euclidean_single_epochm  s   

&
"*





*
r   P);?V?{Gzt?c                 C   sN  | d j d }|}tjjtjjd d d }tjjtjjd d d }tjjtjjd d d }tt|D ]%}|	|| 
tj|  |	|| 
tj |	|| 
tj q9tjtd|d}|d u rmi }d|vrv| |d< tt|fi |D ]$}|| |||||	|
|||||||||||| |dt|t|   }q| S )Nr   r   TrW   rd   r   )r   r   typedList
empty_listtypesrm   r   rr   r~   rt   r   rj   r   r   rx   )r   r   r   r   r   r&   r   r   r   r'   r(   r*   r   r   r   rX   r   r   r+   r
   r,   r-   r.   r/   r   r   r0   r   r   r   !optimize_layout_aligned_euclidean  s`   
r   )F)	r   r   r\   FFFNNF)
r   r   r   r   r   r\   TFNF)r   numpyr   	tqdm.autor   umap.distances	distancesdist
umap.utilsr   rj   r   r   rm   intpr   rO   rV   rZ   rY   boolr[   r   r   	euclideanr   r   r   r   r   r   r   r   r   <module>   s    

~!
 Q]
 V
  "