o
    mi4                     @   s   d Z ddlZddlmZmZ ddlZddlmZ ddlm  m	Z
 ddlmZmZ dejdejd	ejfd
dZdefddZd%dedefddZdefddZdefddZd&dedefddZG dd  d ejZG d!d" d"ejZG d#d$ d$ejZdS )'z(Core vector quantization implementation.    N)	rearrangerepeat)nn   )broadcast_tensorsrankvaldreturnc                 C   s   | d ur| S |S N )r   r	   r   r   D/home/ubuntu/SpeechTokenizer/speechtokenizer/quantization/core_vq.pydefault+   s   r   decayc                 C   s   | j |j|d| d d S )Nr   )alpha)datamul_add_)
moving_avgnewr   r   r   r   ema_inplace/   s   r   h㈵>n_categoriesepsilonc                 C   s   | | |   ||   S r   )sum)xr   r   r   r   r   laplace_smoothing3   s   r   shapec                  G   s   t | }tj| |S r   )torchemptyr   initkaiming_uniform_)r   tr   r   r   uniform_init7   s   
r#   numc                 C   sT   | j d | j}}||krtj||dd | }| | S tjd||f|d}| | S )Nr   device)r   r&   r   randpermrandint)samplesr$   num_samplesr&   indicesr   r   r   sample_vectors=   s   r,   
   num_clusters	num_itersc              	   C   s   | j d | j}}t| |}t|D ]P}t| dt|d }|d jdd }|jddj}	tj	|	|d}
|
dk}|

|d}|	j|||d	}|dt|	d
|d|  ||d  }t|d ||}q||
fS )Nzn d -> n () dzc d -> () c d   dim)	minlengthr   r   )dtypezn -> n d)r	   .N)r   r5   r,   ranger   r   maxr+   r   bincountmasked_fill	new_zerosscatter_add_r   where)r)   r.   r/   r3   r5   means_diffsdistsbucketsbins	zero_maskbins_min_clamped	new_meansr   r   r   kmeansH   s    

rG   c                       s   e Zd ZdZ					d$deded	ed
edededef fddZejj	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Z  ZS )%EuclideanCodebookaJ  Codebook with Euclidean distance.
    Args:
        dim (int): Dimension.
        codebook_size (int): Codebook size.
        kmeans_init (bool): Whether to use k-means to initialize the codebooks.
            If set to true, run the k-means algorithm on the first training batch and use
            the learned centroids as initialization.
        kmeans_iters (int): Number of iterations used for k-means algorithm at initialization.
        decay (float): Decay for exponential moving average over the codebooks.
        epsilon (float): Epsilon value for numerical stability.
        threshold_ema_dead_code (int): Threshold for dead code expiration. Replace any codes
            that have an exponential moving average cluster size less than the specified threshold with
            randomly selected vector from the current batch.
    Fr-   Gz?r   r1   r3   codebook_sizekmeans_initkmeans_itersr   r   threshold_ema_dead_codec           
         s   t    || _|stntj}|||}	|| _|| _|| _|| _	| 
dt| g | 
dt| | 
d|	 | 
d|	  d S )Ninitedcluster_sizeembed	embed_avg)super__init__r   r#   r   zerosrJ   rL   r   rM   register_bufferTensorclone)
selfr3   rJ   rK   rL   r   r   rM   init_fnrP   	__class__r   r   rS   p   s   


zEuclideanCodebook.__init__c                 C   sf   | j rd S t|| j| j\}}| jj| | jj|  | j	j| | j jt
dg d S )NT)rN   rG   rJ   rL   rP   r   copy_rQ   rW   rO   r   rV   )rX   r   rP   rO   r   r   r   init_embed_   s   zEuclideanCodebook.init_embed_c                 C   s.   t |d t|| j| j}| jj| d S )Nr6   )r   r=   r,   rJ   rP   r   r\   )rX   r)   maskmodified_codebookr   r   r   replace_   s   zEuclideanCodebook.replace_c                 C   sD   | j dkrd S | j| j k }t|sd S t|d}| j||d d S )Nr   ... d -> (...) d)r^   )rM   rO   r   anyr   r`   )rX   batch_samplesexpired_codesr   r   r   expire_codes_   s   


zEuclideanCodebook.expire_codes_c                 C   s   t |d}|S )Nra   )r   )rX   r   r   r   r   
preprocess      
zEuclideanCodebook.preprocessc                 C   sR   | j  }|djdddd| |  |djddd  }|jddj}|S )Nr1   r   T)keepdimr   r0   r2   )rP   r"   powr   r8   r+   )rX   r   rP   dist	embed_indr   r   r   quantize   s   

zEuclideanCodebook.quantizec                 C   s   |j |d d  S )Nr0   )view)rX   rk   r   r   r   r   postprocess_emb   s   z!EuclideanCodebook.postprocess_embc                 C   s   t || j}|S r   )F	embeddingrP   rX   rk   rl   r   r   r   
dequantize   s   zEuclideanCodebook.dequantizec                 C   s*   |j }| |}| |}| ||}|S r   )r   rf   rl   rn   )rX   r   r   rk   r   r   r   encode   s
   

zEuclideanCodebook.encodec                 C   s   |  |}|S r   )rr   rq   r   r   r   decode   rg   zEuclideanCodebook.decodec           
      C   s   |j |j}}| |}| | | |}t|| j|}| 	||}| 
|}| jrk| | t| j|d| j | | }t| j| | j t| j| j| j| j  }| j|d }	| jj|	 ||fS )Nr   r   )r   r5   rf   r]   rl   ro   one_hotrJ   typern   rr   trainingre   r   rO   r   r   r"   rQ   r   r   	unsqueezerP   r   r\   )
rX   r   r   r5   rk   embed_onehotrl   	embed_sumrO   embed_normalizedr   r   r   forward   s&   




zEuclideanCodebook.forward)Fr-   rI   r   r1   )__name__
__module____qualname____doc__intfloatrS   r   jitignorer]   r`   re   rf   rl   rn   rr   rs   rt   r|   __classcell__r   r   rZ   r   rH   a   sB    


rH   c                       s   e Zd ZdZ							dd	ed
edeje dedededededef fddZ	e
dd Zdd Zdd Zdd Z  ZS )VectorQuantizationa  Vector quantization implementation.
    Currently supports only euclidean distance.
    Args:
        dim (int): Dimension
        codebook_size (int): Codebook size
        codebook_dim (int): Codebook dimension. If not defined, uses the specified dimension in dim.
        decay (float): Decay for exponential moving average over the codebooks.
        epsilon (float): Epsilon value for numerical stability.
        kmeans_init (bool): Whether to use kmeans to initialize the codebooks.
        kmeans_iters (int): Number of iterations used for kmeans initialization.
        threshold_ema_dead_code (int): Threshold for dead code expiration. Replace any codes
            that have an exponential moving average cluster size less than the specified threshold with
            randomly selected vector from the current batch.
        commitment_weight (float): Weight for commitment loss.
    NrI   r   T2   r1         ?r3   rJ   codebook_dimr   r   rK   rL   rM   commitment_weightc
              	      s~   t    t||}
|
|k}|rt||
nt | _|r#t|
|nt | _|| _|	| _	t
|
||||||d| _|| _d S )N)r3   rJ   rK   rL   r   r   rM   )rR   rS   r   r   LinearIdentity
project_inproject_outr   r   rH   	_codebookrJ   )rX   r3   rJ   r   r   r   rK   rL   rM   r   _codebook_dimrequires_projectionrZ   r   r   rS      s   


zVectorQuantization.__init__c                 C   s   | j jS r   )r   rP   )rX   r   r   r   codebook  s   zVectorQuantization.codebookc                 C   s$   t |d}| |}| j|}|S )Nb d n -> b n d)r   r   r   rs   )rX   r   embed_inr   r   r   rs     s   

zVectorQuantization.encodec                 C   s$   | j |}| |}t|d}|S )Nb n d -> b d n)r   rt   r   r   rq   r   r   r   rt     s   

zVectorQuantization.decodec                 C   s   |j }t|d}| |}| |\}}| jr|||   }tjdg|| jd}| jr@| jdkr@t	
| |}||| j  }| |}t|d}|||fS )Nr           )r&   requires_gradr   r   )r&   r   r   r   rw   detachr   tensorr   ro   mse_lossr   )rX   r   r&   rl   rk   losscommit_lossr   r   r   r|   $  s   





zVectorQuantization.forward)NrI   r   Tr   r1   r   )r}   r~   r   r   r   tpOptionalr   boolrS   propertyr   rs   rt   r|   r   r   r   rZ   r   r      sB    	

r   c                	       s   e Zd ZdZ fddZddeje deje fddZ	dd	e
jdeje d
eje de
jfddZdde
jd
ede
jfddZ  ZS )ResidualVectorQuantizationzrResidual vector quantization implementation.
    Follows Algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf
    c                   s,   t    t fddt|D | _d S )Nc                    s   g | ]	}t d i  qS )r   )r   ).0r?   kwargsr   r   
<listcomp>A  s    z7ResidualVectorQuantization.__init__.<locals>.<listcomp>)rR   rS   r   
ModuleListr7   layers)rX   num_quantizersr   rZ   r   r   rS   >  s   

z#ResidualVectorQuantization.__init__Nn_qr   c                 C   s   d}|}g }g }g }|pt | j}t| jd | D ](\}	}
|
|\}}}|| }|| }|| || |rB|	|v rB|| qttj||f\}}||||fS )Nr   )lenr   	enumerateappendmapr   stack)rX   r   r   r   quantized_outresidual
all_lossesall_indicesout_quantizedilayer	quantizedr+   r   
out_lossesout_indicesr   r   r   r|   D  s"   


z"ResidualVectorQuantization.forwardr   str
   c           
      C   sf   |}g }|p
t | j}|pd}| j|| D ]}||}||}|| }|| qt|}	|	S )Nr   )r   r   rs   rt   r   r   r   )
rX   r   r   r   r   r   r   r+   r   r   r   r   r   rs   [  s   


z!ResidualVectorQuantization.encoder   	q_indicesc                 C   sF   t jd|jd}t|D ]\}}| j||  }||}|| }q|S )Nr   r%   )r   r   r&   r   r   rt   )rX   r   r   r   r   r+   r   r   r   r   r   rt   h  s   

z!ResidualVectorQuantization.decode)NN)r   )r}   r~   r   r   rS   r   r   r   listr|   r   rV   rs   rt   r   r   r   rZ   r   r   :  s     ,$r   )r   )r-   )r   typingr   einopsr   r   r   r   torch.nn.functional
functionalro   distribr   r   Anyr   r   r   r   r   r#   r,   rG   ModulerH   r   r   r   r   r   r   <module>   s"    R