from typing import Union

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from einops import rearrange
from torch.nn.utils import weight_norm

from dac.nn.layers import WNConv1d


class VectorQuantize(nn.Module):
    """
    Implementation of VQ similar to Karpathy's repo:
    https://github.com/karpathy/deep-vector-quantization
    Additionally uses the following tricks from Improved VQGAN
    (https://arxiv.org/pdf/2110.04627.pdf):
        1. Factorized codes: Perform nearest-neighbor lookup in a low-dimensional space
            for improved codebook usage
        2. l2-normalized codes: Converts euclidean distance to cosine similarity, which
            improves training stability
    """

    def __init__(self, input_dim: int, codebook_size: int, codebook_dim: int):
        super().__init__()
        self.codebook_size = codebook_size
        self.codebook_dim = codebook_dim

        # Factorized codes: quantization happens in a low-dimensional code space
        self.in_proj = WNConv1d(input_dim, codebook_dim, kernel_size=1)
        self.out_proj = WNConv1d(codebook_dim, input_dim, kernel_size=1)
        self.codebook = nn.Embedding(codebook_size, codebook_dim)

    def forward(self, z):
        """Quantizes the input tensor using a fixed codebook and returns
        the corresponding codebook vectors

        Parameters
        ----------
        z : Tensor[B x D x T]

        Returns
        -------
        Tensor[B x D x T]
            Quantized continuous representation of input
        Tensor[1]
            Commitment loss to train encoder to predict vectors closer to codebook
            entries
        Tensor[1]
            Codebook loss to update the codebook
        Tensor[B x T]
            Codebook indices (quantized discrete representation of input)
        Tensor[B x D x T]
            Projected latents (continuous representation of input before quantization)
        """
        # Project input into the low-dimensional code space and quantize there
        z_e = self.in_proj(z)
        z_q, indices = self.decode_latents(z_e)

        commitment_loss = F.mse_loss(z_e, z_q.detach(), reduction="none").mean([1, 2])
        codebook_loss = F.mse_loss(z_q, z_e.detach(), reduction="none").mean([1, 2])

        # Straight-through estimator: identity in the forward pass, gradients flow
        # from z_q back to z_e in the backward pass
        z_q = z_e + (z_q - z_e).detach()

        z_q = self.out_proj(z_q)

        return z_q, commitment_loss, codebook_loss, indices, z_e

    def embed_code(self, embed_id):
        return F.embedding(embed_id, self.codebook.weight)

    def decode_code(self, embed_id):
        # (B x T x D) -> (B x D x T)
        return self.embed_code(embed_id).transpose(1, 2)

    def decode_latents(self, latents):
        encodings = rearrange(latents, "b d t -> (b t) d")
        codebook = self.codebook.weight  # codebook: (N x D)

        # L2-normalize encodings and codebook (ViT-VQGAN)
        encodings = F.normalize(encodings)
        codebook = F.normalize(codebook)

        # Compute euclidean distance between each encoding and every codebook entry
        dist = (
            encodings.pow(2).sum(1, keepdim=True)
            - 2 * encodings @ codebook.t()
            + codebook.pow(2).sum(1, keepdim=True).t()
        )
        indices = rearrange((-dist).max(1)[1], "(b t) -> b t", b=latents.size(0))
        z_q = self.decode_code(indices)
        return z_q, indices


class ResidualVectorQuantize(nn.Module):
    """
    Introduced in SoundStream: An end-to-end neural audio codec
    https://arxiv.org/abs/2107.03312
    """

    def __init__(
        self,
        input_dim: int = 512,
        n_codebooks: int = 9,
        codebook_size: int = 1024,
        codebook_dim: Union[int, list] = 8,
        quantizer_dropout: float = 0.0,
    ):
        super().__init__()
        if isinstance(codebook_dim, int):
            codebook_dim = [codebook_dim for _ in range(n_codebooks)]

        self.n_codebooks = n_codebooks
        self.codebook_dim = codebook_dim
        self.codebook_size = codebook_size
        self.quantizers = nn.ModuleList(
            [
                VectorQuantize(input_dim, codebook_size, codebook_dim[i])
                for i in range(n_codebooks)
            ]
        )
        self.quantizer_dropout = quantizer_dropout

    def forward(self, z, n_quantizers: int = None):
        """Quantizes the input tensor using a fixed set of `n` codebooks and returns
        the corresponding codebook vectors

        Parameters
        ----------
        z : Tensor[B x D x T]
        n_quantizers : int, optional
            Number of quantizers to use
            (n_quantizers < self.n_codebooks, e.g. for quantizer dropout)
            Note: if `self.quantizer_dropout` is True, this argument is ignored
                when in training mode, and a random number of quantizers is used.

        Returns
        -------
        Tensor[B x D x T]
            "z": Quantized continuous representation of input
        Tensor[B x N x T]
            "codes": Codebook indices for each codebook
            (quantized discrete representation of input)
        Tensor[B x N*D x T]
            "latents": Projected latents (continuous representation of input
            before quantization)
        Tensor[1]
            "vq/commitment_loss": Commitment loss to train encoder to predict
            vectors closer to codebook entries
        Tensor[1]
            "vq/codebook_loss": Codebook loss to update the codebook
        """
        z_q = 0
        residual = z
        commitment_loss = 0
        codebook_loss = 0

        codebook_indices = []
        latents = []

        if n_quantizers is None:
            n_quantizers = self.n_codebooks
        if self.training:
            # Quantizer dropout: for a fraction of the batch, use a random number
            # of codebooks instead of all of them
            n_quantizers = torch.ones((z.shape[0],)) * self.n_codebooks + 1
            dropout = torch.randint(1, self.n_codebooks + 1, (z.shape[0],))
            n_dropout = int(z.shape[0] * self.quantizer_dropout)
            n_quantizers[:n_dropout] = dropout[:n_dropout]
            n_quantizers = n_quantizers.to(z.device)

        for i, quantizer in enumerate(self.quantizers):
            if self.training is False and i >= n_quantizers:
                break

            z_q_i, commitment_loss_i, codebook_loss_i, indices_i, z_e_i = quantizer(
                residual
            )

            # Mask out the contribution of dropped quantizers for each batch item
            mask = (
                torch.full((z.shape[0],), fill_value=i, device=z.device) < n_quantizers
            )
            z_q = z_q + z_q_i * mask[:, None, None]
            residual = residual - z_q_i

            # Sum losses
            commitment_loss += (commitment_loss_i * mask).mean()
            codebook_loss += (codebook_loss_i * mask).mean()

            codebook_indices.append(indices_i)
            latents.append(z_e_i)

        codes = torch.stack(codebook_indices, dim=1)
        latents = torch.cat(latents, dim=1)

        return z_q, codes, latents, commitment_loss, codebook_loss

    def from_codes(self, codes: torch.Tensor):
        """Given the quantized codes, reconstruct the continuous representation

        Parameters
        ----------
        codes : Tensor[B x N x T]
            Quantized discrete representation of input

        Returns
        -------
        Tensor[B x D x T]
            Quantized continuous representation of input
        Tensor[B x N*D x T]
            Projected latents for each codebook, concatenated along the channel axis
        Tensor[B x N x T]
            The input codes, returned unchanged
        """
        z_q = 0.0
        z_p = []
        n_codebooks = codes.shape[1]
        for i in range(n_codebooks):
            # Decode the i-th codebook and project back to the input dimension
            z_p_i = self.quantizers[i].decode_code(codes[:, i, :])
            z_p.append(z_p_i)

            z_q_i = self.quantizers[i].out_proj(z_p_i)
            z_q = z_q + z_q_i
        return z_q, torch.cat(z_p, dim=1), codes

    def from_latents(self, latents: torch.Tensor):
        """Given the unquantized latents, reconstruct the
        continuous representation after quantization.

        Parameters
        ----------
        latents : Tensor[B x N*D x T]
            Continuous representation of input after projection

        Returns
        -------
        Tensor[B x D x T]
            Quantized representation of the full projected space
        Tensor[B x N*D x T]
            Quantized representation of the latent space
        Tensor[B x N x T]
            Codebook indices for each codebook
        """
        z_q = 0
        z_p = []
        codes = []
        dims = np.cumsum([0] + [q.codebook_dim for q in self.quantizers])

        # Use as many codebooks as fit into the channel dimension of `latents`
        n_codebooks = np.where(dims <= latents.shape[1])[0].max(axis=0, keepdims=True)[0]
        for i in range(n_codebooks):
            j, k = dims[i], dims[i + 1]
            z_p_i, codes_i = self.quantizers[i].decode_latents(latents[:, j:k, :])
            z_p.append(z_p_i)
            codes.append(codes_i)

            z_q_i = self.quantizers[i].out_proj(z_p_i)
            z_q = z_q + z_q_i

        return z_q, torch.cat(z_p, dim=1), torch.stack(codes, dim=1)


if __name__ == "__main__":
    rvq = ResidualVectorQuantize(quantizer_dropout=True)
    x = torch.randn(16, 512, 80)
    z_q, codes, latents, commitment_loss, codebook_loss = rvq(x)
    print(latents.shape)
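
    # Illustrative round-trip sketch (relies on the tuple return order of
    # `forward` documented above): rebuild the quantized signal from the
    # discrete codes and from the projected latents.
    z_q_from_codes, _, _ = rvq.from_codes(codes)
    z_q_from_latents, _, _ = rvq.from_latents(latents)
    print(z_q_from_codes.shape, z_q_from_latents.shape)  # both: (16, 512, 80)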