o
    ϯi#                     @   s   d dl mZ d dlZd dlZd dlmZ d dlm  mZ	 d dl
mZ d dlmZ G dd dejZG dd dejZed	krXed
dZedddZeeZeed j dS dS )    )UnionN)	rearrange)
NormConv1dc                       sN   e Zd ZdZdededef fddZdd Zd	d
 Zdd Zdd Z	  Z
S )VectorQuantizea  
    Implementation of VQ similar to Karpathy's repo:
    https://github.com/karpathy/deep-vector-quantization
    Additionally uses following tricks from Improved VQGAN
    (https://arxiv.org/pdf/2110.04627.pdf):
        1. Factorized codes: Perform nearest neighbor lookup in low-dimensional space
            for improved codebook usage
        2. l2-normalized codes: Converts euclidean distance to cosine similarity which
            improves training stability
    	input_dimcodebook_sizecodebook_dimc                    sH   t    || _|| _t||dd| _t||dd| _t||| _	d S )N   )kernel_size)
super__init__r   r   r   in_projout_projnn	Embeddingcodebook)selfr   r   r   	__class__ F/home/ubuntu/.local/lib/python3.10/site-packages/dacvae/nn/quantize.pyr      s   
zVectorQuantize.__init__c                 C   s|   |  |}| |\}}tj|| ddddg}tj|| ddddg}|||   }| |}|||||fS )a  Quantized the input tensor using a fixed codebook and returns
        the corresponding codebook vectors

        Parameters
        ----------
        z : Tensor[B x D x T]

        Returns
        -------
        Tensor[B x D x T]
            Quantized continuous representation of input
        Tensor[1]
            Commitment loss to train encoder to predict vectors closer to codebook
            entries
        Tensor[1]
            Codebook loss to update the codebook
        Tensor[B x T]
            Codebook indices (quantized discrete representation of input)
        Tensor[B x D x T]
            Projected latents (continuous representation of input before quantization)
        none)	reductionr	      )r   decode_latentsFmse_lossdetachmeanr   )r   zz_ez_qindicescommitment_losscodebook_lossr   r   r   forward#   s   

zVectorQuantize.forwardc                 C   s   t || jjS N)r   	embeddingr   weightr   embed_idr   r   r   
embed_codeI   s   zVectorQuantize.embed_codec                 C   s   |  |ddS )Nr	   r   )r+   	transposer)   r   r   r   decode_codeL   s   zVectorQuantize.decode_codec                 C   s   t |d}| jj}t|}t|}|djdddd| |   |djddd  }t | dd d|	dd}| 
|}||fS )	Nzb d t -> (b t) dr   r	   T)keepdimz(b t) -> b tr   )b)r   r   r(   r   	normalizepowsumtmaxsizer-   )r   latents	encodingsr   distr"   r!   r   r   r   r   O   s   


 
zVectorQuantize.decode_latents)__name__
__module____qualname____doc__intr   r%   r+   r-   r   __classcell__r   r   r   r   r      s    	&r   c                       sz   e Zd ZdZ					ddeded	ed
eeef def
 fddZddefddZ	de
jfddZde
jfddZ  ZS )ResidualVectorQuantizezg
    Introduced in SoundStream: An end2end neural audio codec
    https://arxiv.org/abs/2107.03312
       	                 r   n_codebooksr   r   quantizer_dropoutc                    sh   t    t tr fddt|D  || _ | _| _t	 fddt|D | _
|| _d S )Nc                    s   g | ]} qS r   r   ).0_r   r   r   
<listcomp>r   s    z3ResidualVectorQuantize.__init__.<locals>.<listcomp>c                    s   g | ]
}t  | qS r   )r   )rG   ir   r   r   r   r   rJ   y   s    )r   r   
isinstancer=   rangerE   r   r   r   
ModuleList
quantizersrF   )r   r   rE   r   r   rF   r   rL   r   r   h   s   


zResidualVectorQuantize.__init__Nn_quantizersc                 C   sr  d}|}d}d}g }g }|du r| j }| jrLt|jd f| j  d }td| j d |jd f}	t|jd | j }
|	d|
 |d|
< ||j	}t
| jD ]R\}}| jdu r`||kr` nD||\}}}}}tj|jd f||j	d|k }|||ddddf   }|| }|||  7 }|||  7 }|| || qQtj|dd}tj|dd}|||||fS )a  Quantized the input tensor using a fixed set of `n` codebooks and returns
        the corresponding codebook vectors
        Parameters
        ----------
        z : Tensor[B x D x T]
        n_quantizers : int, optional
            No. of quantizers to use
            (n_quantizers < self.n_codebooks ex: for quantizer dropout)
            Note: if `self.quantizer_dropout` is True, this argument is ignored
                when in training mode, and a random number of quantizers is used.
        Returns
        -------
        dict
            A dictionary with the following keys:

            "z" : Tensor[B x D x T]
                Quantized continuous representation of input
            "codes" : Tensor[B x N x T]
                Codebook indices for each codebook
                (quantized discrete representation of input)
            "latents" : Tensor[B x N*D x T]
                Projected latents (continuous representation of input before quantization)
            "vq/commitment_loss" : Tensor[1]
                Commitment loss to train encoder to predict vectors closer to codebook
                entries
            "vq/codebook_loss" : Tensor[1]
                Codebook loss to update the codebook
        r   Nr	   F)
fill_valuedevicedim)rE   trainingtorchonesshaperandintr=   rF   torS   	enumeraterP   fullr   appendstackcat)r   r   rQ   r!   residualr#   r$   codebook_indicesr6   dropout	n_dropoutrK   	quantizerz_q_icommitment_loss_icodebook_loss_i	indices_iz_e_imaskcodesr   r   r   r%      s>   
zResidualVectorQuantize.forwardrl   c                 C   sx   d}g }|j d }t|D ]$}| j| |dd|ddf }|| | j| |}|| }q|tj|dd|fS )a?  Given the quantized codes, reconstruct the continuous representation
        Parameters
        ----------
        codes : Tensor[B x N x T]
            Quantized discrete representation of input
        Returns
        -------
        Tensor[B x D x T]
            Quantized continuous representation of input
        rD   r	   NrT   )rY   rN   rP   r-   r^   r   rW   r`   )r   rl   r!   z_prE   rK   z_p_irf   r   r   r   
from_codes   s   
"

z!ResidualVectorQuantize.from_codesr6   c                 C   s   d}g }g }t dgdd | jD  }t ||jd kd jdddd }t|D ]8}|| ||d  }}	| j| |dd||	ddf \}
}||
 || | j| 	|
}|| }q+|t
j|ddt
j|ddfS )	a  Given the unquantized latents, reconstruct the
        continuous representation after quantization.

        Parameters
        ----------
        latents : Tensor[B x N x T]
            Continuous representation of input after projection

        Returns
        -------
        Tensor[B x D x T]
            Quantized representation of full-projected space
        Tensor[B x D x T]
            Quantized representation of latent space
        r   c                 S   s   g | ]}|j qS r   rI   )rG   qr   r   r   rJ      s    z7ResidualVectorQuantize.from_latents.<locals>.<listcomp>r	   T)axiskeepdimsNrT   )npcumsumrP   whererY   r4   rN   r   r^   r   rW   r`   r_   )r   r6   r!   rm   rl   dimsrE   rK   jkrn   codes_irf   r   r   r   from_latents   s    *


z#ResidualVectorQuantize.from_latents)r@   rA   rB   rC   rD   r&   )r9   r:   r;   r<   r=   r   listfloatr   r%   rW   Tensorro   rz   r>   r   r   r   r   r?   b   s*    
Ir?   __main__T)rF      r@   P   r6   )typingr   numpyrs   rW   torch.nnr   torch.nn.functional
functionalr   einopsr   dacvae.nn.layersr   Moduler   r?   r9   rvqrandnxyprintrY   r   r   r   r   <module>   s    T "
