o
    i%                     @   s.  d dl Z d dl mZ d dlmZ d dlmZ d dlmZm	Z	 ddl
mZ G dd	 d	ejZG d
d deZG dd deZe jjdd Ze jjdd ZG dd deZdd Zdd ZG dd deZG dd deZG dd deZG dd deZG d d! d!eZG d"d# d#eZG d$d% d%eZdS )&    N)nn)
functional)	rearrange)
ResidualVQFSQ   )ResidualVectorQuantizec                       s6   e Zd Zd	def fddZd	ddZdd Z  ZS )

BottleneckFis_discretec                    s   t    || _d S N)super__init__r
   )selfr
   	__class__ g/home/ubuntu/.local/lib/python3.10/site-packages/solospeech/vae_modules/stable_vae/models/bottleneck.pyr      s   

zBottleneck.__init__c                 K      t r   NotImplementedError)r   xreturn_infokwargsr   r   r   encode      zBottleneck.encodec                 C   r   r   r   r   r   r   r   r   decode   r   zBottleneck.decodeF)__name__
__module____qualname__boolr   r   r   __classcell__r   r   r   r   r	   
   s    
r	   c                       s$   e Zd Z fddZdd Z  ZS )DiscreteBottleneckc                    s$   t  jdd || _|| _|| _d S )NTr
   )r   r   num_quantizerscodebook_size	tokens_id)r   r%   r&   r'   r   r   r   r      s   
zDiscreteBottleneck.__init__c                 K   r   r   r   )r   codesr   r   r   r   decode_tokens   r   z DiscreteBottleneck.decode_tokens)r   r   r    r   r)   r"   r   r   r   r   r#      s    r#   c                       .   e Zd Z fddZdddZdd Z  ZS )	TanhBottleneckc                    s   t  jdd t | _d S NFr$   )r   r   r   Tanhtanhr   r   r   r   r   $   s   zTanhBottleneck.__init__Fc                 C   s   i }t |}|r||fS |S r   )torchr.   r   r   r   infor   r   r   r   (   s
   
zTanhBottleneck.encodec                 C      |S r   r   r   r   r   r   r   2   r   zTanhBottleneck.decoder   r   r   r    r   r   r   r"   r   r   r   r   r+   #   s    

r+   c                 C   sZ   t j|d }|| }t|}t| | |  }| |  | | d d }||fS )N-C6?r   )r   r   softplusr0   log
randn_likesummean)r:   scalestdevvarlogvarlatentsklr   r   r   vae_sample_kl6   s   
rA   c                 C   s&   t j|d }t| | |  }|S )Nr5   )r   r   r6   r0   r8   )r:   r;   r<   r?   r   r   r   
vae_sampleB   s   rB   c                       r*   )	VAEBottleneckc                       t  jdd d S r,   r   r   r/   r   r   r   r   J      zVAEBottleneck.__init__Fc                 K   sF   |j ddd\}}|ri }t||\}}||d< ||fS t||}|S )N   r   dimr@   )chunkrA   rB   )r   r   r   r   r:   r;   r2   r@   r   r   r   r   M   s   
zVAEBottleneck.encodec                 C   r3   r   r   r   r   r   r   r   Y   r   zVAEBottleneck.decoder   r4   r   r   r   r   rC   I   s    
rC   c                 C   s>   | d d d f |d    dd| jd  }t|  S )NrG   )powr:   shaper0   exp)r   ykernel_inputr   r   r   compute_mean_kernel]   s   .rQ   c                 C   s\   |  dddd| jd }t|}t||}t||}t||}|| d|  }| S )Nr   rG   r   rK   )permutereshaperM   r0   r8   rQ   r:   )r?   latents_reshapednoiselatents_kernelnoise_kernellatents_noise_kernelmmdr   r   r   compute_mmdb   s   



rZ   c                       s6   e Zd Zd
def fddZdddZdd	 Z  ZS )WassersteinBottleneckr   noise_augment_dimc                    s   t  jdd || _d S r,   )r   r   r\   )r   r\   r   r   r   r   o   s   
zWassersteinBottleneck.__init__Fc                 C   s.   i }| j r|rt|}||d< |r||fS |S )NrY   )trainingrZ   )r   r   r   r2   rY   r   r   r   r   t   s   
zWassersteinBottleneck.encodec                 C   sB   | j dkrt|jd | j |jd |}tj||gdd}|S )Nr   rK   r   rH   )r\   r0   randnrM   type_ascat)r   r   rU   r   r   r   r      s   
zWassersteinBottleneck.decode)r   r   )r   r   r    intr   r   r   r"   r   r   r   r   r[   n   s    
r[   c                       r*   )	L2Bottleneckc                    rD   r,   rE   r/   r   r   r   r      rF   zL2Bottleneck.__init__Fc                 C   s"   i }t j|dd}|r||fS |S Nr   rH   F	normalizer1   r   r   r   r      s
   zL2Bottleneck.encodec                 C   s   t j|ddS rc   rd   r   r   r   r   r      s   zL2Bottleneck.decoder   r4   r   r   r   r   rb      s    

rb   c                       6   e Zd Z fddZd
ddZdd Zdd	 Z  ZS )RVQBottleneckc                    8   t  j|d |d dd tdi || _|d | _d S Nr%   r&   quantizer_indicesr%   r&   r'   r   r   r   r   	quantizerr%   r   quantizer_kwargsr   r   r   r         zRVQBottleneck.__init__Fc                 K   sL   i }t |d}| |\}}}t |d}||d< | |d< |r$||fS |S )Nb c n -> b n cb n c -> b c nrk   quantizer_loss)r   rn   r:   )r   r   r   r   r2   indiceslossr   r   r   r      s   

zRVQBottleneck.encodec                 C   r3   r   r   r   r   r   r   r      r   zRVQBottleneck.decodec                 K      | j |}| j|fi |S r   rn   get_outputs_from_indicesr   r   r(   r   r?   r   r   r   r)         zRVQBottleneck.decode_tokensr   r   r   r    r   r   r   r)   r"   r   r   r   r   rh      s
    
rh   c                       rg   )RVQVAEBottleneckc                    ri   rj   rm   ro   r   r   r   r      rq   zRVQVAEBottleneck.__init__Fc                 C   sj   i }t |jddd \}}||d< t|d}| |\}}}t|d}||d< | |d< |r3||fS |S )	NrG   r   rH   r@   rr   rs   rk   rt   )rB   rJ   r   rn   r:   )r   r   r   r2   r@   ru   rv   r   r   r   r      s   

zRVQVAEBottleneck.encodec                 C   r3   r   r   r   r   r   r   r      r   zRVQVAEBottleneck.decodec                 K   rw   r   rx   rz   r   r   r   r)      r{   zRVQVAEBottleneck.decode_tokensr   r|   r   r   r   r   r}      s
    
r}   c                       s8   e Zd Zd
 fdd	Zd
ddZdd Zdd	 Z  ZS )DACRVQBottleneckFc                    >   t  j|d |d dd tdi || _|d | _|| _d S Nn_codebooksr&   r(   rl   r   r   r   DACResidualVQrn   r%   quantize_on_decoder   r   rp   r   r   r   r         

zDACRVQBottleneck.__init__c                 K   s   i }||d< | j r||r|fS |fS | j|fi |\}}}}}	|||||	d}
|
d  | j  < |
d  | j  < ||
 |rG|
d |fS |
d S )Npre_quantizerzr(   r?   vq/commitment_lossvq/codebook_lossr   r   r   )r   rn   r%   update)r   r   r   r   r2   r   r(   r?   commitment_losscodebook_lossoutputr   r   r   r      s"   
zDACRVQBottleneck.encodec                 C      | j r
| |d }|S Nr   r   rn   r   r   r   r   r         zDACRVQBottleneck.decodec                 K   $   | j |\}}}| j|fi |S r   rn   
from_codesr   r   r(   r   r?   _r   r   r   r)        zDACRVQBottleneck.decode_tokensr   r|   r   r   r   r   r~      s
    
r~   c                       s>   e Zd Zd fdd	ZddefddZdd	 Zd
d Z  ZS )DACRVQVAEBottleneckFc                    r   r   r   r   r   r   r   r     r   zDACRVQVAEBottleneck.__init__Nn_quantizersc                 C   s   i }|j ddd\}}t||\}}||d< ||d< | jr&||r#|fS |fS | j||d\}}	}
}}||	|
||d}|d  | j  < |d	  | j  < || |rY|d
 |fS |d
 S )NrG   r   rH   r   r@   )r   r   r   r   r   )rJ   rB   r   rn   r%   r   )r   r   r   r   r2   r:   r;   r@   r   r(   r?   r   r   r   r   r   r   r     s(   
zDACRVQVAEBottleneck.encodec                 C   r   r   r   r   r   r   r   r   4  r   zDACRVQVAEBottleneck.decodec                 K   r   r   r   r   r   r   r   r)   ;  r   z!DACRVQVAEBottleneck.decode_tokensr   )FN)	r   r   r    r   ra   r   r   r)   r"   r   r   r   r   r     s
    !r   c                       rg   )FSQBottleneckc                    s,   t  jd|| dd t|g| d| _d S )Nr   rk   rl   )levels)r   r   r   rn   )r   rI   r   r   r   r   r   B  s   zFSQBottleneck.__init__Fc                 C   s>   i }t |d}| |\}}t |d}||d< |r||fS |S )Nrr   rs   rk   )r   rn   )r   r   r   r2   ru   r   r   r   r   F  s   

zFSQBottleneck.encodec                 C   r3   r   r   r   r   r   r   r   T  r   zFSQBottleneck.decodec                 K   rw   r   )rn   indices_to_codesr   )r   tokensr   r?   r   r   r   r)   W  r{   zFSQBottleneck.decode_tokensr   r|   r   r   r   r   r   A  s
    
r   )r0   r   torch.nnr   re   einopsr   vector_quantize_pytorchr   r   nn.quantizer   r   Moduler	   r#   r+   jitscriptrA   rB   rC   rQ   rZ   r[   rb   rh   r}   r~   r   r   r   r   r   r   <module>   s.    

"05