o
    }oi@M                     @   s   d Z ddlmZ ddlZddlZddlmZ ddlm  m	Z
 ddlmZ ddlmZmZmZmZmZmZ G dd dejZG dd	 d	ejZG d
d dejZG dd dejZG dd dejZdS )z5Quantizers for discrete image and video tokenization.    )OptionalN)reduce)defaultentropypack_one	rearrange	round_ste
unpack_onec                       sZ   e Zd ZdZdee def fddZdejdejfdd	Z	d
ejdejfddZ
  ZS )ResidualFSQuantizerzjResidual Finite Scalar Quantization

    Follows Algorithm 1. in https://arxiv.org/pdf/2107.03312.pdf
    levelsnum_quantizersc                    s<   t    |dtj| _t fddt|D | _	d S )Ndtypec                    s   g | ]}t  d qS )r   )FSQuantizer).0_r    o/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/common/video_tokenizers/modules/quantizers.py
<listcomp>.       z0ResidualFSQuantizer.__init__.<locals>.<listcomp>)
super__init__gettorchfloat32r   nn
ModuleListrangelayers)selfr   r   ignore_kwargs	__class__r   r   r   +   s   
"zResidualFSQuantizer.__init__xreturnc                 C   s   g }|}d}d}t | jD ]\}}||\}}	}
|| ||	  }||	 }||
 }q|| _tj|dd}||| j|| jfS )Nr      dim)		enumerater   appenddetachresidualr   stacktor   )r   r#   indices_stackr+   quantized_outloss_outilayerquant_indiceszlossindicesr   r   r   forward0   s   

zResidualFSQuantizer.forwardr.   c                 C   s4   d}t | j|ddD ]\}}|||7 }q|S )Nr   r%   )zipr   	transposeindices_to_codes)r   r.   r/   r2   r6   r   r   r   r:   ?   s   z$ResidualFSQuantizer.indices_to_codes)__name__
__module____qualname____doc__listintr   r   Tensorr7   r:   __classcell__r   r   r!   r   r
   %   s
    r
   c                
       s   e Zd ZdZ				d dee dee dee dee f fdd	Z	d!de
jdede
jfddZde
jde
jfddZde
jde
jfddZde
jde
jfddZde
jde
jfddZd"de
jde
jfddZde
jde
jfddZ  ZS )#r   a  Finite Scalar Quantization: VQ-VAE Made Simple - https://arxiv.org/abs/2309.15505

    Code adapted from Jax version in Appendix A.1.

    Adapted from: https://github.com/lucidrains/vector-quantize-pytorch/blob/9502a1f447876d53fd37685b226bf28f250dc4a3/
    vector_quantize_pytorch/finite_scalar_quantization.py
    [Copyright (c) 2020 Phil Wang]
    https://github.com/lucidrains/vector-quantize-pytorch/blob/9502a1f447876d53fd37685b226bf28f250dc4a3/LICENSE
    Nr%   r   r'   keep_num_codebooks_dimscalec                    sR  t    |dtj| _tj|tjd}| jd|dd tj	tdg|d d  dtjd	}| jd
|dd || _
t|}	|	| _|	| }
|| _|
| _t||dk}|dkr[|s[J || _t|t|| | _| j|
k}|rvt| j|
nt | _|rt|
| jnt | _|| _| j  | _| jt| jdd}| jd|dd d S )Nr   )r   _levelsF
persistentr%   r   )r'   r   _basis)project_outimplicit_codebook)r   r   r   r   r   r   tensorint32register_buffercumprodrD   lencodebook_dimnum_codebookseffective_codebook_dimr   rC   r'   r   LinearIdentity
project_inrJ   has_projectionsrE   proditemcodebook_sizer:   arange)r   r   r'   rR   rC   rD   r    rE   rI   rQ   rS   rW   rK   r!   r   r   r   Q   s.   
	&
zFSQuantizer.__init__MbP?r4   epsr$   c                 C   sN   | j d d|  d }t| j d dkdd}||  }||  | | S )z&Bound `z`, an array of shape (..., d).r%      r   g      ?        )rE   r   whereatanhtanh)r   r4   r]   half_loffsetshiftr   r   r   bound{   s   zFSQuantizer.boundc                 C   s    t | |}| jd }|| S )z5Quantizes z, returns quantized zhat, same shape as z.r^   )r   rf   rE   )r   r4   	quantized
half_widthr   r   r   quantize   s   
zFSQuantizer.quantizezhat_normalizedc                 C   s   | j d }|| | S Nr^   rE   )r   rj   rh   r   r   r   _scale_and_shift      
zFSQuantizer._scale_and_shiftzhatc                 C   s   | j d }|| | S rk   rl   )r   ro   rh   r   r   r   _scale_and_shift_inverse   rn   z$FSQuantizer._scale_and_shift_inversec                 C   s<   |j d | jks
J | | }|| j jddtjS )z.Converts a `code` to an index in the codebook.rH   r&   )	shaperQ   rm   floatrI   sumr-   r   rM   )r   ro   r   r   r   codes_to_indices   s   zFSQuantizer.codes_to_indicesTr6   c                 C   sp   |j dt| j k}t|d}|| j | j }| |}| jr$t|d}|r+| |}|r2t|d}|| j	S )zInverse of `codes_to_indices`.   z... -> ... 1z... c d -> ... (c d)b ... d -> b d ...)
ndimr@   rC   r   rI   rE   rp   rJ   r-   r   )r   r6   rJ   is_img_or_videocodes_non_centeredcodesr   r   r   r:      s   




zFSQuantizer.indices_to_codesc                 C   s  |j dk}|rt|d}t|d\}}|jd | jks)J d| j d|jd  | |}t|d| jd}| |}| |}t|d	}| 	|}|rjt
||d}t|d
}t
||d}t|jg ddd}nt|jddgddd}| jst|d}||| j|fS )z
        einstein notation
        b - batch
        n - sequence (or flattened spatial dimensions)
        d - feature dimension, which is also log2(codebook size)
        c - number of codebook dim
           b d ... -> b ... db * drH   zexpected dimension of z but found dimension of b n (c d) -> b n c dcb n c d -> b n (c d)rv   b * cr%   r^   ru   Tr'   keepdimr%   r^   ... 1 -> ...)rw   r   r   rq   r'   rV   rR   ri   rt   rJ   r	   r   
zeros_likemean	unsqueezerC   r-   r   )r   r4   rx   psrz   r6   out
dummy_lossr   r   r   r7      s(   

,






zFSQuantizer.forward)Nr%   NN)r\   )T)r;   r<   r=   r>   r?   r@   r   boolrr   r   r   rA   rf   ri   rm   rp   rt   r:   r7   rB   r   r   r!   r   r   F   s,    *r   c                       sn   e Zd ZdZ						ddeded	ed
edededef fddZdd Z	dd Z
dddZdd Z  ZS )VectorQuantizeru  Improved version over VectorQuantizer. Mostly
    avoids costly matrix multiplications and allows for post-hoc remapping of indices.

    Adapted from: https://github.com/CompVis/taming-transformers/blob/3ba01b241669f5ade541ce990f7650a3b8f65318/
    taming/modules/vqvae/quantize.py

    [Copyright (c) 2020 Patrick Esser and Robin Rombach and Björn Ommer]
    https://github.com/CompVis/taming-transformers/blob/3ba01b241669f5ade541ce990f7650a3b8f65318/License.txt
          ?NrandomFTnum_embeddingsembedding_dimbetaremapunknown_indexsane_index_shapelegacyc	           
         s   t    || _|| _|| _|| _ fdd| _t| j| j| _	| j	j
jd| j d| j  || _| jd urp| dtt| j | jjd | _|| _| jdkr^| j| _| jd | _td	| j d
| j d| j d n|| _|| _|	dtj| _d S )Nc                    s    r	t j| ddS | S )NrH   r&   )F	normalize)r#   use_normr   r   <lambda>   r   z*VectorQuantizer.__init__.<locals>.<lambda>g            ?usedr   extrar%   z
Remapping z indices to z indices. Using z for unknown indices.r   )r   r   n_ee_dimr   r   normr   	Embedding	embeddingweightdatauniform_r   rN   r   rL   nploadr   rq   re_embedr   printr   r   r   r   )
r   r   r   r   r   r   r   r   r   r    r!   r   r   r      s0   


zVectorQuantizer.__init__c                 C   s   |j }t|dksJ ||d d}| j|}|d d d d d f |d k }|d}|ddk }| jdkrOt	j
d| j|| j dj|jd||< n| j||< ||S )	Nr%   r   rH   )NN.r^   r   )sizedevice)rq   rP   reshaper   r-   longargmaxrs   r   r   randintr   r   )r   indsishaper   matchnewunknownr   r   r   remap_to_used  s   "

(

zVectorQuantizer.remap_to_usedc                 C   s   |j }t|dksJ ||d d}| j|}| j| jj d kr,d||| jj d k< t|d d d f |j d dg d d f d|}||S )Nr%   r   rH   )rq   rP   r   r   r-   r   r   gather)r   r   r   r   backr   r   r   unmap_to_all  s   2
zVectorQuantizer.unmap_to_allc              	   C   sV  |d u s|dksJ d|du sJ d|du sJ dt |d }|d| j}tj|d ddd	tj| jjd dd
 dtd|t | jjd  }tj	|dd

d}tj|jd | j|jd}|d|d t|| jj|j}	d }
| |	| |}	}tj|	|  d g ddd	}tj|	 | d g ddd	}| js| j| | }n|| j|  }||	|   }	tj|dd
}tt|t|d   }t |	d }	| jd ur|d|jd d}| |d}|dd}| jr||	jd |	jd |	jd }|	||d|
|  | j|   |  ffS )Nr   z)Only for interface compatible with GumbelFzb c h w -> b h w crH   r^   r%   Tr   r&   z	bd,dn->bnz
n d -> d nr   r   r   g|=zb h w c -> b c h wru   )r   
contiguousviewr   r   rs   r   r   einsumargminr   zerosrq   r   r   scatter_matmulr   r   r*   r   r   explogr   squeezer   r   r   )r   r4   temprescale_logitsreturn_logitsz_flatteneddencoding_indices	encodingsz_qmin_encodingscommit_lossemb_lossr5   	avg_probs
perplexitymin_encoding_indicesr   r   r   r7      s^      
 

zVectorQuantizer.forwardc                 C   sb   | j d ur||d d}| |}|d}| |}|d ur/||}|dddd }|S )Nr   rH   ru   r%   r^   )r   r   r   r   r   permuter   )r   r6   rq   r   r   r   r   get_codebook_entry_  s   




z"VectorQuantizer.get_codebook_entry)r   Nr   FTF)NFF)r;   r<   r=   r>   r@   rr   strr   r   r   r   r7   r   rB   r   r   r!   r   r      s6    (

?r   c                       sd   e Zd ZdZddddddded	ed
ee dedef
 fddZdde	j
dede	j
fddZ  ZS )LFQuantizera`  Lookup-Free Quantization

    Adapted from: https://github.com/lucidrains/vector-quantize-pytorch/blob/9502a1f447876d53fd37685b226bf28f250dc4a3/
    vector_quantize_pytorch/lookup_free_quantization.py
    [Copyright (c) 2020 Phil Wang]
    https://github.com/lucidrains/vector-quantize-pytorch/blob/9502a1f447876d53fd37685b226bf28f250dc4a3/LICENSE
    Ng?r   g{Gz?F)	embed_dimentropy_loss_weightcommitment_loss_weightdefault_tempentropy_lossrZ   rQ   r   r   r   c                   s  t    || _|| _|| _|| _|| _|p|}||k}	|	r$t||nt	 | _
|	r1t||nt	 | _|dtj| _|rd| |ksJJ d|| _| jddt|d dd dd | jd	td
dd t|}
|
d  | j@ dk }d| d }| jd|dd dS dS )a  Lookup-Free Quantization

        Args:
            codebook_size (int): The number of entries in the codebook.
            codebook_dim (int): The number of bits in each code.
            embed_dim (Optional[int], optional): The dimension of the input embedding. Defaults to None.
            entropy_loss_weight (float, optional): Whether to use entropy loss. Defaults to 0.1.
            commitment_loss_weight (float, optional): Weight for commitment loss. Defaults to 0.25.
            default_temp (float, optional): The temprature to use. Defaults to 0.01.
            entropy_loss (bool, optional): Flag for entropy loss. Defaults to False.
        r   r^   z'codebook size must be 2 ** codebook_dimmaskr%   rH   FrF   zeror_   ).Nr   r   codebookN)r   r   r   rQ   r   entrop_loss_weightr   r   rT   rU   rV   rJ   r   r   r   r   rZ   rN   r[   rL   r@   r   rr   )r   rZ   rQ   r   r   r   r   r   r    rW   	all_codesbitsr   r!   r   r   r   z  s2   

zLFQuantizer.__init__r4   r   r$   c              	   C   s  |p| j }t|d}t|d\}}| |}t|d| jd}|}t|}t|dk|| }|||   }||  d j	g dd}t|d	}| 
|}t||d}t|d
}| j| }| jrt|dk | j  dd}	t|	|d}	t|	d}	dtd|| j|j }
|
 | jdd}t|j	ddgd}t|dd}t|	 }|| }|| j| 7 }||ddd|	| j|	   | j|	   | j|	   | j|	   ffS ||ddd| j|	   fS )Nr|   r}   r~   r   r   r^   r   r&   r   rv   zb n c d -> b n crs   r   r   z... i d, j d -> ... i jrH   r%   z... c d -> c dr   )r   r   r   rV   rR   r   	ones_liker`   r*   r   rJ   r	   r   r   r   r@   r   r   r   r-   r   softmaxr   r   r   )r   r4   r   r   original_inputcodebook_valuer   r   r5   r6   distanceprobper_sample_entropyavg_probcodebook_entropyentropy_aux_lossr   r   r   r7     sV   








zLFQuantizer.forwardN)r;   r<   r=   r>   r@   r   rr   r   r   r   rA   r7   rB   r   r   r!   r   r   q  s&    	$6r   c                       s0   e Zd ZdZ fddZdejfddZ  ZS )InvQuantizerJitz<Use for decoder_jit to trace quantizer in discrete tokenizerc                    s   t    || _d S r   )r   r   	quantizer)r   r   r!   r   r   r     s   

zInvQuantizerJit.__init__r6   c                 C   s   | j |}|| j jS r   )r   r:   r-   r   )r   r6   rz   r   r   r   r7     s   zInvQuantizerJit.forward)	r;   r<   r=   r>   r   r   rA   r7   rB   r   r   r!   r   r     s    r   )r>   typingr   numpyr   r   torch.nnr   torch.nn.functional
functionalr   einopsr   6nemo.collections.common.video_tokenizers.modules.utilsr   r   r   r   r   r	   Moduler
   r   r   r   r   r   r   r   r   <module>   s     
!   