o
    ei=                     @   s^   d Z ddlZddlmZ ddlm  mZ ddlmZ G dd dej	Z
G dd dej	ZdS )z_
Gumbel Softmax implementation with multiple groups possible.

Authors
 * Rudolf A. Braun 2022
    N)vector_normc                       s0   e Zd ZdZ fddZdd Zdd Z  ZS )GumbelVectorQuantizera  Vector quantization using gumbel softmax. Copied from fairseq implementation.
    Arguments
    ---------
    input_dim: int
        Input dimension (channels).
    num_vars: int
        Number of quantized vectors per group.
    temp_tuple: float
        Temperature for training. this should be a tuple of 3 elements: (start, stop, decay factor).
    groups: int
        Number of groups for vector quantization.
    vq_dim: int
        Dimensionality of the resulting quantized vector.

    Example
    -------
    >>> quantiser = GumbelVectorQuantizer(128, 100, (2.0, 0.25, 0.999995,), 2, 50 )
    >>> inputs = torch.rand(10, 12, 128)
    >>> output = quantiser(inputs)
    >>> output["x"].shape
    torch.Size([10, 12, 50])
    c                    s  t    || _|| _|| _|| _|| dks"J d| d| d|| }tt	d|| || _
tj| j
 t| j|| | _tjj| jjddd tj| jj t|dksaJ ||\| _| _| _| j| _tjttt| j| j dd	| _d S )
Nr   zdim z must be divisible by groups z for concatenation   )meanstd   F)requires_grad)super__init__groups	input_dimnum_varsvq_dimnn	ParametertorchFloatTensorvarsinituniform_Linearweight_projnormal_weightzeros_biaslenmax_tempmin_temp
temp_decay	curr_templogtensorfloatmax_ent)selfr   r   
temp_tupler   r   var_dim	__class__ Y/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/nnet/quantisers.pyr
   &   s.   
zGumbelVectorQuantizer.__init__c                 C   s   t | j| j|  | j| _dS )z-Update the temperature given the current stepN)maxr   r   r   r    )r%   stepsr*   r*   r+   update_tempF   s   
z!GumbelVectorQuantizer.update_tempc                 C   s  | j | j | jd}|j\}}}|d|}| |}||| | j d}|d\}}|j|j 	d|ddd|| | jd}t
j| dd}	t
t
j|	t
|	d  dd  |d< t
j||| | jd ddjdd}
t
t
j|
t
|
d  dd  |d	< | j|d
< | jrtj| | jdd|}n|}||| d}| j}|d| }||| | j| j d}|d}|||d}||d< |S )6Forward the latent vector to obtain a quantised output)r   tempr   g      ?r   dimgHz>code_perplexityprob_perplexr0   T)tauhardx)r   r   r    shapereshaper   viewr,   	new_zerosscatter_r   r   r#   expsumr!   softmaxtrainingFgumbel_softmaxtype_asr   	unsqueeze)r%   r9   resultbsztszfsz_khard_x
hard_probs	avg_probsr   r*   r*   r+   forwardL   sX   




zGumbelVectorQuantizer.forward)__name__
__module____qualname____doc__r
   r.   rP   __classcell__r*   r*   r(   r+   r      s
     r   c                       s(   e Zd ZdZ fddZdd Z  ZS )RandomProjectionQuantizera  Vector quantization using a projection and a randomly initialised codebook
    this is useful for models like BEST-RQ for instance.

    The output is the indices of the closest code in the codebook for each
    time step of the input.

    ref: https://arxiv.org/pdf/2202.01855

    Arguments
    ---------
    input_dim: int
        Input dimension (channels).
    cb_dim: int
        Size of each code in the codebook.
    cb_vocab: int
        Number of codes in the codebook

    Example
    -------
    >>> quantiser = RandomProjectionQuantizer(16, 16, 32)
    >>> inputs = torch.rand(10, 12, 16)
    >>> output = quantiser(inputs)
    >>> output.shape
    torch.Size([10, 12])
    c              	      s\   t    || _|| _|| _t||f}| dtj	
| | dtt|| d S )NPCB)r	   r
   r   cb_dimcb_vocabr   emptyregister_bufferr   r   xavier_uniform_rC   	normalizerandn)r%   r   rY   rZ   P_initr(   r*   r+   r
      s   
z"RandomProjectionQuantizer.__init__c                 C   s:   t j|| j dd}t| jd|d ddjddS )r/      r2   r   r1   )rC   r^   rW   r   rX   rF   argmin)r%   r9   r*   r*   r+   rP      s   z!RandomProjectionQuantizer.forward)rQ   rR   rS   rT   r
   rP   rU   r*   r*   r(   r+   rV      s    rV   )rT   r   torch.nnr   torch.nn.functional
functionalrC   torch.linalgr   Moduler   rV   r*   r*   r*   r+   <module>   s    q