o
    }oi                     @   sf   d dl Z d dlm  mZ d dl mZ d dlmZ d dlmZ d dl	m
Z
mZmZ G dd deZdS )    N)nn)jasper_activations)NeuralModule)EncodedRepresentationLossType
NeuralTypec                       s^   e Zd Z			d fdd	Zdd Zdd Zd	d
 Zedd Zedd Z	dddZ
  ZS )GumbelVectorQuantizergelu   c                    sZ  t    |_|_|_|_|_|| dks%J d| d| d|| }|s-|nd}tt	
d|| |_tjj |	dkrutd   fddj|
 tjg fd	d
t|	d D t|| R  _ntj|| _tjjjjddd tjjj t|dksJ d|\___j_d_dS )a  Vector quantization using gumbel softmax

        Args:
            dim: input dimension (channels)
            num_vars: number of quantized vectors per group
            temp: temperature for training. this should be a tuple of 3 elements: (start, stop, decay factor)
            groups: number of groups for vector quantization
            combine_groups: whether to use the vectors for all groups
            vq_dim: dimensionality of the resulting quantized vector
            time_first: if true, expect input in BxTxC format, otherwise in BxCxT
            activation: what activation to use (should be a module). this is only used if weight_proj_depth is > 1
            weight_proj_depth: number of layers (with activation in between) to project input before computing logits
            weight_proj_factor: this is used only if weight_proj_depth is > 1. scales the inner dimensionality of
                                projections by this factor
        r   zdim z must be divisible by groups z for concatenationr
   r	   c                    s   t t | | S N)r   
SequentialLinear)	input_dim
output_dim)
activation h/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/parts/submodules/ssl_quantizers.pyblockM   s   z-GumbelVectorQuantizer.__init__.<locals>.blockc                    s$   g | ]} |d krj nqS )r   )r   ).0i)r   	inner_dimselfr   r   
<listcomp>R   s   $ z2GumbelVectorQuantizer.__init__.<locals>.<listcomp>)meanstd   zQQuantize temperature should be a tuple of 3 elements: (start, stop, decay factor)N)super__init__groupscombine_groupsr   num_vars
time_firstr   	ParametertorchFloatTensorvarsinituniform_r   r   ranger   weight_projnormal_weightzeros_biaslenmax_tempmin_temp
temp_decay	curr_tempcodebook_indices)r   dimr    tempr   r   vq_dimr!   r   weight_proj_depthweight_proj_factorvar_dim
num_groups	__class__)r   r   r   r   r   r      s6   
"

zGumbelVectorQuantizer.__init__c                 C   s   t | j| j|  | j| _d S r   )maxr/   r1   r0   r2   )r   num_updatesr   r   r   set_num_updates`   s   z%GumbelVectorQuantizer.set_num_updatesc                 C   s   | j d u rVddlm} t| jg| j }t|| }tj|tj	| j
jd | _ | jsV| j | j| j d| _ td| jD ]}| j d d |f  | j| 7  < q=| j  | _ | j S )Nr   )product)dtypedevicer
   )r3   	itertoolsr@   r(   r    r   listr#   tensorlongr%   rB   flattenr   view)r   r@   pindsbr   r   r   get_codebook_indicesc   s   
"z*GumbelVectorQuantizer.get_codebook_indicesc                 C   s   |   }|d| j}|d}||k sJ d| d| tjd||| fd}|| }| jdd|	 ||d}|S )NrC   r   zsample size z" is greater than size of codebook )lowhighsize)
rM   rI   r   rP   r#   randintr%   squeezeindex_selectrH   )r   rL   nindicescb_size
sample_idxzr   r   r   sample_from_codebookr   s   
"z*GumbelVectorQuantizer.sample_from_codebookc                 C   s&   | j rdtdt iS dtdt iS )z*Returns definitions of module input ports.xBTDr\   r^   r]   )r!   r   r   r   r   r   r   input_types}   s   z!GumbelVectorQuantizer.input_typesc                 C   s:   | j rtdt tt ddS tdt tt ddS )z+Returns definitions of module output ports.r[   )elements_type)rZ   quantize_prob_pplr_   )r!   r   r   r   r`   r   r   r   output_types   s   



z"GumbelVectorQuantizer.output_typesFc                 C   s  | j s	|dd}|j\}}}|d|}| |}||| | j d}|d\}}|j|j 	d|ddd|| | jd}| j
| j }	tj||| | jd ddjdd}
ttj|
t|
d  dd  }|	| |	 }| jrtj| | jdd	|}n|}||| d}| j}| jr|d| jd}|d| }||| | j| j
d}|d
}|||d}| j}| j s|dd}|r|d||d}||| }t| jD ]}|| j
9 }||d d d d |f 7 }q||||fS |||fS )Nr
      rC   g      ?)r4   r   gHz>T)tauhard)r!   	transposeshapereshaper)   rI   r   r=   	new_zerosscatter_r    r#   softmaxfloatr   expsumlogtrainingFgumbel_softmaxr2   type_asr%   r   repeat	unsqueezeargmaxrG   r(   )r   rZ   
return_idsbsztszfsz_khard_xr    	avg_probsrc   r%   cur_codebook_temp
hard_x_max
target_idsr   r   r   r   forward   sD   
.*(


zGumbelVectorQuantizer.forward)r	   r
   r
   )F)__name__
__module____qualname__r   r?   rM   rY   propertyra   rd   r   __classcell__r   r   r;   r   r      s    
B

r   )r#   torch.nn.functionalr   
functionalrt   ,nemo.collections.asr.parts.submodules.jasperr   	nemo.corer   nemo.core.neural_typesr   r   r   r   r   r   r   r   <module>   s   