o
    ̳iM$                     @   s   d dl Z d dlZd dlZd dlmZ ddlmZ ddlm	Z	m
Z
mZ ddlT ddlmZ ddlmZ d dlmZ dd	lmZ d d
lmZ dd ZG dd dejZG dd dejZdd Zedkrge  dS dS )    N   )
ResidualVQ)WNConv1dDecoderBlockResLSTM)*)activations)blocks)utilsTransformerBlock)RotaryPositionalEmbeddingsc                 C   s6   t | tjrtjj| jdd tj| jd d S d S )Ng{Gz?)stdr   )
isinstancennConv1dinittrunc_normal_weight	constant_biasm r   L/home/ubuntu/.local/lib/python3.10/site-packages/xcodec2/vq/codec_decoder.pyinit_weights   s   r   c                       s   e Zd Z											
					d" fdd	Zd#ddZdd Zdd Zdd Zdd Zdd Z	dd Z
dd Zd d! Z  ZS )$CodecDecoder      0   TF         r#   r#   r    r      	   r            ? @      c              
      s   t    t|| _|| _|| _t|	|
||d|||d| _|}t	||dddg}|r5|t
|||dg7 }t|D ]\}}|d|  }|d|d   }|t||||g7 }q9|ttj|dd	d
t	|ddddt g7 }tj| | _|   d S )Nr    num_quantizersdimcodebook_sizecodebook_dimthreshold_ema_dead_code
commitmentweight_initfull_commit_loss   r%   )kernel_sizepadding)
num_layersbidirectionalr   T)alpha_logscale)
activation)super__init__npprod
hop_lengthngf	up_ratiosr   	quantizerr   r   	enumerater   Activation1dr   	SnakeBetar   Tanh
Sequentialmodelreset_parameters)selfin_channelsupsample_initial_channelr@   use_rnnrnn_bidirectionalrnn_num_layersrA   	dilationsvq_num_quantizersvq_dimvq_commit_weightvq_weight_initvq_full_commit_lossr.   r/   channelslayersistride	input_dim
output_dim	__class__r   r   r<      sD   

zCodecDecoder.__init__c                 C   s0   |du r|  |\}}}|||fS | |}|S )NTrB   rH   rJ   xvqqcommit_lossr   r   r   forwardQ   s
   

zCodecDecoder.forwardc                 C      | j  | _ | j |}|S NrB   evalvq2embrJ   ra   r`   r   r   r   ri   X      zCodecDecoder.vq2embc                 C      | j  | _ | j  }|S rf   rB   rh   get_embrJ   embsr   r   r   rn   ]      
zCodecDecoder.get_embc                 C   $   |d d d d d f }|  |}|S rf   rH   rj   r   r   r   inference_vqb      
zCodecDecoder.inference_vqc                 C   $   |  |\}}}}| |}|d fS rf   r^   rJ   r`   rb   lossperpr   r   r   inference_0g      
zCodecDecoder.inference_0c                 C      |  |}|d fS rf   rs   rJ   r`   r   r   r   	inferencel      
zCodecDecoder.inferencec                 C      dd }|  | dS ):Remove weight normalization module from all of the layers.c                 S   *   z
t jj|  W d S  ty   Y d S w rf   torchr   r
   remove_weight_norm
ValueErrorr   r   r   r   _remove_weight_normt   
   z<CodecDecoder.remove_weight_norm.<locals>._remove_weight_normNapplyrJ   r   r   r   r   r   q      zCodecDecoder.remove_weight_normc                 C   r   )9Apply weight normalization module from all of the layers.c                 S   .   t | tjst | tjrtjj|  d S d S rf   r   r   r   ConvTranspose1dr   r
   weight_normr   r   r   r   _apply_weight_norm      z:CodecDecoder.apply_weight_norm.<locals>._apply_weight_normNr   rJ   r   r   r   r   apply_weight_norm|      zCodecDecoder.apply_weight_normc                 C      |  t d S rf   r   r   rJ   r   r   r   rI         zCodecDecoder.reset_parameters)r   r   r   TFr    r!   r$   r   r'   r(   FFr)   r*   T)__name__
__module____qualname__r<   rd   ri   rn   rt   rz   r~   r   r   rI   __classcell__r   r   r\   r   r      s4    
:	r   c                       s   e Zd Z													
			d) fdd	Zd*ddZdd Zdd Zdd Zdd Zdd Z	dd Z
dd Zdd  Zd!d" Zd#d$ Zd%d& Zd'd( Z  ZS )+ CodecDecoder_oobleck_Transformerr*   r!   r$   r   r   r(   Fr)         @   c              
      s   t    t|| _|| _|| _| _t|||	|
d|||d| _	t
|d fddt|D }tj| | _tjdd| _tj| j| j| j||| j| jd| _|   d S )	Nr    r+   )r-   c                    s   g | ]	}t  d qS ))r-   n_headsrotary_embedr   ).0_heads
hidden_dimtime_rotary_embedr   r   
<listcomp>   s    z=CodecDecoder_oobleck_Transformer.__init__.<locals>.<listcomp>gư>)eps)capacitydilated_unitupsampling_unitratiosrP   pre_network_convpost_network_conv)r;   r<   r=   r>   r?   r   rA   r   r   rB   r   ranger   rG   transformers	LayerNormfinal_layer_normr	   DilatedResidualDecoderr   r   pre_conv	post_convconv_blocksrI   )rJ   r@   rA   rP   rQ   rR   rS   rT   rU   r.   r/   r   depthr   pos_meb_dimtransformer_blocksr\   r   r   r<      s>   

z)CodecDecoder_oobleck_Transformer.__init__Tc                 C   sR   |du r|  |\}}}|||fS | |}| |}|ddd}| |}|S )NTr   r    r   )rB   r   r   permuter   r_   r   r   r   rd      s   



z(CodecDecoder_oobleck_Transformer.forwardc                 C   re   rf   rg   rj   r   r   r   ri      rk   z'CodecDecoder_oobleck_Transformer.vq2embc                 C   rl   rf   rm   ro   r   r   r   rn      rq   z(CodecDecoder_oobleck_Transformer.get_embc                 C   rr   rf   rs   rj   r   r   r   rt      ru   z-CodecDecoder_oobleck_Transformer.inference_vqc                 C   rv   rf   r^   rw   r   r   r   rz      r{   z,CodecDecoder_oobleck_Transformer.inference_0c                 C   r|   rf   rs   r}   r   r   r   r~      r   z*CodecDecoder_oobleck_Transformer.inferencec                 C   r   )r   c                 S   r   rf   r   r   r   r   r   r      r   zPCodecDecoder_oobleck_Transformer.remove_weight_norm.<locals>._remove_weight_normNr   r   r   r   r   r      r   z3CodecDecoder_oobleck_Transformer.remove_weight_normc                 C   r   )r   c                 S   r   rf   r   r   r   r   r   r      r   zNCodecDecoder_oobleck_Transformer.apply_weight_norm.<locals>._apply_weight_normNr   r   r   r   r   r      r   z2CodecDecoder_oobleck_Transformer.apply_weight_normc                 C   r   rf   r   r   r   r   r   rI      r   z1CodecDecoder_oobleck_Transformer.reset_parametersc                 C   s   t j| j|ddS Nr   )rK   out_channelsr5   )r   r   r   )rJ   r   r   r   r   r      s   z)CodecDecoder_oobleck_Transformer.pre_convc                 C   s   t j|dddS r   )r   r   )rJ   rK   r   r   r   r     s   z*CodecDecoder_oobleck_Transformer.post_convc                 C   s   t j||dtjtjdS )Nr%   )r   dilationr5   r:   normalization)r	   DilatedConvolutionalUnitr   ReLUr
   r   )rJ   r   r   r   r   r   r        z-CodecDecoder_oobleck_Transformer.dilated_unitc                 C   s   t j|||tjtjdS )N)rZ   r[   rY   r:   r   )r	   UpsamplingUnitr   r   r
   r   )rJ   rZ   r[   rY   r   r   r   r     r   z0CodecDecoder_oobleck_Transformer.upsampling_unit)r*   r!   r$   r   r   r(   FFr)   r   r   r   r   r   r   )r   r   r   r<   rd   ri   rn   rt   rz   r~   r   r   rI   r   r   r   r   r   r   r   r\   r   r      s:    
9
	
r   c                  C   s   t t j r	dnd} td|   t | }td d}d}d}t |||| }td|j  |	  ||d	d
}d}d S )NcudacpuzUsing device: zModel initialized.r    r   d   zDummy input shape: F)ra   r   )
r   devicer   is_availableprintr   torandnshaperh   )r   rH   
batch_sizerK   sequence_lengthdummy_inputoutput_no_vqcr   r   r   main  s   r   __main__)sysnumpyr=   r   torch.nnr   residual_vqr   moduler   r   r   alias_free_torch r   r	   r
   bs_roformer5r   torchtune.modulesr   r   Moduler   r   r   r   r   r   r   r   <module>   s(    s 
