o
    ̳i&                     @   s  d dl Z d dlZd dlmZ d dlZddlmZmZmZ ddl	T ddl
mZ ddlmZ d dlmZ dd	l
mZ d d
lmZ dd ZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZdd Zedkre ZedddZeeZe dej! dS dS )    N)nn   )WNConv1dEncoderBlockResLSTM)*)activationsTransformerBlock)RotaryPositionalEmbeddings)blocks)utilsc                 C   s6   t | tjrtjj| jdd tj| jd d S d S )Ng{Gz?)stdr   )
isinstancer   Conv1dinittrunc_normal_weight	constant_biasm r   L/home/ubuntu/.local/lib/python3.10/site-packages/xcodec2/vq/codec_encoder.pyinit_weights   s   r   c                       sT   e Zd Z							d fdd		Zd
d Zdd Zdd Zdd Zdd Z  Z	S )CodecEncoder0   TF   r   r      r      r      	      c              	      s   t    t|| _|| _|| _|}td|dddg| _t	|D ]\}	}
|d9 }|  jt
||
|dg7  _q!|rF|  jt|||dg7  _|  jttj|dd	d
t||dddg7  _tj| j | _|| _|   d S )Nr      r"   kernel_sizepaddingr   stride	dilations)
num_layersbidirectionalTalpha_logscale
activation)super__init__npprod
hop_lengthngf	up_ratiosr   block	enumerater   r   Activation1dr   	SnakeBetar   
Sequentialenc_dimreset_parameters)selfr7   use_rnnrnn_bidirectionalrnn_num_layersr8   r+   out_channelsd_modelir*   	__class__r   r   r3      s.   
zCodecEncoder.__init__c                 C   s   |  |}|S Nr9   )r@   xoutr   r   r   forward=   s   
zCodecEncoder.forwardc                 C   
   |  |S rI   rJ   r@   rK   r   r   r   	inferenceA      
zCodecEncoder.inferencec                 C      dd }|  | dS ):Remove weight normalization module from all of the layers.c                 S   *   z
t jj|  W d S  ty   Y d S w rI   torchr   r   remove_weight_norm
ValueErrorr   r   r   r   _remove_weight_normG   
   z<CodecEncoder.remove_weight_norm.<locals>._remove_weight_normNapplyr@   rY   r   r   r   rW   D      zCodecEncoder.remove_weight_normc                 C   rR   )9Apply weight normalization module from all of the layers.c                 S   "   t | tjrtjj|  d S d S rI   r   r   r   rV   r   weight_normr   r   r   r   _apply_weight_normR      z:CodecEncoder.apply_weight_norm.<locals>._apply_weight_normNr[   r@   rc   r   r   r   apply_weight_normO      zCodecEncoder.apply_weight_normc                 C      |  t d S rI   r\   r   r@   r   r   r   r?   X      zCodecEncoder.reset_parameters)r   TFr   r   r!   r$   
__name__
__module____qualname__r3   rM   rP   rW   rf   r?   __classcell__r   r   rG   r   r      s    )	r   c                       s$   e Zd Z fddZdd Z  ZS )	Transposec                    s   t t|   || _|| _d S rI   )r2   rq   r3   dim1dim2)r@   rr   rs   rG   r   r   r3   ]   s   
zTranspose.__init__c                 C   s   | | j| jS rI   )	transposerr   rs   rO   r   r   r   rM   b      zTranspose.forward)rm   rn   ro   r3   rM   rp   r   r   rG   r   rq   \   s    rq   c                       sX   e Zd Zdg ddddddf fdd	Zd	d
 Zdd Zdd Zdd Zdd Z  Z	S )CodecEncoder_Transformerr   r   r!   r$      @   c                    s   t    t|| _|| _|| _|}td|dddg| _t	|D ]\}	}
|d9 }|  jt
||
|dg7  _q!tj| j | _ttj|ddd	t||dddg| _tj| j | _|   d S )
Nr   r%   r"   r&   r   r)   Tr.   r0   )r2   r3   r4   r5   r6   r7   r8   r   conv_blocksr:   r   r   r=   r;   r   r<   conv_final_blockr?   )r@   r7   r8   r+   
hidden_dimdepthheadspos_meb_dimrE   rF   r*   rG   r   r   r3   f   s   
	z!CodecEncoder_Transformer.__init__c                 C   s&   |  |}| |}|ddd}|S Nr   r   r   )ry   rz   permuterO   r   r   r   rM      s   

z CodecEncoder_Transformer.forwardc                 C   rN   rI   rJ   rO   r   r   r   rP      rQ   z"CodecEncoder_Transformer.inferencec                 C   rR   )rS   c                 S   rT   rI   rU   r   r   r   r   rY      rZ   zHCodecEncoder_Transformer.remove_weight_norm.<locals>._remove_weight_normNr[   r]   r   r   r   rW      r^   z+CodecEncoder_Transformer.remove_weight_normc                 C   rR   )r_   c                 S   r`   rI   ra   r   r   r   r   rc      rd   zFCodecEncoder_Transformer.apply_weight_norm.<locals>._apply_weight_normNr[   re   r   r   r   rf      rg   z*CodecEncoder_Transformer.apply_weight_normc                 C   rh   rI   ri   rj   r   r   r   r?      rk   z)CodecEncoder_Transformer.reset_parametersrl   r   r   rG   r   rv   e   s    .
	rv   c                       s   e Zd Z							d fdd		Zd
d Zdd Zdd Zdd Zdd Zdd Z	de
de
de
fddZdd Zdd Z  ZS ) Codec_oobleck_Transformer    r   r!   r$   rw      rx   c           	   	      s   t    t|| _|| _|| _| _tj	|| j
| j||| j| jd| _t|d fddt|D }tj| | _tjdd| _|   d S )N)capacitydilated_unitdownsampling_unitratiosr+   pre_network_convpost_network_convdimc                       g | ]	}t  d qS )r   n_headsrotary_embedr	   .0_r}   r{   time_rotary_embedr   r   
<listcomp>       z6Codec_oobleck_Transformer.__init__.<locals>.<listcomp>ư>eps)r2   r3   r4   r5   r6   r7   r8   r{   r   DilatedResidualEncoderr   r   pre_conv	post_convry   r   ranger   r=   transformers	LayerNormfinal_layer_normr?   )	r@   r7   r8   r+   r{   r|   r}   r~   transformer_blocksrG   r   r   r3      s*   
	
z"Codec_oobleck_Transformer.__init__c                 C   s0   |  |}|ddd}| |}| |}|S r   )ry   r   r   r   rO   r   r   r   rM      s
   


z!Codec_oobleck_Transformer.forwardc                 C   rN   rI   rJ   rO   r   r   r   rP      rQ   z#Codec_oobleck_Transformer.inferencec                 C   rR   )rS   c                 S   rT   rI   rU   r   r   r   r   rY      rZ   zICodec_oobleck_Transformer.remove_weight_norm.<locals>._remove_weight_normNr[   r]   r   r   r   rW      r^   z,Codec_oobleck_Transformer.remove_weight_normc                 C   rR   )r_   c                 S   r`   rI   ra   r   r   r   r   rc      rd   zGCodec_oobleck_Transformer.apply_weight_norm.<locals>._apply_weight_normNr[   re   r   r   r   rf      rg   z+Codec_oobleck_Transformer.apply_weight_normc                 C   rh   rI   ri   rj   r   r   r   r?     rk   z*Codec_oobleck_Transformer.reset_parametersc                 C   s   t j||dtjtjdS )Nr"   )r'   r1   normalization)r   DilatedConvolutionalUnitr   ReLUr   rb   )r@   r{   dilationr   r   r   r        z&Codec_oobleck_Transformer.dilated_unit	input_dim
output_dimr*   c                 C   s   t j|||tjtjdS )N)r   )r   DownsamplingUnitr   r   r   rb   )r@   r   r   r*   r   r   r   r     r   z+Codec_oobleck_Transformer.downsampling_unitc                 C   s   t d|dS Nr   )r   r   )r@   rD   r   r   r   r     rk   z"Codec_oobleck_Transformer.pre_convc                 C   s   t || jdS r   )r   r   r{   )r@   in_channelsr   r   r   r     ru   z#Codec_oobleck_Transformer.post_conv)r   r   r!   r$   rw   r   rx   )rm   rn   ro   r3   rM   rP   rW   rf   r?   r   intr   r   r   rp   r   r   rG   r   r      s$    )	r   c                       s4   e Zd Zd fdd	Zdejdejfd	d
Z  ZS )CodecEncoder_only_Transformerr$   rw   r   rx   c                    sR   t    |}t|d fddt|D }tj| | _tjdd| _d S )Nr   c                    r   r   r	   r   r   r   r   r   &  r   z:CodecEncoder_only_Transformer.__init__.<locals>.<listcomp>r   r   )	r2   r3   r   r   r   r=   r   r   r   )r@   r{   r|   r}   r~   r   rG   r   r   r3     s   

z&CodecEncoder_only_Transformer.__init__rK   returnc                 C   s   |  |}| |}|S rI   )r   r   rO   r   r   r   rM   0  s   

z%CodecEncoder_only_Transformer.forward)r$   rw   r   rx   )rm   rn   ro   r3   rV   TensorrM   rp   r   r   rG   r   r     s    r   c                 C   s*   t dd |  D }|}|d }||fS )Nc                 s   s    | ]}|  V  qd S rI   )numel)r   pr   r   r   	<genexpr>A  s    z!get_model_size.<locals>.<genexpr>i   )sum
parameters)modeltotal_paramsmodel_size_bytesmodel_size_mbr   r   r   get_model_size?  s   r   __main__i>  zOutput shape:)"sysrV   r   numpyr4   moduler   r   r   alias_free_torch r   bs_roformer5r
   torchtune.modulesr   r   torch.nnr   r   Moduler   rq   rv   r   r   r   rm   r   randnrK   outputprintshaper   r   r   r   <module>   s0    I	Uc"