import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import Module, ModuleList
import torchaudio
from einops import rearrange
import numpy as np
from torchtune.modules import RotaryPositionalEmbeddings


class RMSNorm(torch.nn.Module):
    def __init__(self, dim: int, eps: float = 1e-6):
        """https://github.com/meta-llama/llama/blob/main/llama/model.py"""
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        norm_x = torch.mean(x ** 2, dim=-1, keepdim=True)
        output = x * torch.rsqrt(norm_x + self.eps) * self.weight
        return output


class MLP(nn.Module):
    def __init__(self, dim: int) -> None:
        super().__init__()
        self.fc1 = nn.Linear(dim, 4 * dim, bias=False)
        self.silu = nn.SiLU()
        self.fc2 = nn.Linear(4 * dim, dim, bias=False)

    def forward(self, x):
        x = self.fc1(x)
        x = self.silu(x)
        x = self.fc2(x)
        return x


class Attention(nn.Module):
    def __init__(self, dim: int, n_heads: int, rotary_embed: RotaryPositionalEmbeddings):
        super().__init__()

        assert dim % n_heads == 0

        self.n_heads = n_heads
        self.dim = dim
        self.rotary_embed = rotary_embed

        self.flash = hasattr(torch.nn.functional, "scaled_dot_product_attention")
        assert self.flash, "Must have flash attention."

        self.c_attn = nn.Linear(dim, 3 * dim, bias=False)
        self.c_proj = nn.Linear(dim, dim, bias=False)

    def forward(self, x):
        r"""
        Args:
            x: (b, t, h*d)

        Constants:
            b: batch_size
            t: time steps
            r: 3
            h: heads_num
            d: heads_dim
        """
        B, T, C = x.size()

        # Project to q, k, v in one matmul, then split: (b, t, 3*h*d) -> 3 x (b, h, t, d)
        q, k, v = rearrange(self.c_attn(x), "b t (r h d) -> r b h t d", r=3, h=self.n_heads)

        q = self.rotary_embed(q)
        k = self.rotary_embed(k)

        if self.flash:
            y = torch.nn.functional.scaled_dot_product_attention(
                q, k, v, attn_mask=None, dropout_p=0.0, is_causal=False
            )

        y = rearrange(y, "b h t d -> b t (h d)")

        y = self.c_proj(y)
        # shape: (b, t, h*d)

        return y


class TransformerBlock(nn.Module):
    def __init__(self, dim: int, n_heads: int, rotary_embed: RotaryPositionalEmbeddings):
        super().__init__()

        self.dim = dim
        self.n_heads = n_heads

        self.att_norm = RMSNorm(dim)
        self.ffn_norm = RMSNorm(dim)
        self.att = Attention(dim=dim, n_heads=n_heads, rotary_embed=rotary_embed)
        self.mlp = MLP(dim=dim)

    def forward(self, x: torch.Tensor):
        # Pre-norm residual layout: x + Att(Norm(x)), then x + MLP(Norm(x)).
        x = x + self.att(self.att_norm(x))
        x = x + self.mlp(self.ffn_norm(x))
        return x
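
# A minimal usage sketch, not recovered from the compiled module: `ModuleList`
# is imported above but unused in this file, which suggests these blocks are
# stacked into a deeper encoder elsewhere in xcodec2. The wrapper below shows
# one plausible way to do that; the class name `TransformerStack` and the
# `n_layers` parameter are illustrative assumptions.
class TransformerStack(nn.Module):
    def __init__(self, dim: int, n_heads: int, n_layers: int):
        super().__init__()
        # One rotary embedding, sized to the per-head dim, shared by all blocks.
        rotary_embed = RotaryPositionalEmbeddings(dim=dim // n_heads)
        self.blocks = ModuleList(
            [TransformerBlock(dim=dim, n_heads=n_heads, rotary_embed=rotary_embed)
             for _ in range(n_layers)]
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # (b, t, dim) -> (b, t, dim), applied block by block.
        for block in self.blocks:
            x = block(x)
        return x
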
if __name__ == "__main__":
    # NOTE: the literal constants in this smoke test could not be recovered
    # from the compiled module; the values below are assumptions, chosen so
    # that head_dim = dim / n_heads = 128 matches the name `rotary_embed_128`.
    rotary_embed_128 = RotaryPositionalEmbeddings(dim=128)

    transformer_block = TransformerBlock(
        dim=1024,
        n_heads=8,
        rotary_embed=rotary_embed_128,
    )

    x = torch.randn(2, 1000, 1024)
    y = transformer_block(x)

    print(y.shape)
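
    # Hypothetical follow-up: exercise the TransformerStack sketch defined
    # above (not part of the recovered module), reusing the same assumed sizes.
    stack = TransformerStack(dim=1024, n_heads=8, n_layers=4)
    print(stack(x).shape)  # expected: torch.Size([2, 1000, 1024])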