import torch
import torch.nn as nn
from einops import rearrange
from torchtune.modules import RotaryPositionalEmbeddings


class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-6):
        """https://github.com/meta-llama/llama/blob/main/llama/model.py"""
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        norm_x = torch.mean(x ** 2, dim=-1, keepdim=True)
        output = x * torch.rsqrt(norm_x + self.eps) * self.weight
        return output

__module____qualname__intfloatr	   r   __classcell__r   r   r   r   r      s    r   c                       s.   e Zd Zdeddf fddZdd Z  ZS )MLPr   returnNc                    sD   t    tj|d| dd| _t | _tjd| |dd| _d S )N   Fbias)r   r	   r
   Linearfc1SiLUsilufc2)r   r   r   r   r   r	      s   

zMLP.__init__c                 C   s"   |  |}| |}| |}|S N)r)   r+   r,   r   r   r   r   r   r      s   


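# Position-wise feed-forward: fc1 widens the features, SiLU gates them, fc2
# projects back down. The 4x hidden width above is an assumed ratio (the
# conventional transformer choice); the bias-free Linear layers mirror the
# attention projections below.
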
zMLP.forward)r   r   r   r    r	   r   r"   r   r   r   r   r#      s    r#   c                       s2   e Zd Zdededef fddZdd Z  ZS )	Attentionr   n_headsrotary_embedc                    sv   t    || dksJ || _|| _|| _ttjjd| _	| j	s%J dtj
|d| dd| _tj
||dd| _d S )Nr   scaled_dot_product_attentionzMust have flash attention.   Fr&   )r   r	   r0   r   r1   hasattrr   r
   
functionalflashr(   c_attnc_projr   r   r0   r1   r   r   r   r	   $   s   
zAttention.__init__c           	      C   sx   |  \}}}t| |dd| jd\}}}| |}| |}| jr0tjjj	|||dddd}t|d}| 
|}|S )	z
        Args:
            x: (b, t, h*d)

        Constants:
            b: batch_size
            t: time steps
            r: 3
            h: heads_num
            d: heads_dim
        zb t (r h d) -> r b h t dr3   )rhNr   F)	attn_mask	dropout_p	is_causalzb h t d -> b t (h d))sizer   r7   r0   r1   r6   r   r
   r5   r2   r8   )	r   r   BTCqkvyr   r   r   r   5   s   



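# For reference, the fused call above computes plain bidirectional softmax
# attention per head, with d = head_dim:
#   scores = q @ k.transpose(-2, -1) / math.sqrt(d)
#   y = scores.softmax(dim=-1) @ v
# One layout note: torchtune's RotaryPositionalEmbeddings documents its input
# shape as (b, seq_len, n_heads, head_dim), while q and k arrive here as
# (b, n_heads, t, head_dim), so the rotation is applied along dim 1 as written.
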
zAttention.forward)r   r   r   r    r   r	   r   r"   r   r   r   r   r/   #   s    r/   c                       s:   e Zd Zdededef fddZdejfddZ  Z	S )	TransformerBlockr   r0   r1   c                    sJ   t    || _|| _t|| _t|| _t|||d| _t	|d| _
d S )Nr   r0   r1   r   )r   r	   r   r0   r   att_normffn_normr/   attr#   mlpr9   r   r   r   r	   Y   s   


zTransformerBlock.__init__r   c                 C   s,   ||  | | }|| | | }|S r-   )rL   rJ   rM   rK   r.   r   r   r   r   e   s   zTransformerBlock.forward)
if __name__ == "__main__":
    # Smoke test with illustrative sizes: the rotary dim (128) must equal the
    # per-head dim, i.e. dim // n_heads; batch and sequence lengths are arbitrary.
    rotary_embed_128 = RotaryPositionalEmbeddings(dim=128)
    transformer_block = TransformerBlock(dim=1024, n_heads=8, rotary_embed=rotary_embed_128)

    x = torch.randn(2, 1000, 1024)
    y = transformer_block(x)
    print(y.shape)  # torch.Size([2, 1000, 1024])
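
# Usage sketch (an illustration, not part of this file): a deeper model can
# share a single rotary embedding across a stack of blocks, e.g.
#
#   rotary = RotaryPositionalEmbeddings(dim=128)
#   blocks = nn.ModuleList(
#       TransformerBlock(dim=1024, n_heads=8, rotary_embed=rotary)
#       for _ in range(12)
#   )
#   for block in blocks:
#       x = block(x)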