o
    ´©i?  ã                   @   sø   d dl mZ ddlZddlmZ ddlmZmZ ddlZd dl	m
Z
 dejdejdefd	d
„ZG dd„ dejƒZdd„ Zdejdejdejdejfdd„ZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZG dd„ dejƒZdS )é   )ÚMiniCPM4Configé    N)ÚListÚTuple)ÚStaticKVCacheÚhiddenÚweightÚepsc                 C   sB   | j }|  tj¡ d¡jddd}| t || ¡  |¡} | | S )Né   éÿÿÿÿT)ÚdimÚkeepdim)ÚdtypeÚtoÚtorchÚfloat32ÚpowÚmeanÚrsqrt)r   r   r	   Ú	old_dtypeÚvariance© r   úQ/home/ubuntu/.local/lib/python3.10/site-packages/voxcpm/modules/minicpm4/model.pyÚrms_layernorm	   s   r   c                       s&   e Zd Zd‡ fdd„	Zdd„ Z‡  ZS )ÚMiniCPMRMSNormçíµ ÷Æ°>c                    s&   t ƒ  ¡  t t |¡¡| _|| _dS )z=
        MiniCPMRMSNorm is equivalent to T5LayerNorm
        N)ÚsuperÚ__init__ÚnnÚ	Parameterr   Úonesr   Úvariance_epsilon)ÚselfÚhidden_sizer	   ©Ú	__class__r   r   r      s   

zMiniCPMRMSNorm.__init__c                 C   s   t || j| jƒS ©N)r   r   r!   )r"   Úhidden_statesr   r   r   Úforward   s   zMiniCPMRMSNorm.forward)r   ©Ú__name__Ú
__module__Ú__qualname__r   r(   Ú__classcell__r   r   r$   r   r      s    r   c                 C   s&   | j ddd\}}tj| |fddS )z*Rotates half the hidden dims of the input.r
   r   ©r   )Úchunkr   Úcat)ÚxÚx1Úx2r   r   r   Úrotate_half   s   r4   ÚqÚkÚcosÚsinc                 C   sZ   | j }|  tj¡} | tj¡}| | t| ƒ|  }|| t|ƒ|  }| |¡| |¡fS )aa  
    Args:
        q: Tensor(batch_size, num_heads, seq_len, head_dim)
        k: Tensor(batch_size, num_key_value_heads, seq_len, head_dim)
        cos: Tensor(seq_len, head_dim)
        sin: Tensor(seq_len, head_dim)
    Returns:
        Tensor(batch_size, num_heads, seq_len, head_dim), Tensor(batch_size, num_key_value_heads, seq_len, head_dim)
    )r   r   r   r   r4   )r5   r6   r7   r8   Ú
orig_dtypeÚq_embedÚk_embedr   r   r   Úapply_rotary_pos_emb#   s   
r<   c                       sN   e Zd ZdZdef‡ fdd„Zdd„ Zdejde	ejejf fd	d
„Z
‡  ZS )ÚMiniCPMLongRoPEzoMiniCPMRotaryEmbedding extended with Dynamic NTK scaling. Credits to the Reddit users /u/bloc97 and /u/emozillaÚconfigc                    s  t ƒ  ¡  || _|jr|jn|j|j | _|j| _|j	| _	|j
j| _|j
j| _|j
j| _| j	| j }t dt |¡t | j¡  ¡| _d| jt d| jd¡ ¡ | j   }| jd|dd d| _| jdt d¡dd | jd	t d¡dd | j| j	| jjtjd
 d S )Nr   ç      ð?r   r
   Úinv_freqF)Ú
persistentÚ
cos_cachedÚ
sin_cached)Úseq_lenÚdevicer   )r   r   r>   Úkv_channelsr#   Únum_attention_headsr   Ú
rope_thetaÚbaseÚmax_position_embeddingsÚrope_scalingÚshort_factorÚlong_factorÚ original_max_position_embeddingsÚmathÚsqrtÚlogÚscaling_factorr   ÚarangeÚfloatÚregister_bufferÚmax_seq_len_cachedÚemptyÚ_set_cos_sin_cacher@   rE   r   )r"   r>   Úscaler@   r$   r   r   r   8   s,   



ÿ$
ýzMiniCPMLongRoPE.__init__c                 C   sÂ   || _ tj| j || jjd}|| jkrtj| jtj|d}n
tj| j	tj|d}t 
t |d| ¡j|d| jj|d |¡¡}tj||fdd}| ¡  |¡| j | _| ¡  |¡| j | _dS )u   è®¾ç½®coså’Œsinç¼“å­˜)rE   r   ©r   rE   r?   ©rE   r   r.   N)rV   r   rS   r@   r   rN   ÚtensorrM   r   rL   ÚmulÚouterr   r0   r7   rR   rB   r8   rC   )r"   rD   rE   r   ÚtÚext_factorsÚfreqsÚembr   r   r   rX   U   s   
þz"MiniCPMLongRoPE._set_cos_sin_cacheÚposition_idsÚreturnc                 C   s   | j | }| j| }||fS )u³   
        Args:
            position_ids: Tensor(seq_len) æˆ– Tensor(batch_size, seq_len)
        Returns:
            Tensor(seq_len, head_dim), Tensor(seq_len, head_dim)
        )rB   rC   )r"   rc   r7   r8   r   r   r   r(   j   s   

zMiniCPMLongRoPE.forward)r*   r+   r,   Ú__doc__r   r   rX   r   ÚTensorr   r(   r-   r   r   r$   r   r=   5   s
    (r=   c                       sž   e Zd Zdedef‡ fdd„Zdejdeejejf de	deejeejejf f fd	d
„Z
dejdeejejf dedeejejf dejf
dd„Z‡  ZS )ÚMiniCPMAttentionr>   Ú	layer_idxc                    sÜ   t ƒ  ¡  || _|| _|j| _|j| _|jd u r|j|j n|j| _|j	| _	| j| j	 | _
|j| _d| _tj| j| j| j dd| _tj| j| j	| j dd| _tj| j| j	| j dd| _tj| j| j | jdd| _d S )Ng     ˆÃ@F©Úbias)r   r   r>   rh   r#   rG   Ú	num_headsrF   Úhead_dimÚnum_key_value_headsÚnum_key_value_groupsrJ   rH   r   ÚLinearÚq_projÚk_projÚv_projÚo_proj©r"   r>   rh   r$   r   r   r   x   s   
 zMiniCPMAttention.__init__r'   Úposition_embÚ	is_causalrd   c                 C   s
  |  ¡ \}}}|  |¡}|  |¡}|  |¡}	| ||| j| j¡ dd¡}| ||| j| j¡ dd¡}|	 ||| j| j¡ dd¡}	|\}
}t	|||
|ƒ\}}| 
¡ }| 
¡ }|	 
¡ }	tjjj|||	|dd}| dd¡ 
¡ }| ||| j| j ¡}|  |¡}||	f}||fS )Nr   r
   T)rv   Ú
enable_gqa)Úsizerp   rq   rr   Úviewrk   rl   Ú	transposerm   r<   Ú
contiguousr   r   Ú
functionalÚscaled_dot_product_attentionÚreshapers   )r"   r'   ru   rv   ÚbszÚq_lenÚ_Úquery_statesÚ
key_statesÚvalue_statesr7   r8   Úattn_outputÚpast_key_valuer   r   r   r(   ‰   s0   


û
zMiniCPMAttention.forwardÚposition_idÚkv_cachec                 C   sT  |  ¡ \}}|  |¡}|  |¡}|  |¡}	| |d| j| j¡ dd¡}| |d| j| j¡ dd¡}|	 |d| j| j¡ dd¡}	|\}
}t	|||
|ƒ\}}|\}}||d d …d d …|d d …f< |	|d d …d d …|d d …f< t
j|  d¡|jd|k}| ¡ }| ¡ }| ¡ }t
jjj||||dd}| dd¡ ¡ }| || j| j ¡}|  |¡}|S )Nr   r
   r[   T)Ú	attn_maskrw   )rx   rp   rq   rr   ry   rk   rl   rz   rm   r<   r   rS   rE   r{   r   r|   r}   r~   rs   )r"   r'   ru   r‡   rˆ   r   r   r‚   rƒ   r„   r7   r8   Ú	key_cacheÚvalue_cacher‰   r…   r   r   r   Úforward_step²   s6   


û
zMiniCPMAttention.forward_step©r*   r+   r,   r   Úintr   r   rf   r   Úboolr(   rŒ   r-   r   r   r$   r   rg   w   s,    þýü
û)þýüûúrg   c                       s$   e Zd Z‡ fdd„Zdd„ Z‡  ZS )Ú
MiniCPMMLPc                    sp   t ƒ  ¡  || _|j| _|j| _tj| j| jdd| _tj| j| jdd| _tj| j| jdd| _	t 
¡ | _d S )NFri   )r   r   r>   r#   Úintermediate_sizer   ro   Ú	gate_projÚup_projÚ	down_projÚSiLUÚact_fn©r"   r>   r$   r   r   r   ã   s   
zMiniCPMMLP.__init__c                 C   s    |   |  |  |¡¡|  |¡ ¡S r&   )r”   r–   r’   r“   )r"   r1   r   r   r   r(   í   s    zMiniCPMMLP.forwardr)   r   r   r$   r   r   â   s    
r   c                       s    e Zd Zdedef‡ fdd„Zdejdeejejf de	deejeejejf f fd	d
„Z
dejdeejejf dejdeejejf dejf
dd„Z‡  ZS )ÚMiniCPMDecoderLayerr>   rh   c                    sj   t ƒ  ¡  |j| _t||d| _t|ƒ| _t|j|jd| _	t|j|jd| _
|j| _|j| _|j| _d S )N)r>   rh   ©r	   )r   r   r#   rg   Ú	self_attnr   Úmlpr   Úrms_norm_epsÚinput_layernormÚpost_attention_layernormÚscale_depthÚnum_hidden_layersÚuse_muprt   r$   r   r   r   ò   s   

zMiniCPMDecoderLayer.__init__r'   ru   rv   rd   c                 C   sœ   |}|   |¡}| j|||d\}}| jr"||| jt | j¡   }n|| }|}|  |¡}|  |¡}| jrF||| jt | j¡   }||fS || }||fS )a$  
        Args:
            hidden_states (`torch.FloatTensor`): input to the layer of shape `(batch, seq_len, embed_dim)`
            position_ids (`torch.LongTensor`): position ids of shape `(batch_size, seq_len)`
            is_causal (`bool`): whether the attention mask is causal
        )r'   ru   rv   )	r   rš   r¡   rŸ   rO   rP   r    rž   r›   )r"   r'   ru   rv   ÚresidualÚpresent_key_valuer   r   r   r(   ÿ   s$   

ý

þzMiniCPMDecoderLayer.forwardr‡   rˆ   c                 C   s”   |}|   |¡}| jj||||d}| jr"||| jt | j¡   }n|| }|}|  |¡}|  	|¡}| jrD||| jt | j¡   }|S || }|S )N)r'   ru   r‡   rˆ   )
r   rš   rŒ   r¡   rŸ   rO   rP   r    rž   r›   )r"   r'   ru   r‡   rˆ   r¢   r   r   r   rŒ   %  s&   
ü

þz MiniCPMDecoderLayer.forward_stepr   r   r   r$   r   r˜   ñ   s,    þýü
û&þýüûúr˜   c                       s   e Zd ZdZdef‡ fdd„Z	ddejdede	eje
e	ejejf  f fd	d
„Zdejdejdejfdd„Zdededejfdd„Z‡  ZS )ÚMiniCPMModelzŸ
    Transformer decoder consisting of *config.num_hidden_layers* layers. Each layer is a [`MiniCPMDecoderLayer`]

    Args:
        config: MiniCPMConfig
    r>   c                    s†   t ƒ  ¡  ˆ j| _ˆ | _ˆ jdkrt ˆ jˆ j¡| _nt ¡ | _t 	‡ fdd„t
ˆ jƒD ƒ¡| _tˆ jˆ jd| _tˆ ƒ| _d | _d S )Nr   c                    s   g | ]}t ˆ |ƒ‘qS r   )r˜   )Ú.0rh   ©r>   r   r   Ú
<listcomp>[  s    z)MiniCPMModel.__init__.<locals>.<listcomp>r™   )r   r   Ú
vocab_sizer>   r   Ú	Embeddingr#   Úembed_tokensÚIdentityÚ
ModuleListÚranger    Úlayersr   rœ   Únormr=   Úrope_embrˆ   r—   r$   r¦   r   r   P  s   


ÿ

zMiniCPMModel.__init__TÚinputs_embedsrv   rd   c           	      C   sf   t jd| d¡t j|jd}|  |¡}|}g }| jD ]}||||ƒ\}}| |¡ q|  |¡}||fS )a{  
        Args:
            inputs_embeds: Tensor(batch_size, seq_length, hidden_size)
            is_causal: bool, whether the attention mask is causal
        Returns:
            hidden_states: Tensor(batch_size, seq_length, hidden_size)
            next_decoder_cache: List[(batch_size, num_heads, seq_length, head_dim), (batch_size, num_heads, seq_length, head_dim)]
        r   r   rZ   )	r   rS   rx   ÚlongrE   r°   r®   Úappendr¯   )	r"   r±   rv   rc   ru   r'   Únext_decoder_cacheÚdecoder_layerÚ
this_cacher   r   r   r(   c  s   

ý
zMiniCPMModel.forwardr‡   c              	   C   sZ   | j dus	J dƒ‚|  |¡}|}t| jƒD ]\}}| |||| j  |¡¡}q|  |¡}|S )zž
        Args:
            inputs_embeds: Tensor(batch_size, hidden_size)
        Returns:
            hidden_states: Tensor(batch_size, hidden_size)
        NzKV cache is not setup)rˆ   r°   Ú	enumerater®   rŒ   Úget_layer_cacher¯   )r"   r±   r‡   ru   r'   Úirµ   r   r   r   rŒ     s   

ü
zMiniCPMModel.forward_stepÚ
batch_sizeÚ
max_lengthr   c              	   C   sD   t | jj| jj| jjd u r| jj| jj n| jj||||d| _d S )N)Ú
num_layersÚnum_kv_headsÚdim_kv_headrº   rE   r   r»   )r   r>   r    rm   rF   r#   rG   rˆ   )r"   rº   r»   rE   r   r   r   r   Úsetup_cacheœ  s   "ùzMiniCPMModel.setup_cache)T)r*   r+   r,   re   r   r   r   rf   r   r   r   r(   rŒ   rŽ   r   r¿   r-   r   r   r$   r   r¤   H  s&    ýþý
üþý
ü r¤   )r>   r   r   Útorch.nnr   Útypingr   r   rO   Úcacher   rf   rT   r   ÚModuler   r4   r<   r=   rg   r   r˜   r¤   r   r   r   r   Ú<module>   s    "BkW