o
    پi]>                     @   s\  d Z ddlmZmZmZmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZmZ ddl m!Z!m"Z" ddl#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z* de+de,de,fddZ-de,de,de,fddZ.G dd dej/Z0G dd dej/Z1G dd  d ej/Z2e2gZ3dS )!z>Inference-only deci model compatible with HuggingFace weights.    )IterableOptionalTupleTypeUnionN)nn)LlamaConfig)get_pp_group)RMSNorm)LogitsProcessorLogitsProcessorOutput)PoolerPoolingType)QuantizationConfig)PPMissingLayer)DEFAULT_VOCAB_PADDING_SIZEParallelLMHeadVocabParallelEmbedding)ForwardBatchPPProxyTensors)default_weight_loadermaybe_remap_kv_scale_name)LlamaAttentionLlamaMLP)
add_prefixmake_layers)loggerffn_multn_embdreturnc                 C   s   t d|  | d }t|dS )N         )int_find_multiple)r   r   intermediate_size r&   R/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/nemotron_nas.py_ffn_mult_to_intermediate_size-   s   
r(   nkc                 C   s    | | dkr| S | | | |  S )Nr   r&   )r)   r*   r&   r&   r'   r$   3   s   r$   c                       sp   e Zd Z		ddededee deddf
 fdd	Zd
e	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )DeciLMDecoderLayerN config	layer_idxquant_configprefixr   c                    sJ  t    |j| }|jj| _|jj| _|j| _t	|dd}t	|dd }|d ur3t	|dd r3|j
|d< t	|dd}t	|dd}	t	|d	d
pJt	|dd
}
t|drS|j}
| js||j|jj }t|| j|j|||||	||td||
d| _t|j|jd| _| js|jj}t||j}t| j||j|td|d| _t|j|jd| _d S d S )N
rope_thetai'  rope_scaling original_max_position_embeddingsmax_position_embeddingsi    rope_is_neox_styleTattention_biasFbiasqkv_bias	self_attn)r-   hidden_size	num_headsnum_kv_headslayer_idr1   r2   r5   r4   r/   r0   r7   epsmlp)r:   r%   
hidden_actr/   r0   )super__init__block_configs	attentionno_op_is_no_op_attentionffn_is_no_op_ffnr:   getattrr3   hasattrr8   num_attention_headsn_heads_in_groupr   r   r9   r
   rms_norm_epsinput_layernormr   r(   r   rA   r@   post_attention_layernorm)selfr-   r.   r/   r0   block_configr1   r2   r4   r5   r6   r<   r   r%   	__class__r&   r'   rC   <   sj   





zDeciLMDecoderLayer.__init__	positionshidden_statesforward_batchresidualc                 C   sh   | j rn|d u r|}| |}n| ||\}}| j|||d}| js0| ||\}}| |}||fS )N)rU   rV   rW   )rG   rO   r9   rI   rP   r@   )rQ   rU   rV   rW   rX   r&   r&   r'   forward   s"   	
zDeciLMDecoderLayer.forwardNr,   )__name__
__module____qualname__r   r#   r   r   strrC   torchTensorr   r   rY   __classcell__r&   r&   rS   r'   r+   :   s2    Dr+   c                       s   e Zd Zddeddedee dedee f fdd	Z	d
e
jde
jfddZ		dd
ee
j de
jdedee
j dee dee
jef fddZ  ZS )	DeciModelNr,   )r/   r0   
layer_typer-   r/   r0   rc   c          	         s   t    d } | _| _ j| _|r|j|jpd nd} j| }t	 j
r2t| j jd| _nt | _dtdtf fdd}t j|t	 jt	 jtd|d	\| _| _| _t	 jrit j jd
| _d S tdd| _d S )N   r   )org_num_embeddingsr/   idxr0   c                    s    | |dS )N)r.   r/   r0   r&   )rf   r0   r-   rc   r/   r&   r'   	get_layer   s   z%DeciModel.__init__.<locals>.get_layerlayers)pp_rankpp_sizer0   r>   T)return_tuple)rB   rC   r-   r/   pad_token_idpadding_idxlora_extra_vocab_size	max_loras
vocab_sizer	   is_first_rankr   r:   embed_tokensr   r#   r^   r   num_hidden_layersrank_in_group
world_sizer   ri   start_layer	end_layeris_last_rankr
   rN   norm)	rQ   r-   r/   r0   rc   lora_config
lora_vocabrq   rh   rS   rg   r'   rC      s:   


zDeciModel.__init__	input_idsr   c                 C   s
   |  |S N)rs   rQ   r}   r&   r&   r'   get_input_embeddings   s   
zDeciModel.get_input_embeddingsrU   rW   inputs_embedspp_proxy_tensorsc                 C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }d}t| j| jD ]!}	| j|	 }
|
jsB|
||||\}}|d7 }q*|
||||\}}q*t  jsWt	||dS | 
||\}}|S )NrV   rX   r   rd   )rV   rX   )r	   rr   r   rangerw   rx   ri   rG   ry   r   rz   )rQ   r}   rU   rW   r   r   rV   rX   kv_cache_indexilayer_r&   r&   r'   rY      s2   



zDeciModel.forward)NN)r[   r\   r]   r+   r   r   r   r^   r   rC   r_   r`   r   r   r   r   rY   ra   r&   r&   rS   r'   rb      s:    2rb   c                       s  e Zd Zg dddgdZg dZdddZd	gZd
ddddddddddddd	ddZddddede	e
 def fddZ		d0dede	e
 defdd Zd!ejd"ejfd#d$Ze 		%	d1d!ejd&ejd'ed(e	ej d)ed*e	e d"efd+d,Zd-eeeejf  d"dfd.d/Z  ZS )2DeciLMForCausalLM)q_projk_projv_proj	gate_projup_proj)qkv_projgate_up_proj)r   o_projr   	down_projrs   lm_headinput_embeddingsoutput_embeddings)rs   r   r   zmodel.layersr9   r   r   r   r   rO   r@   r   rP   zmodel.embed_tokensz
model.norm)ri   rE   wqwkwvwoattention_normfeed_forwardw1w2w3ffn_normtok_embeddingsoutputrz   Nr,   )r/   r0   r-   r/   r0   c             	      s   t    d }|| _|| _| j||td|d| _| jjr#| jj| _	n$|j
| _|r1|  j|j7  _t| j|j|j
|s<tn|j|td|d| _	t|| _ttjdd| _d S )Nmodelr-   r/   r0   r   )re   padding_sizer/   r0   T)pooling_type	normalize)rB   rC   r-   r{   _init_modelr   r   tie_word_embeddingsrs   r   rq   unpadded_vocab_sizero   r   r:   r   lora_vocab_padding_sizer   logits_processorr   r   LASTpooler)rQ   r-   r/   r0   r{   rS   r&   r'   rC   (  s0   

zDeciLMForCausalLM.__init__c                 C   s   t |||dS )Nr   )rb   )rQ   r-   r/   r0   r&   r&   r'   r   N  s   zDeciLMForCausalLM._init_modelr}   r   c                 C   s   | j |S r~   )r   r   r   r&   r&   r'   r   V  s   z&DeciLMForCausalLM.get_input_embeddingsFrU   rW   r   get_embeddingr   c                 C   sB   | j |||||d}t jr|s| ||| j|S | ||S |S )N)r   )r   r	   ry   r   r   r   )rQ   r}   rU   rW   r   r   r   rV   r&   r&   r'   rY   Y  s   

zDeciLMForCausalLM.forwardweightsc                 C   sx  g d}t |  }|D ]\}}d|v rqd|v sd|v rq| jjr'd|v r'q| jjd urR| jj| }rR|| }t|dt}|	 dkrH|n|d }||| qd|v r`t
||}|d u r`q|D ]-\}	}
}|
|vrlqb||
|	}|d	r|||vr|qb||vrqb|| }|j}||||  n)|d	r||vrq|| v r|| }t|dt}||| qtd
| d qd S )N))	.qkv_projz.q_projq)r   z.k_projr*   )r   z.v_projv).gate_up_projz
.gate_projr   )r   z.up_projrd   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedzlm_head.weightweight_loaderr   scalez.biasz
Parameter z not found in params_dict)dictnamed_parametersr-   r   r   r/   get_cache_scalerJ   r   dimr   replaceendswithr   keysr   warning)rQ   r   stacked_params_mappingparams_dictnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idr&   r&   r'   load_weightst  sZ   	

zDeciLMForCausalLM.load_weightsrZ   )NFN)r[   r\   r]   packed_modules_mappingsupported_lora_modulesembedding_modulesembedding_padding_modulesmistral_mappingr   r   r   r^   rC   r   r_   r`   r   no_gradr   boolr   r   rY   r   r   r   ra   r&   r&   rS   r'   r      s    	)
(r   )4__doc__typingr   r   r   r   r   r_   r   transformersr   sglang.srt.distributedr	   sglang.srt.layers.layernormr
   "sglang.srt.layers.logits_processorr   r   sglang.srt.layers.poolerr   r   sglang.srt.layers.quantizationr   sglang.srt.layers.utilsr   *sglang.srt.layers.vocab_parallel_embeddingr   r   r   ,sglang.srt.model_executor.forward_batch_infor   r   $sglang.srt.model_loader.weight_utilsr   r   sglang.srt.models.llamar   r   sglang.srt.utilsr   r   sglang.utilsr   floatr#   r(   r$   Moduler+   rb   r   
EntryClassr&   r&   r&   r'   <module>   s0   f_ 
6