o
    پi                     @   sR  d dl Z d dlZd dlZd dlmZmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZmZmZmZ d d
lmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1 d dl2m3Z3 d dl4m5Z5 d dl6m7Z7 d dl8m9Z9 e:e;Z<G dd dej=Z>G dd dej=Z?de@de@deAdejBdejCf
d d!ZDd"d# ZEG d$d% d%e)ZFG d&d' d'ej=ZGG d(d) d)ej=ZHG d*d+ d+ej=ZIG d,d- d-ej=ZJeKe5d.ZLd/eMd0eeM d1eNdeeMeOeM eNf fd2d3ZPG d4d5 d5eJZQeJeQgZRdS )6    N)IterableOptionalTuple)nn)PretrainedConfig)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)
GeluAndMul)fused_dual_residual_rmsnormfused_rmsnormgelu_and_mul_triton)RMSNorm)MergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)FusedMoE)fused_moe_router_shim)TopK)QuantizationConfig)RadixAttention)RotaryEmbedding_yarn_find_correction_range_yarn_get_mscaleget_rope)ParallelLMHeadVocabParallelEmbedding)get_is_capture_mode)ForwardBatch)DefaultModelLoader)default_weight_loader)
add_prefixc                       sV   e Zd Z					ddedededee d	ed
ededdf fddZdd Z	  Z
S )Grok1MLPN TFhidden_sizeintermediate_sizelayer_idquant_configprefixuse_presharded_weightssplit_gate_upreturnc	           	   	      s`   t    t||gd d|td||d| _t||d|td|||d| _tdd| _|| _	d S )	N   Fgate_up_proj)biasr)   r*   r+   	down_proj)r0   r)   r*   reduce_resultsr+   tanh)approximate)
super__init__r   r#   r/   r   r1   r
   act_fnr(   )	selfr&   r'   r(   r)   r*   r2   r+   r,   	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/grok.pyr6   D   s(   
	
zGrok1MLP.__init__c                 C   s,   |  |\}}t|\}}| |\}}|S N)r/   r   r1   )r8   xgate_up_r;   r;   r<   forwarde   s   zGrok1MLP.forward)Nr%   TFF)__name__
__module____qualname__intr   r   strboolr6   rA   __classcell__r;   r;   r9   r<   r$   C   s0    	
!r$   c                       s   e Zd Z								ddedededed	ed
edeej dee dee de	de	de	de	de
f fddZdejdejfddZ  ZS )Grok1MoENTFr%   configr(   num_expertstop_kr&   r'   params_dtyper)   tp_sizer2   r+   inplace
no_combiner*   c                    sr   t    || _t||dtjd d| _d| _t	t
| j}t|d||d| _t|||||||d|
|||d| _d S )NF)r0   rM   r)         >@)rL   renormalizer(   custom_routing_functiongelu)rK   rL   r(   r&   r'   rM   r)   
activationr2   r+   rO   rP   )r5   r6   r&   r   torchfloat32gaterouter_logit_softcapping	functoolspartialr   r   topkr   experts)r8   rJ   r(   rK   rL   r&   r'   rM   r)   rN   r2   r+   rO   rP   r*   rS   r9   r;   r<   r6   m   sB   
zGrok1MoE.__init__hidden_statesr-   c                 C   s   |  || jj}| ||S r=   )r\   rX   weightr]   )r8   r^   topk_outputr;   r;   r<   rA      s   zGrok1MoE.forward)NNNTFTFr%   )rB   rC   rD   r   rE   r   rV   dtyper   rG   rF   r6   TensorrA   rH   r;   r;   r9   r<   rI   l   sN    		
7rI   lowhighdimra   r-   c                 C   s<   | |kr| d8 } t j||d|  ||   }t |dd}|S )NgMbP?ra   r      )rV   arangeclamp)rc   rd   re   ra   linear_func	ramp_funcr;   r;   r<   _yarn_linear_ramp_mask   s
   rl   c           	   	   C   sx   t | dd }|r:t | dd }t | dd }t | dd}t | dd}t | dd}t | d	d
}|||||||tjd}|S d S )N	rope_type original_max_position_embeddingsscaling_factorextrapolation_factor      ?attn_factor	beta_fast    	beta_slowrg   )extra_methodmax_position_embeddingsro   rp   rr   rs   ru   ra   )getattrrV   bfloat16)	rJ   rm   rn   ro   rp   rr   rs   ru   rope_scalingr;   r;   r<   get_rope_scaling   s*   
r{   c                       s   e Zd ZdZdddddddededed	ed
ededejde	dededededdf fddZ
dedejfddZdejfddZ  ZS )ScalingRotaryEmbeddingz\Scale the RotaryEmbedding in a way similar to YaRN method. https://arxiv.org/pdf/2309.00071.yarn_logrg   rt   )rv   rp   rr   rs   ru   	head_size
rotary_dimrw   baseis_neox_stylero   ra   rv   rp   rr   rs   ru   r-   Nc                   sR   || _ || _|	| _|
| _|| _|| _tt| j |
 | _t	 
|||||| d S r=   )ro   rv   rp   rr   rs   ru   floatr   mscaler5   r6   )r8   r~   r   rw   r   r   ro   ra   rv   rp   rr   rs   ru   r9   r;   r<   r6      s   zScalingRotaryEmbedding.__init__c                 C   sh  | j tjd| jdtjd| j  }d| }d||  }t| j| j| j| j | j\}}dt	||| jd tjd | j
 }| jdv rD|}|S | jdv rU|d|  ||  }|S | jdkrott|| t|d|   }|S | jd	krtjd| jdtjd}	| j t| j| j dtj  t| jdtj    }
tjd|
|	| j   tjd}|S td
| j )Nr   r.   rf   rq   rg   )original)yarnyarn_linearr}   theta_scalezUnknown extrapolation method: )r   rV   rh   r   r   r   rs   ru   rw   rl   rp   rv   explogmathro   pitensorrW   
ValueError)r8   ro   	pos_freqsinv_freq_extrapolationinv_freq_interpolationrc   rd   inv_freq_maskinv_freq	exponentstheta_scale_exponentr;   r;   r<   _compute_inv_freq   s`   	




z(ScalingRotaryEmbedding._compute_inv_freqc                 C   sX   |  | j}tj| j| j tjd}td||}| }| }tj	||fdd}|S )Nrf   z	i,j -> ijre   )
r   ro   rV   rh   rw   rW   einsumcossincat)r8   r   tfreqsr   r   cacher;   r;   r<   _compute_cos_sin_cache  s   z-ScalingRotaryEmbedding._compute_cos_sin_cache)rB   rC   rD   __doc__rE   rG   r   rV   ra   rF   r6   rb   r   r   rH   r;   r;   r9   r<   r|      sH    
/r|   c                       s   e Zd Z								dded	ed
edededededee dedee	j
j dededdf fddZde	jde	jdede	jfddZ  ZS )Grok1Attentionr      '  NTFr%   rJ   r&   	num_headsnum_kv_headsr(   max_position
rope_thetar)   r2   
alt_streamload_presharded_attnr*   r-   c                    sF  t    || _|| _|| _t }t }|| _| j| dks J | j| | _|| _	| j	|kr8| j	| dks7J n	|| j	 dksAJ t
d| j	| | _t|dd| _| j| j | _| j| j | _| jd | _|| _t|}|| _|
putj | _t|| j| j| j	d|||| jtd|d
| _t| j| j |d||	||| jtd	|d
	| _t| j| j|t| jdd| _t|dd| _ |d urt!| jf| j s| jn| jd t| jdd|| _d}nt| j| j s| jn| jd |t| jdd| _d}t
t|ddd}t|dd}t"| j| j| j| j|||||td|d
| _#t| jdd| j#_$d S )Nr   rg   head_dim   g      Fqkv_proj)r0   r)   tp_rankrN   r   r*   o_proj)r0   r)   r2   r   rN   r+   r*   T)r   r   r   r   rope_rotate_half_dimsr.   )r   r   r   NONEattn_logit_softcappingrQ   g        attn_logit_softcapping_methodr3   attn)r   r(   	logit_capr)   pos_encoding_modelogit_capping_methodr*   attn_temperature_lenr   )%r5   r6   rJ   r(   r&   r   r   total_num_headsr   total_num_kv_headsmaxr   rx   r   q_sizekv_sizescalingr   r{   r   rV   cudaStreamr   r   r#   r   r   r   r   rE   
rotary_embr   r|   r   r   xai_temperature_len)r8   rJ   r&   r   r   r(   r   r   r)   r2   r   r   r*   attn_tp_rankattn_tp_sizerz   r   r   r   r9   r;   r<   r6   +  s   


	zGrok1Attention.__init__	positionsr^   forward_batchc                 C   sb   |  |\}}|j| j| j| jgdd\}}}| |||\}}| ||||}	| |	\}
}|
S )Nr   r   )r   splitr   r   r   r   r   )r8   r   r^   r   qkvr@   qkvattn_outputoutputr;   r;   r<   rA     s    zGrok1Attention.forward)r   r   r   NTNFr%   )rB   rC   rD   r   rE   r   r   r   rG   rV   r   r   rF   r6   rb   r    rA   rH   r;   r;   r9   r<   r   *  sZ    	

xr   c                       s   e Zd Z								ddededee ded	ed
edeej	j
 dededdf fddZ		ddejdejdedeej dee deejejef fddZdd Z  ZS )Grok1DecoderLayerr   NFr%   rJ   r(   r)   load_presharded_moer   load_presharded_mlpr   skip_moer*   r-   c
                    s  t    |j _|j _t|dd _| _|ptj	
  _t|dd}
t| j|jt|dr3|jn|j|j||
|d j|td|	d _t|dd	 } jd
krt|||j|j|jt|dt|dd | j |ddtd|	d _ jrt|j|j|d|||d _nt t|j|jd _t|j|jd _t|j|jd _t|j|jd _  jd
krΈ jrt! dkr fdd _"d S  j# _"d S  j _"d S t )Nresidual_moeFr   r   context_lenr   )rJ   r&   r   r   r   r(   r   r)   r2   r   r   r*   merge_gate_upTr   moe_intermediate_sizer'   block_sparse_moe)rJ   r(   rK   rL   r&   r'   r)   r2   r+   rO   rP   r*   )r&   r'   r)   r2   r+   r(   r,   epsrg   c                    s   t  | S r=   )r	   moe_with_rmoe)r>   r8   r;   r<   <lambda>  s    z,Grok1DecoderLayer.__init__.<locals>.<lambda>)$r5   r6   num_local_expertsrK   r&   rx   r   r(   rV   r   r   r   r   num_attention_headshasattrr   rw   num_key_value_headsr#   	self_attnrI   num_experts_per_tokr   r$   r'   mlpNotImplementedErrorr   rms_norm_epspre_attn_normpost_attn_normpre_moe_normpost_moe_normr   ffnr   )r8   rJ   r(   r)   r   r   r   r   r   r*   r   r,   r9   r   r<   r6     s   





zGrok1DecoderLayer.__init__r   r^   r   residualdeferred_normc                 C   s   |}|}|d ur|d usJ t |||j| jj|j\}}nt|| jj| jj|}}| j|||d}t dkr;t|}t ||| jj| j	j| jj\}}| 
|}||| jfS )N)r   r^   r   rg   )r   r_   r   variance_epsilonr   r   r   r	   r   r   r   r   )r8   r   r^   r   r   r   hidden_states_originalresidual_originalr;   r;   r<   rA     sD   	

	

	zGrok1DecoderLayer.forwardc                 C   s   | j d ur;t r;tj }| j | | |}tj| j  | |}W d    n1 s/w   Y  || j  n
| |}| |}|| d S )Ng;f?)	r   r   rV   r   current_streamwait_streamr   streamr   )r8   r>   r   
mlp_result
moe_resultr;   r;   r<   r   H  s   



zGrok1DecoderLayer.moe_with_rmoe)r   NFFFNFr%   NN)rB   rC   rD   r   rE   r   r   rG   rV   r   r   rF   r6   rb   r    r   r   rA   r   rH   r;   r;   r9   r<   r     s\    
	
a
7r   c                       s   e Zd Z							ddedee dededed	ed
ededdf fddZ	dde	j
de	j
dede	j
de	j
f
ddZ  ZS )
Grok1ModelNFr%   rJ   r)   r   load_presharded_embeddingr   r   replicate_embeddingr*   r-   c	           	         s   t     _ j_ j_t j j|| td|d_	t
j _t fddt jD _t j jd_d S )Nembed_tokens)r+   	enable_tpr*   c                    s$   g | ]}t  |jd qS ))r)   r   r   r   r   )r   r   ).0irJ   r   r   r   r)   r8   r;   r<   
<listcomp>q  s    
z'Grok1Model.__init__.<locals>.<listcomp>r   )r5   r6   rJ   pad_token_idpadding_idx
vocab_sizer   r&   r#   r   rV   r   r   r   r   
ModuleListrangenum_hidden_layerslayersr   r   norm)	r8   rJ   r)   r   r   r   r   r   r*   r9   r   r<   r6   W  s$   

zGrok1Model.__init__	input_idsr   r   input_embedsc           
      C   s~   |d u r|  |}|| jj n|}d\}}tt| jD ]}| j| |||||\}}}qt|||j| j	j|j
\}}	|S )Nr   )r   mul_rJ   embedding_multiplier_scaler  lenr  r   r_   r  r   )
r8   r  r   r   r  r^   r   r   r   r@   r;   r;   r<   rA     s"   

zGrok1Model.forward)NFFFFFr%   r=   )rB   rC   rD   r   r   r   rG   rF   r6   rV   rb   r    rA   rH   r;   r;   r9   r<   r   V  sN    	
.r   c                       s   e Zd Z		ddedee deddf fddZe	 	dd	ej
d
ej
dedej
dej
f
ddZ			ddeeeej
f  dedededB deeej
f f
ddZdd Zdd Z  ZS )Grok1ForCausalLMNr%   rJ   r)   r*   r-   c                    s2  t    || _|| _t|dd| _t|ddo"| jjdko"t dk| _t|dd| _	t|dd| _
d}t|d	|| _t dkrFttd
t t|dd| _t||| j| j
| j	| j| jtd|d| _d }| jr~t|j|jd|td|d| _t|dd| _nt|j|j| j
|td|d| _t|| _t | _d S )Nr   Fr   Tr   rg   r   r   replicate_lm_head_prepare_weightsr   model)r)   r   r   r   r   r   r*   lm_head)r0   rM   r*   )skip_all_gather)r+   rM   r*   )r5   r6   rJ   r)   rx   r   r   r   r   r   r   r  setattrr!   _prepare_presharded_weightsr   r   r#   r  r   r&   r   r  r   logits_processorr   setloaded_param_names)r8   rJ   r)   r*   default_replicate_lm_headlm_head_params_dtyper9   r;   r<   r6     sb   



zGrok1ForCausalLM.__init__r  r   r   r  c                 C   s"   |  ||||}| ||| j|S r=   )r  r  r  )r8   r  r   r   r  r^   r;   r;   r<   rA     s   
zGrok1ForCausalLM.forwardFTweightsignore_parent_namecheck_hit_namesmodel_configc              	      s  d u rj g }|g d7 }|ddg7 }j}tjddd|d}t t }t  dtd	t	j
f fd
d}	|D ]b\}
}d|
v rKqB|D ]!\}}}||
vrWqM|
||}
|
drg|
vrgqM|	|
||  n5|D ]}|\}}}}||
vr~qq|
||}
|	|
||
||d  n|
dr|
vrqB|
d u rqB|	|
|d qB|rt dkr|  }dd |D }tdt| dt  dt|  t|dkrtd|d S t dkrtd|d  S )N))r   q_projr   )r   k_projr   )r   v_projr   )r/   	gate_projr   )r/   up_projrg   w1w2w3)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerK   nameloaded_weightc                    s   d| v r| tjj }| }r| dd } | vr'td| d d S |  }t|dt}|||g|R i |  	|  j
	| d S )Nr  .r   zSkipping name=z in load_weights_wrapperweight_loader)torV   rW   output_multiplier_scaler   loggerinforx   r"   addr  )r&  r'  argskwargsoriginal_nameparamr)  	hit_namesr  r  params_dictr8   r;   r<   load_weight_wrapper  s    

z:Grok1ForCausalLM.load_weights.<locals>.load_weight_wrapperzrotary_emb.inv_freqz.bias)shard_id	expert_id)r&  r'     c                 S   s   h | ]}d |vr|qS )scaler;   )r   r>   r;   r;   r<   	<setcomp>R  s    z0Grok1ForCausalLM.load_weights.<locals>.<setcomp>z#all_names: z, #hit_names: z, #missing_exclude_scales: r   zMload_weights failed because some weights are missing: missing_exclude_scales=r(  z@load_weights failed because it did not hit any names. all_names=z hit_names=)rJ   r   r   make_expert_params_mappingdictnamed_parametersr  keysrF   rV   rb   replaceendswithr	  r,  r-  r   )r8   r  r  r  r  stacked_params_mappingrK   expert_params_mapping	all_namesr6  r&  r'  
param_nameweight_namer7  mappingr8  missingmissing_exclude_scalesr;   r3  r<   load_weights  s    
	zGrok1ForCausalLM.load_weightsc                 C   s   | j }t|dt|dd }t|dd}|jdkr"|j|rdnd }nd}|j|j |j |j }|j|j |j |j d }|j|j |j |j }|j| |j | d }|j| |j | }	|j|j d }
|| | | |	 |
 S )Nr   r'   r   Fr   rg   r.   )	rJ   rx   r   r  r&   r   r   r   r   )r8   cfgr   r   rK   wqwkvoutffn1ffn2embedr;   r;   r<   get_num_params_analyticalb  sp   

z*Grok1ForCausalLM.get_num_params_analyticalc                 C   s   t dd |  D t  S )Nc                 s   s    | ]}|  V  qd S r=   )numel)r   pr;   r;   r<   	<genexpr>  s    z8Grok1ForCausalLM.get_num_params_torch.<locals>.<genexpr>)sum
parametersr   r   r;   r;   r<   get_num_params_torch  s   z%Grok1ForCausalLM.get_num_params_torch)Nr%   r=   )FTN)rB   rC   rD   r   r   r   rF   r6   rV   no_gradrb   r    rA   r   r   rG   r=  rJ  rR  rX  rH   r;   r;   r9   r<   r
    sR    A
u0r
  r  model_name_or_pathrevisionfall_back_to_ptc                 C   s   dd l }dd l}t dkrt| |||S |j|s2ddlm} ddg}||| jj	||| jj
d}n|}t }	d|	dd	g}|d
|	dddg7 }g }
|D ]}|
| |j||7 }
qN|
d drgd}nd}||
|fS )Nr   rg   )download_weights_from_hfz*.safetensorsz*.bin)ignore_patternsz*-03dz.binz*-TP-z.safetensorsz*-TP-common.safetensorssafetensorsTF)globosr   old_prepare_weightspathisdir$sglang.srt.model_loader.weight_utilsr]  load_configdownload_dirr^  r   joinrA  )r8   rZ  r[  r\  ra  rb  r]  allow_patterns	hf_folderr   hf_weights_filespatternuse_safetensorsr;   r;   r<   r    s2   

r  c                   @   s   e Zd ZdZdS )Grok1ModelForCausalLMz#An alias for backward-compatbility.N)rB   rC   rD   r   r;   r;   r;   r<   ro    s    ro  )SrZ   loggingr   typingr   r   r   rV   r   transformersr   sglang.srt.distributedr   r   r	   sglang.srt.layers.activationr
   sglang.srt.layers.elementwiser   r   r   sglang.srt.layers.layernormr   sglang.srt.layers.linearr   r   r   r   "sglang.srt.layers.logits_processorr   &sglang.srt.layers.moe.fused_moe_tritonr   sglang.srt.layers.moe.routerr   sglang.srt.layers.moe.topkr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   r   r   r   *sglang.srt.layers.vocab_parallel_embeddingr   r   +sglang.srt.model_executor.cuda_graph_runnerr   ,sglang.srt.model_executor.forward_batch_infor    sglang.srt.model_loader.loaderr!   rf  r"   sglang.srt.utilsr#   	getLoggerrB   r,  Moduler$   rI   r   rE   ra   rb   rl   r{   r|   r   r   r   r
  rx   rc  rF   rG   listr  ro  
EntryClassr;   r;   r;   r<   <module>   sx   
)=
\  #H 
|
+