o
    پiG                     @   sv  d dl mZ d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z)m*Z* d dl+m,Z,m-Z- d dl.m/Z/m0Z0 G dd de	j1Z2G dd de	j1Z3G dd de	j1Z4G dd de	j1Z5G dd de	j1Z6e6Z7dS )    )Iterable)AnyListOptionalTupleUnionN)nn)PretrainedConfig)get_pp_group$get_tensor_model_parallel_world_size)get_tensor_model_parallel_rank)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessorLogitsProcessorOutput)QuantizationConfig)RadixAttention)get_rope)PPMissingLayer)DEFAULT_VOCAB_PADDING_SIZEParallelLMHeadVocabParallelEmbedding)ForwardBatchPPProxyTensors)default_weight_loaderkv_cache_scales_loader)
add_prefixmake_layersc                       sN   e Zd Z			ddedededee ded	ed
df fddZdd Z	  Z
S )SolarMLPNF hidden_sizeintermediate_size
hidden_actquant_configbiasprefixreturnc                    sh   t    t||gd ||| dd| _t||||| dd| _|dkr.td| dt | _d S )	N   .gate_up_proj)
input_sizeoutput_sizesr'   r&   r(   z
.down_projr,   output_sizer'   r&   r(   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr#   r$   r%   r&   r'   r(   	__class__ K/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/solar.pyr2   >   s(   
	
zSolarMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r3   r6   r4   )r7   xgate_up_r:   r:   r;   forward]   s   
zSolarMLP.forward)NFr"   )__name__
__module____qualname__intstrr   r   boolr2   r@   __classcell__r:   r:   r8   r;   r!   <   s(    r!   c                       s   e Zd Z							ddeded	ed
ededeeee	f  dedee
 dedededdf fddZdejdedejdejfddZ  ZS )SolarAttention'  N    Fr"   r   configr#   	num_headsnum_kv_heads
rope_thetarope_scalingmax_position_embeddingsr&   r'   r(   layer_idr)   c              	      sd  t    || _t }|| _| j| dksJ | j| | _|| _| j|kr/| j| dks.J n	|| j dks8J td| j| | _t	|dd | _
| j
d u rT| j| j | _
| j| j
 | _| j| j
 | _| j
d | _|| _|| _t|| j
| j| j|	||
 dd| _t| j| j
 ||	||
 dd| _t| j
| j
|||d	| _t| j| j
| j| j|||
 d
d| _d S )Nr      head_dimg      	.qkv_proj)r#   	head_sizetotal_num_headstotal_num_kv_headsr'   r&   r(   z.o_projr.   )
rotary_dimmax_positionbaserO   z.attn)rM   rQ   r&   r(   )r1   r2   r#   r   rV   rL   rW   maxrM   getattrrS   q_sizekv_sizescalingrN   rP   r   qkv_projr   o_projr   
rotary_embr   attn)r7   rK   r#   rL   rM   rN   rO   rP   r&   r'   r(   rQ   tp_sizer8   r:   r;   r2   f   sf   


	
zSolarAttention.__init__	positionsforward_batchhidden_statesc                 C   sd   |  |\}}|j| j| j| jgdd\}}}| |||\}}| j||||d}	| |	\}
}|
S )N)dim)rf   )r`   splitr]   r^   rb   rc   ra   )r7   re   rf   rg   qkvr?   qkvattn_outputoutputr:   r:   r;   r@      s    zSolarAttention.forward)rI   NrJ   NFr"   r   )rA   rB   rC   r	   rD   floatr   dictrE   r   r   rF   r2   torchTensorr   r@   rG   r:   r:   r8   r;   rH   d   sT    	
FrH   c                       sp   e Zd Z		ddededee deddf
 fdd	Zd
e	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )SolarDecoderLayerNr"   rK   rQ   r&   r(   r)   c           	         s   t    |j| _t|dd}t|dd }|d ur$t|dd r$|j|d< t|dd}t|ddp5t|d	d}t||| j|jt|d
|j|||||| dd| _t| j|j	|j
|t|dd| dd| _t|j|jd| _t|j|jd| _d S )NrN   rI   rO    original_max_position_embeddingsrP   rJ   attention_biasFr'   num_key_value_headsz
.self_attn)rK   rQ   r#   rL   rM   rN   rO   rP   r&   r'   r(   mlp_biasz.mlp)r#   r$   r%   r&   r'   r(   eps)r1   r2   r#   r\   rv   rH   num_attention_heads	self_attnr!   r$   r%   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)	r7   rK   rQ   r&   r(   rN   rO   rP   rw   r8   r:   r;   r2      sP   


zSolarDecoderLayer.__init__re   rg   rf   residualc                 C   sZ   |d u r|}|  |}n|  ||\}}| j|||d}| ||\}}| |}||fS )N)re   rg   rf   )r   r}   r   r~   )r7   re   rg   rf   r   r:   r:   r;   r@      s   
zSolarDecoderLayer.forwardNr"   )rA   rB   rC   r	   rD   r   r   rE   r2   rs   rt   r   tupler@   rG   r:   r:   r8   r;   ru      s2    3ru   c                       s   e Zd Z		ddedee def fddZdej	d	ej	fd
dZ
		ddeej	 dej	dedeej	 dee d	eej	eej	eej	 f ef fddZded	dfddZ  ZS )
SolarModelNr"   rK   r&   r(   c                    s   t     | _ j| _ j| _t | _| jjr't j j	t
d|d| _nt | _t j fdd| dd\| _| _| _t jrPt j	 jd| _d S t | _d S )Nembed_tokens)r&   r(   c                    s   t  | |dS )N)rK   r&   rQ   r(   )ru   )idxr(   rK   r&   r:   r;   <lambda>!  s    z%SolarModel.__init__.<locals>.<lambda>z.layers)r(   rz   )r1   r2   rK   
vocab_sizeorg_vocab_sizer
   pp_groupis_first_rankr   r#   r   r   r   r    num_hidden_layersstart_layer	end_layerlayersis_last_rankr   r   norm)r7   rK   r&   r(   r8   r   r;   r2   
  s*   


zSolarModel.__init__	input_idsr)   c                 C   s
   |  |S r<   )r   )r7   r   r:   r:   r;   get_input_embeddings.  s   
zSolarModel.get_input_embeddingsre   rf   inputs_embedspp_proxy_tensorsc                 C   s  |   jr|d ur|}n| |}d }n|d usJ |d }|d }d }d }	d }
d }| jr3| jjd n| jjd }t| j| jD ]}}|| jj	v rV|
 }|d urT|
 nd }
|| jjv rj|
 }	|d urh|
 nd }|| jjv r|| |d|   }|
d ur|d ur|
| |d|   }|| jjv r|	| |d|   }|d ur|d ur|| |d|   }| j| }|||||d\}}q@|   jst||dS | ||\}}|S )Nrg   r   r   rR   )re   rg   rf   r   )rg   r   )r   r   r   trainingrK   bskcn_tvranger   r   bskcn_1clonebskcn_2bskcn_3bskcn_4r   r   r   r   )r7   r   re   rf   r   r   rg   r   	bskcn_h_1	bskcn_h_2	bskcn_r_1	bskcn_r_2r   ilayerr?   r:   r:   r;   r@   1  sR   



zSolarModel.forwardquantization_param_pathc                 C   sv   t  }t }t|||| jj| jjjD ]%\}}t| j| t	j
s&| j| j}t|jdr5||j_||j_qtdd S )Nk_scalez8Self attention has no KV cache scaling factor attribute!)r   r   r   rK   r   r9   
model_type
isinstancer   r   Identityr}   hasattrrc   r   v_scaleRuntimeError)r7   r   rd   tp_rank	layer_idxscaling_factorlayer_self_attnr:   r:   r;   load_kv_cache_scalesl  s$   
zSolarModel.load_kv_cache_scalesr   )NN)rA   rB   rC   r	   r   r   rE   r2   rs   rt   r   r   r   r   r   r   r@   r   rG   r:   r:   r8   r;   r     s6    $
;r   c                       s   e Zd Zg dddgdZg dZddgZdd	d
dddZ		ddedee	 de
f fddZ	d dejdejdedeej deejef f
ddZdeee
ejf  fddZ  ZS )!SolarForCausalLM))q_projrl   )k_projrm   )v_projrn   )	gate_projr   )up_projrR   )r`   r3   )z.gate_proj..down_proj.z	.up_proj.z.q_proj.z.k_proj.z.v_proj..o_proj.r   r   )rT   r   )rT   rR   )rT   r*   )r+   r   )r+   rR   )z.q_projz.k_projz.v_projz
.gate_projz.up_projNr"   rK   r&   r(   c                    s   t    t | _|| _|| _t|| jtd|d| _| jj	rO|j
| _t| j|j|j
t|d| _|jr>| jjr>| jjj| j_t|dd}t| j|j
|| _d S t | _d S )Nmodel)rK   r&   r(   )org_num_embeddingspadding_sizer&   logit_scaleg      ?)r1   r2   r
   r   rK   r&   r   r   r   r   r   unpadded_vocab_sizer   r#   r   lm_headtie_word_embeddingsr   r   weightr\   r   logits_processorr   )r7   rK   r&   r(   r   r8   r:   r;   r2     s2   


zSolarForCausalLM.__init__r   re   rf   r   r)   c                 C   s4   | j ||||d}|  jr| | j||}|S |S )N)r   re   rf   r   )r   r   r   r   r   )r7   r   re   rf   r   rg   logitsr:   r:   r;   r@     s   
zSolarForCausalLM.forwardweightsc                 C   s   t |  }|D ]S\}}d}| j D ]1\}}|D ]&\}}	||v r?|||}
|
|v r?||
 }t|dt}||||	 d} nq|rD nq|rHq||v r[|| }t|dt}||| qd S )NFweight_loaderT)rr   named_parameterspacked_modules_mappingitemsreplacer\   r   )r7   r   params_dictnameloaded_weight	is_packedpacked_namesourcessrc_nameshard_idmodel_param_nameparamr   r:   r:   r;   load_weights  s6   
zSolarForCausalLM.load_weightsr   r<   )rA   rB   rC   r   #default_bitsandbytes_target_modulescolumn_parallel_weights_modules#bitsandbytes_stacked_params_mappingr	   r   r   rE   r2   rs   rt   r   r   r   r@   r   r   r   rG   r:   r:   r8   r;   r     sH    	(
$r   )8collections.abcr   typingr   r   r   r   r   rs   r   transformersr	   sglang.srt.distributedr
   r   %sglang.srt.distributed.parallel_stater   sglang.srt.layers.activationr   sglang.srt.layers.layernormr   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   r   sglang.srt.layers.quantizationr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   sglang.srt.layers.utilsr   *sglang.srt.layers.vocab_parallel_embeddingr   r   r   ,sglang.srt.model_executor.forward_batch_infor   r   $sglang.srt.model_loader.weight_utilsr   r   sglang.srt.utilsr   r    Moduler!   rH   ru   r   r   
EntryClassr:   r:   r:   r;   <module>   s2   (VNzw