o
    پi>                     @   sr  d Z ddlZddlmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& e& Z'de(dej)fddZ*G dd dej+Z,G dd dej+Z-G dd dej+Z.G dd dej+Z/G dd dej+Z0G d d! d!e0Z1e1gZ2dS )"zBInference-only BaiChuan model compatible with HuggingFace weights.    N)IterableOptionalTuple)nn)PretrainedConfig)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixis_nputotal_num_headsreturnc                 C   s   dt t |  }tjddt |d     tjd}tjdd| tjd}t||}|| kritjddt d| d     tjd}t	|| | }tjddd|  dtjd}tj
|t||gdd}|S )N      )dtype   )startendstepr   r   )dim)mathfloorlog2torchtensorfloat32arangeint32powmincat)r   closest_power_of_2basepowersslopes
extra_basenum_remaining_headsextra_powers r4   N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/baichuan.py_get_alibi_slopes7   s(   r6   c                       sD   e Zd Z		ddedededee def
 fdd	Zd
d Z  Z	S )BaiChuanMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixc                    sh   t    t||gd d|td|d| _t||d|td|d| _|dkr.td| dt | _	d S )	Nr   Fgate_up_projbiasr<   r=   	down_projsiluzUnsupported activation: z!. Only silu is supported for now.)
super__init__r   r   r>   r   rA   
ValueErrorr	   act_fn)selfr9   r:   r;   r<   r=   	__class__r4   r5   rD   Q   s(   

zBaiChuanMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r>   rF   rA   )rG   xgate_up_r4   r4   r5   forwardo   s   
zBaiChuanMLP.forwardNr8   )
__name__
__module____qualname__intstrr   r   rD   rN   __classcell__r4   r4   rH   r5   r7   O   s    r7   c                       s   e Zd ZdZddddejdfdeded	ed
edede	e
 dede	ej def fddZdejdejdedejfddZ  ZS )BaiChuanAttentionz=Multi-headed attention from 'Attention Is All You Need' paper'      Nr   r8   r9   	num_headsposition_embedding
rope_thetamax_position_embeddingsr<   layer_idr   r=   c
              
      s  t    || _t }
|| _| j| _| j|
 dksJ || j | _|| _|| _|| _	| j|
kr9| j|
 dks8J n	|
| j dksBJ t
d| j|
 | _| j| _t|| j| j| jd|td|	d| _t| j| j |d|td|	d| _| jd | _t| j| j| j| j||td|	d	| _| jd
krt }|| j }|d | j }t| j}||| }tj||trdndd| _nt| j| j| j	| jd| _i | _| jd
krtr| j| jd< d S d S d S )Nr   r   FW_packr?   o_projg      attn)num_kv_headsr]   r<   r=   ALIBInpucuda)r   device)
rotary_dimmax_positionr.   r0   )rC   rD   r9   r   r   total_num_kv_headshead_dimrZ   r[   r\   maxra   rY   r   r   r^   r   r_   scalingr   r`   r   r6   r%   r&   _is_npualibi_slopesr   
rotary_embattn_kwargs)rG   r9   rY   rZ   r[   r\   r<   r]   r   r=   tp_sizetp_rank
head_starthead_endrm   rH   r4   r5   rD   y   sx   

	




zBaiChuanAttention.__init__	positionshidden_statesforward_batchr   c                 C   sj   |  |\}}|jddd\}}}| jdkr| |||\}}| j||||fi | j}	| |	\}
}|
S )Nr   )chunksr!   rb   )r^   chunkrZ   rn   r`   ro   r_   )rG   rt   ru   rv   qkvrM   qkvattn_outputoutputr4   r4   r5   rN      s   
zBaiChuanAttention.forward)rP   rQ   rR   __doc__r%   bfloat16rS   rT   floatr   r   r   rD   Tensorr   rN   rU   r4   r4   rH   r5   rV   v   sH    	
TrV   c                       sr   e Zd Z			ddedededee def
 fd	d
Zde	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )BaiChuanDecoderLayerr   Nr8   configrZ   r]   r<   r=   c                    s   t    |j| _t|dd}t|dd}t| j|j|||||td|d| _t| j|j	|j
|td|d| _t|j|jd	| _t|j|jd	| _d S )
Nr[   rW   r\   rX   	self_attn)r9   rY   rZ   r[   r]   r\   r<   r=   mlp)r9   r:   r;   r<   r=   eps)rC   rD   r9   getattrrV   num_attention_headsr   r   r7   r:   r;   r   r
   rms_norm_epsinput_layernormpost_attention_layernorm)rG   r   rZ   r]   r<   r=   r[   r\   rH   r4   r5   rD      s2   

zBaiChuanDecoderLayer.__init__rt   ru   rv   residualr   c                 C   sZ   |d u r|}|  |}n|  ||\}}| j|||d}| ||\}}| |}||fS )N)rt   ru   rv   )r   r   r   r   )rG   rt   ru   rv   r   r4   r4   r5   rN      s   
zBaiChuanDecoderLayer.forward)r   Nr8   )rP   rQ   rR   r   rT   rS   r   r   rD   r%   r   r   r   rN   rU   r4   r4   rH   r5   r      s4    "r   c                	       sX   e Zd Z		ddededee def fddZd	ej	d
ej	de
dej	fddZ  ZS )BaiChuanModelNr8   r   rZ   r<   r=   c                    sz   t     | _ j| _ j| _t j j jtdd| _	t
 fddt jD | _t j jd| _d S )Nembed_tokens)org_num_embeddingsr=   c              
      s*   g | ]}t  |td | dqS )zlayers.)r]   rZ   r<   r=   )r   r   ).0ir   rZ   r=   r<   r4   r5   
<listcomp>.  s    z*BaiChuanModel.__init__.<locals>.<listcomp>r   )rC   rD   r   pad_token_idpadding_idx
vocab_sizer   r9   r   r   r   
ModuleListrangenum_hidden_layerslayersr
   r   normrG   r   rZ   r<   r=   rH   r   r5   rD     s    
zBaiChuanModel.__init__	input_idsrt   rv   r   c           	      C   sR   |  |}d }tt| jD ]}| j| }|||||\}}q| ||\}}|S rJ   )r   r   lenr   r   )	rG   r   rt   rv   ru   r   r   layerrM   r4   r4   r5   rN   ;  s   


zBaiChuanModel.forwardrO   )rP   rQ   rR   r   rT   r   r   rD   r%   r   r   rN   rU   r4   r4   rH   r5   r     s*     r   c                	       s   e Zd ZdgddgdZg dZddgiZg Z		dd	ed
ede	e
 def fddZdejdejdedejfddZdeeeejf  fddZ  ZS )BaiChuanBaseForCausalLMr^   	gate_projup_proj)r^   r>   )r^   r_   r>   rA   r   Nr8   r   rZ   r<   r=   c                    sl   t    || _|| _t|||td|d| _| jjr!| jj| _	nt
|j|j|td|d| _	t|| _d S )Nmodelr=   lm_head)r<   r=   )rC   rD   r   r<   r   r   r   tie_word_embeddingsr   r   r   r   r9   r   logits_processorr   rH   r4   r5   rD   c  s   
z BaiChuanBaseForCausalLM.__init__r   rt   rv   r   c                 C   s    |  |||}| ||| j|S rJ   )r   r   r   )rG   r   rt   rv   ru   r4   r4   r5   rN   }  s   
zBaiChuanBaseForCausalLM.forwardweightsc                 C   s   ddg}t |  }|D ]`\}}d|v rq|dkr(| jjdk}|r(tjj|}|D ](\}}}	||vr4q*|||}|	drD||vrDq*|| }
|
j
}||
||	  n|	dr]||vr]q|| }
t|
dt}||
| qd S )N)r>   r   r   )r>   r   r   zrotary_emb.inv_freqzlm_head.weighti  z.biasweight_loader)dictnamed_parametersr   r   r%   r   
functional	normalizereplaceendswithr   r   r   )rG   r   stacked_params_mappingparams_dictnameloaded_weightis_baichuan2
param_nameweight_nameshard_idparamr   r4   r4   r5   load_weights  s8   
z$BaiChuanBaseForCausalLM.load_weightsrO   )rP   rQ   rR   packed_modules_mappingsupported_lora_modulesembedding_modulesembedding_padding_modulesr   rT   r   r   rD   r%   r   r   rN   r   r   r   rU   r4   r4   rH   r5   r   O  s>    
$r   c                       s4   e Zd ZdZ		ddee def fddZ  ZS )	BaichuanForCausalLMz"Baichuan 13B and Baichuan2 7B/13B.Nr8   r<   r=   c                    s:   |j dkrt j|d||d d S t j|d||d d S )Ni   ROPEr   rb   )r9   rC   rD   )rG   r   r<   r=   rH   r4   r5   rD     s   
zBaichuanForCausalLM.__init__rO   )	rP   rQ   rR   r   r   r   rT   rD   rU   r4   r4   rH   r5   r     s    r   )3r   r"   typingr   r   r   r%   r   transformersr   sglang.srt.distributedr   r   sglang.srt.layers.activationr	   sglang.srt.layers.layernormr
   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   r   rl   rS   r   r6   Moduler7   rV   r   r   r   r   
EntryClassr4   r4   r4   r5   <module>   s6   'f=6b
