o
    پiC(                     @   s  d Z ddlmZmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ G dd dejZ G dd dejZ!G dd dejZ"G dd dejZ#G dd dejZ$e$Z%dS )zDInference-only GPTBigCode model compatible with HuggingFace weights.    )IterableOptionalTupleN)nn)GPTBigCodeConfig)$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)VocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixc                	       R   e Zd Z		ddededee def fddZd	e	j
d
ede	j
fddZ  ZS )GPTBigCodeAttentionN layer_idconfigquant_configprefixc              
      s   t    |j| _|j}t | _|| j dksJ || j | _| j| | _| jd | _|j	| _	| j	r8d}d| _
n|}| j| _
| j| j
 | _t| j| j||d|td|d| _t| j| jd|td|d| _t| j| j| j| j
||td|d	| _d S )
Nr   g         Tc_attnbiasr   r   c_projattn)scalingnum_kv_headsr   r   r   )super__init__hidden_sizenum_attention_headsr    tensor_model_parallel_world_size	num_headshead_dimscalemulti_queryr!   kv_dimr
   r   r   r   r   r   r   )selfr   r   r   r   total_num_headstotal_num_kv_heads	__class__ Q/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/gpt_bigcode.pyr#   +   sP   

zGPTBigCodeAttention.__init__hidden_statesforward_batchreturnc           	      C   sV   |  |\}}|j| j| j | j| jgdd\}}}| ||||}| |\}}|S )N)dim)r   splitr$   r&   r+   r   r   )	r,   r3   r4   qkv_qkvattn_outputr1   r1   r2   forward^   s   
zGPTBigCodeAttention.forwardNr   __name__
__module____qualname__intr   r   r   strr#   torchTensorr   r?   __classcell__r1   r1   r/   r2   r   )   s&    3r   c                	       sN   e Zd Z		ddededee def fddZd	e	j
d
e	j
fddZ  ZS )	GPTBigMLPNr   intermediate_sizer   r   r   c                    sX   t    |j}t||d|td|d| _t||d|td|d| _t|j	||| _
d S )NTc_fcr   r   )r"   r#   r$   r	   r   rL   r   r   r   activation_functionact)r,   rK   r   r   r   r$   r/   r1   r2   r#   s   s&   

zGPTBigMLP.__init__r3   r5   c                 C   s*   |  |\}}| |}| |\}}|S N)rL   rN   r   )r,   r3   r:   r1   r1   r2   r?      s   
zGPTBigMLP.forwardr@   )rB   rC   rD   rE   r   r   r   rF   r#   rG   rH   r?   rI   r1   r1   r/   r2   rJ   q   s    rJ   c                	       r   )GPTBigCodeBlockNr   r   r   r   r   c                    s   t    |j}|jd ur|jnd| }tj||jd| _t|||t	d|d| _
tj||jd| _t|||t	d|d| _d S )N   epsr   r   mlp)r"   r#   r$   n_innerr   	LayerNormlayer_norm_epsilonln_1r   r   r   ln_2rJ   rU   )r,   r   r   r   r   r$   	inner_dimr/   r1   r2   r#      s   
zGPTBigCodeBlock.__init__r3   r4   r5   c                 C   sH   |}|  |}| j||d}|| }|}| |}| |}|| }|S )N)r3   r4   )rY   r   rZ   rU   )r,   r3   r4   residualr>   feed_forward_hidden_statesr1   r1   r2   r?      s   


zGPTBigCodeBlock.forwardr@   rA   r1   r1   r/   r2   rP      s&    rP   c                       sT   e Zd Z		ddedee def fddZdej	d	ej	d
e
dej	fddZ  ZS )GPTBigCodeModelNr   r   r   r   c                    s   t     | _ jrJ  j| _d} j| | _t| j| j jtdd| _	t
 j| j| _t
 fddt jD | _t
j| j jd| _d S )Nr   wte)org_num_embeddingsr   c              	      s(   g | ]}t | td | dqS )zh.rT   )rP   r   ).0ir   r   r   r1   r2   
<listcomp>   s    z,GPTBigCodeModel.__init__.<locals>.<listcomp>rR   )r"   r#   r   add_cross_attentionr$   	embed_dim
vocab_sizer   r   r_   r   	Embeddingmax_position_embeddingswpe
ModuleListrangenum_hidden_layershrW   rX   ln_f)r,   r   r   r   
lora_vocabr/   rc   r2   r#      s&   

zGPTBigCodeModel.__init__	input_idsposition_idsr4   r5   c           	      C   sR   |  |}| |}|| }tt| jD ]}| j| }|||}q| |}|S rO   )r_   rj   rl   lenrn   ro   )	r,   rq   rr   r4   inputs_embedsposition_embedsr3   rb   layerr1   r1   r2   r?      s   



zGPTBigCodeModel.forwardr@   )rB   rC   rD   r   r   r   rF   r#   rG   rH   r   r?   rI   r1   r1   r/   r2   r^      s&    r^   c                	       s   e Zd ZddgiZg dZdddZg Z		dded	ee	 d
e
f fddZe dejdejdedejfddZdeee
ejf  fddZ  ZS )GPTBigCodeForCausalLMr   )rL   r   r_   r   input_embeddingsoutput_embeddings)r_   lm_headNr   r   r   r   c                    sL   t    || _|| _t||td|d| _| jj| _|j	| _
t|| _d S )NtransformerrT   )r"   r#   r   r   r^   r   r{   r_   rz   rg   unpadded_vocab_sizer   logits_processor)r,   r   r   r   r/   r1   r2   r#      s   

zGPTBigCodeForCausalLM.__init__rq   	positionsr4   r5   c                 C   s    |  |||}| ||| j|S rO   )r{   r}   rz   )r,   rq   r~   r4   r3   r1   r1   r2   r?     s   
zGPTBigCodeForCausalLM.forwardweightsc                 C   s   t | jdd}|D ]8\}}d|v rq
d|v rq
|| }t|dt}d|v s*d|v r=|||d |||d	 |||d
 q
||| q
d S )NF)remove_duplicatezlm_head.weightz
.attn.biasweight_loaderzc_attn.input_scalezc_attn.weight_scaler;   r<   r=   )dictnamed_parametersgetattrr   )r,   r   params_dictnameloaded_weightparamr   r1   r1   r2   load_weights  s   z"GPTBigCodeForCausalLM.load_weightsr@   )rB   rC   rD   packed_modules_mappingsupported_lora_modulesembedding_modulesembedding_padding_modulesr   r   r   rF   r#   rG   no_gradrH   r   r?   r   r   r   rI   r1   r1   r/   r2   rw      s6    
$rw   )&__doc__typingr   r   r   rG   r   transformersr   sglang.srt.distributedr   sglang.srt.layers.activationr   sglang.srt.layers.linearr	   r
   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   *sglang.srt.layers.vocab_parallel_embeddingr   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   Moduler   rJ   rP   r^   rw   
EntryClassr1   r1   r1   r2   <module>   s*   H$+2>