o
    پiY0                     @   s>  d Z ddlmZ ddlmZmZ ddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z% G dd dej&Z'G dd dej&Z(G dd dej&Z)G dd dej&Z*G dd dej&Z+e+Z,dS )zPyTorch Starcoder2 model.    )Iterable)OptionalTupleN)nn)Starcoder2Config)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)DEFAULT_VOCAB_PADDING_SIZEParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixmake_layersc                	       sZ   e Zd Z			ddedee dedef fdd	Zd
e	j
de	j
dede	j
fddZ  ZS )Starcoder2AttentionN r   configquant_configprefixlayer_idc              	      st  t    || _|j| _t }|j| _| j| dksJ | j| | _|j| _	| j	|kr5| j	| dks4J n	|| j	 dks>J t
d| j	| | _| j| j | _| j| j | _| j| j | _| jd | _|j| _|j| _|j| _t| j| j| j| j	| j|| dd| _t| j| j | j| j|| dd| _t| j| j| jt| jdd| _t| j| j| j| j||| d	d
| _d S )Nr      g      z	.qkv_projbiasr   r   z.o_projT)
rotary_dimmax_positionbaseis_neox_stylez.attn)num_kv_headsr   r   r   )super__init__r   hidden_sizer   num_attention_headstotal_num_heads	num_headsnum_key_value_headstotal_num_kv_headsmaxr%   head_dimq_sizekv_sizescaling
rope_thetamax_position_embeddingsuse_biasr   qkv_projr   o_projr   int
rotary_embr   attn)selfr   r   r   r   tp_size	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/starcoder2.pyr'   7   sf   

	
zStarcoder2Attention.__init__	positionshidden_statesforward_batchreturnc                 C   sb   |  |\}}|j| j| j| jgdd\}}}| |||\}}| ||||}	| |	\}
}|
S )N)dim)r6   splitr0   r1   r9   r:   r7   )r;   rA   rB   rC   qkv_qkvattn_outputoutputr?   r?   r@   forwardy   s    zStarcoder2Attention.forward)Nr   r   )__name__
__module____qualname__r   r   r   strr8   r'   torchTensorr   rO   __classcell__r?   r?   r=   r@   r   5   s,    Br   c                       sJ   e Zd Z		ddedee def fddZdej	d	ej	fd
dZ
  ZS )Starcoder2MLPNr   r   r   r   c                    sZ   t    t|j|j|j|| dd| _t|j|j|j|| dd| _t	|j
| _d S )Nz.c_fcr   z.c_proj)r&   r'   r
   r(   intermediate_sizer5   c_fcr   c_projr	   
hidden_actactr;   r   r   r   r=   r?   r@   r'      s    
zStarcoder2MLP.__init__rB   rD   c                 C   s*   |  |\}}| |}| |\}}|S N)rY   r\   rZ   )r;   rB   rI   r?   r?   r@   rO      s   
zStarcoder2MLP.forwardNr   )rP   rQ   rR   r   r   r   rS   r'   rT   rU   rO   rV   r?   r?   r=   r@   rW      s    rW   c                	       sX   e Zd Z		ddededee def fddZd	e	j
d
e	j
dede	j
fddZ  ZS )Starcoder2DecoderLayerNr   r   r   r   r   c                    sl   t    |j| _t|||| dd| _t||| dd| _tj|j|j	d| _
tj|j|j	d| _d S )Nz
.self_attn)r   r   r   r   z.mlpr   r   eps)r&   r'   r(   r   	self_attnrW   mlpr   	LayerNormnorm_epsiloninput_layernormpost_attention_layernorm)r;   r   r   r   r   r=   r?   r@   r'      s   
zStarcoder2DecoderLayer.__init__rA   rB   rC   rD   c                 C   sJ   |}|  |}| j|||d}|| }|}| |}| |}|| }|S )N)rA   rB   rC   )rh   rd   ri   re   )r;   rA   rB   rC   residualr?   r?   r@   rO      s   


zStarcoder2DecoderLayer.forwardr_   )rP   rQ   rR   r   r8   r   r   rS   r'   rT   rU   r   rO   rV   r?   r?   r=   r@   r`      s*    r`   c                       sb   e Zd Z		ddedee def fddZ	ddej	d	ej	d
e
deej	 dej	f
ddZ  ZS )Starcoder2ModelNr   r   r   r   c                    s   t     | _ j| _t j j| dd| _t }|j}|j	}| j
 | | _|d  j
 | | _t j
 fdd| dd| _tj j jd| _d S )	Nz.embed_tokensra   r   c                    s   t  | |dS )N)r   r   r   r   )r`   )idxr   r   r   r?   r@   <lambda>   s    z*Starcoder2Model.__init__.<locals>.<lambda>z.layersr   rb   )r&   r'   r   
vocab_sizer   r(   embed_tokensr   
world_sizeranknum_hidden_layersstart_layer	end_layerr   layersr   rf   rg   norm)r;   r   r   r   pp_grouppp_sizepp_rankr=   rm   r@   r'      s(   
zStarcoder2Model.__init__	input_idsrA   rC   inputs_embedsrD   c                 C   sP   |d u r
|  |}n|}t| j| jD ]}| j| }||||}q| |}|S r^   )rq   rangeru   rv   rw   rx   )r;   r|   rA   rC   r}   rB   ilayerr?   r?   r@   rO      s   

zStarcoder2Model.forwardr_   r^   )rP   rQ   rR   r   r   r   rS   r'   rT   rU   r   rO   rV   r?   r?   r=   r@   rk      s,    &rk   c                       s~   e Zd Z		ddedee def fddZ	ddej	d	ej	d
e
deej	 dej	f
ddZdeeeej	f  fddZ  ZS )Starcoder2ForCausalLMNr   r   r   r   c                    s   t    || _t||td|d| _|j| _|j| _|jr$| jj	| _
n|j| _t| j|j|jt|| dd| _
t|d| _d S )Nmodelro   z.lm_head)org_num_embeddingspadding_sizer   r   )r   )r&   r'   r   rk   r   r   rp   unpadded_vocab_sizetie_word_embeddingsrq   lm_headr   r(   r   r   logits_processorr]   r=   r?   r@   r'     s&   
zStarcoder2ForCausalLM.__init__r|   rA   rC   r}   rD   c                 C   s$   | j ||||d}| ||| j|S )N)r|   rA   rC   r}   )r   r   r   )r;   r|   rA   rC   r}   rB   r?   r?   r@   rO   2  s   
zStarcoder2ForCausalLM.forwardweightsc                 C   s   g d}t |  }|D ]I\}}d|v rqd}|D ]#\}}}	||v r<|||}|| }
t|
dt}||
||	 d} nq|r@q||}
|
d u rJqt|
dt}||
| qd S )N))r6   q_projrJ   )r6   k_projrK   )r6   v_projrL   zrotary_emb.inv_freqsFweight_loaderT)dictnamed_parametersreplacegetattrr   get)r;   r   stacked_params_mappingparams_dictnameloaded_weight
is_stacked
param_nameweight_nameshard_idparamr   r?   r?   r@   load_weightsC  s2   	
z"Starcoder2ForCausalLM.load_weightsr_   r^   )rP   rQ   rR   r   r   r   rS   r'   rT   rU   r   rO   r   r   r   rV   r?   r?   r=   r@   r     s.     
$r   )-__doc__collections.abcr   typingr   r   rT   r   transformersr   sglang.srt.distributedr   r   sglang.srt.layers.activationr	   sglang.srt.layers.linearr
   r   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   r   Moduler   rW   r`   rk   r   
EntryClassr?   r?   r?   r@   <module>   s.   R#29Q