o
    پir&                     @   s  d Z ddlmZmZmZmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ G dd dej Z!G dd dej Z"G dd dej Z#G dd dej Z$G dd dej Z%e%Z&dS )z?Inference-only GPT-2 model compatible with HuggingFace weights.    )IterableOptionalTupleTypeN)nn)
GPT2Config)$get_tensor_model_parallel_world_size)NewGELU)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)VocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixc                	       sR   e Zd Z		ddededee def fddZd	e	j
d
ede	j
fddZ  ZS )GPT2AttentionN layer_idconfigquant_configprefixc              	      s   t    |j| _|j}t }|| dksJ || | _| j| | _| jd | _t| j| j|d|t	d|d| _
t| j| jd|t	d|d| _t| j| j| j|||d| _d S )Nr   g      Tc_attnbiasr   r   c_proj)scalingnum_kv_headsr   r   )super__init__hidden_sizenum_attention_headsr   	num_headshead_dimscaler   r   r   r   r   r   attn)selfr   r   r   r   total_num_heads tensor_model_parallel_world_size	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/gpt2.pyr!   -   s>   

zGPT2Attention.__init__hidden_statesforward_batchreturnc           	      C   sD   |  |\}}|jddd\}}}| ||||}| |\}}|S )N   )chunksdim)r   chunkr'   r   )	r(   r/   r0   qkv_qkvattn_outputr-   r-   r.   forwardU   s
   zGPT2Attention.forwardNr   )__name__
__module____qualname__intr   r   r   strr!   torchTensorr   r=   __classcell__r-   r-   r+   r.   r   +   s&    (r   c                       sZ   e Zd Zeddfdededeej de	e
 def
 fdd	Zd
ejdejfddZ  ZS )GPT2MLPNr   intermediate_sizer   	act_layerr   r   c                    sP   t    |j}t||d|td|d| _t||d|td|d| _| | _d S )NTc_fcr   r   )	r    r!   r"   r
   r   rJ   r   r   act)r(   rH   r   rI   r   r   r"   r+   r-   r.   r!   c   s"   
zGPT2MLP.__init__r/   r1   c                 C   s*   |  |\}}| |}| |\}}|S N)rJ   rK   r   )r(   r/   r8   r-   r-   r.   r=   }   s   
zGPT2MLP.forward)r?   r@   rA   r	   rB   r   r   r   Moduler   r   rC   r!   rD   rE   r=   rF   r-   r-   r+   r.   rG   a   s(    rG   c                       s^   e Zd Zeddfdededeej de	e
 def
 fdd	Zd
ejdedejfddZ  ZS )	GPT2BlockNr   r   r   rI   r   r   c                    s   t    |j}|jd ur|jnd| }tj||jd| _t|||t	d|d| _
tj||jd| _t||||t	d|d| _d S )N   epsr'   r   mlp)rI   r   r   )r    r!   r"   n_innerr   	LayerNormlayer_norm_epsilonln_1r   r   r'   ln_2rG   rS   )r(   r   r   rI   r   r   r"   	inner_dimr+   r-   r.   r!      s   
zGPT2Block.__init__r/   r0   r1   c                 C   sH   |}|  |}| j||d}|| }|}| |}| |}|| }|S )N)r/   r0   )rW   r'   rX   rS   )r(   r/   r0   residualr<   feed_forward_hidden_statesr-   r-   r.   r=      s   


zGPT2Block.forward)r?   r@   rA   r	   rB   r   r   r   rM   r   r   rC   r!   rD   rE   r   r=   rF   r-   r-   r+   r.   rN      s,    rN   c                       sT   e Zd Z		ddedee def fddZdej	d	ej	d
e
dej	fddZ  ZS )	GPT2ModelNr   r   r   r   c                    s   t     | _ jrJ  jrJ  jrJ  j| _t j	| j| _
t j| j| _t fddt jD | _tj| j jd| _d S )Nc              	      s(   g | ]}t | td | dqS )zh.)r   r   )rN   r   ).0ir   r   r   r-   r.   
<listcomp>   s    z&GPT2Model.__init__.<locals>.<listcomp>rP   )r    r!   r   add_cross_attentionscale_attn_by_inverse_layer_idxreorder_and_upcast_attnr"   	embed_dimr   
vocab_sizewter   	Embeddingmax_position_embeddingswpe
ModuleListrangenum_hidden_layershrU   rV   ln_fr(   r   r   r   r+   r_   r.   r!      s   



zGPT2Model.__init__	input_idsposition_idsr0   r1   c           	      C   sR   |  |}| |}|| }tt| jD ]}| j| }|||}q| |}|S rL   )rf   ri   rk   lenrm   rn   )	r(   rp   rq   r0   inputs_embedsposition_embedsr/   r^   layerr-   r-   r.   r=      s   



zGPT2Model.forwardr>   )r?   r@   rA   r   r   r   rC   r!   rD   rE   r   r=   rF   r-   r-   r+   r.   r\      s&    r\   c                       sp   e Zd Z		ddedee def fddZdej	d	ej	d
e
dej	fddZdeeeej	f  fddZ  ZS )GPT2LMHeadModelNr   r   r   r   c                    sD   t    || _|| _t||td|d| _| jj| _t	|| _
d S )NtransformerrR   )r    r!   r   r   r\   r   rw   rf   lm_headr   logits_processorro   r+   r-   r.   r!      s   

zGPT2LMHeadModel.__init__rp   	positionsr0   r1   c                 C   s    |  |||}| ||| j|S rL   )rw   ry   rx   )r(   rp   rz   r0   r/   r-   r-   r.   r=      s   
zGPT2LMHeadModel.forwardweightsc                 C   s   t | jdd}|D ]>\}}d|v rq
d|v sd|v rq
|ds%d| }|| }dD ]}||vr2q+|ds8q+| }q+t|d	t}||| q
d S )
NF)remove_duplicatezlm_head.weightz
.attn.biasz.attn.masked_biasztransformer.)r   r   rJ   z.weightweight_loader)dictnamed_parameters
startswithendswithtgetattrr   )r(   r{   params_dictnameloaded_weightparamconv1d_weight_namer}   r-   r-   r.   load_weights  s$   


zGPT2LMHeadModel.load_weightsr>   )r?   r@   rA   r   r   r   rC   r!   rD   rE   r   r=   r   r   r   rF   r-   r-   r+   r.   rv      s(    
$rv   )'__doc__typingr   r   r   r   rD   r   transformersr   %sglang.srt.distributed.parallel_stater   sglang.srt.layers.activationr	   sglang.srt.layers.linearr
   r   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   *sglang.srt.layers.vocab_parallel_embeddingr   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   rM   r   rG   rN   r\   rv   
EntryClassr-   r-   r-   r.   <module>   s*   6&109