o
    پi{>                     @   s  d dl mZmZmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZmZmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dl m!Z! dZ"G dd dej#Z$G dd dej#Z%G dd dej#Z&G dd dej#Z'G dd dej#Z(G dd dej#Z)G dd dej#Z*G dd dej#Z+G dd  d ej#Z,G d!d" d"ej#Z-G d#d$ d$e-Z.G d%d& d&ej#Z/e-e.e/gZ0dS )'    )IterableOptionalSetTupleN)nn)$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)CrossEncodingPoolerPoolerPoolingType)QuantizationConfig)AttentionTypeRadixAttention)VocabParallelEmbedding)ForwardBatch)default_weight_loader)get_global_server_args)
add_prefixc                       sB   e Zd Zdef fddZdejdejdedejfdd	Z  Z	S )
BertEmbeddingconfigc                    s   t    |j| _t|j|j| _t|j|j| _t|j	|j| _
tj|j|jd| _ttd|jf| _|j| _| jdkrDtdd S )Neps   absolutez4Only 'absolute' position_embedding_type is supported)super__init__hidden_sizesizer   
vocab_sizeword_embeddingsmax_position_embeddingsposition_embeddingstype_vocab_sizetoken_type_embeddingsr   	LayerNormlayer_norm_eps	Parametertorchemptyposition_idsposition_embedding_type
ValueErrorselfr   	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/bert.pyr      s*   

zBertEmbedding.__init__	input_ids	positionsforward_batchreturnc           
      C   sb   |  }| |}| |}|j}|d u rtj|tj|jd}| |}|| | }	| 	|	}	|	S )N)dtypedevice)
r    r"   r$   token_type_idsr*   zeroslongr:   r&   r'   )
r0   r5   r6   r7   input_shapeinputs_embedsr$   r;   r&   
embeddingsr3   r3   r4   forward4   s   




zBertEmbedding.forward
__name__
__module____qualname__
BertConfigr   r*   Tensorr   rA   __classcell__r3   r3   r1   r4   r      s    r   c                       s<   e Zd Zdef fddZdejdedejfddZ  Z	S )	
BertPoolerr   c                    s*   t    t|j|j| _t | _d S N)r   r   r   Linearr   denseTanh
activationr/   r1   r3   r4   r   R   s   
zBertPooler.__init__hidden_statesr7   r8   c                 C   s(   |dd d f }|  |}| |}|S )Nr   )rL   rN   )r0   rO   r7   first_token_tensorpooled_outputr3   r3   r4   rA   W   s   

zBertPooler.forwardrB   r3   r3   r1   r4   rI   P   s    rI   c                       sN   e Zd Z		ddedee def fddZdej	d	e
d
ej	fddZ  ZS )BertEncoderN r   quant_configprefixc                    s>   t     | _| _t fddt jD | _d S )Nc              	      s&   g | ]}t  | d | dqS )z.layer.)r   layer_idrT   rU   )	BertLayer).0	layer_idxr   rU   rT   r3   r4   
<listcomp>o   s    z(BertEncoder.__init__.<locals>.<listcomp>)	r   r   r   rT   r   
ModuleListrangenum_hidden_layerslayerr0   r   rT   rU   r1   rZ   r4   r   e   s   

zBertEncoder.__init__rO   r7   r8   c                 C   s   | j D ]}|||}q|S rJ   )r_   )r0   rO   r7   r_   r3   r3   r4   rA   z   s   
zBertEncoder.forwardNrS   )rC   rD   rE   rF   r   r   strr   r*   rG   r   rA   rH   r3   r3   r1   r4   rR   c   s"    rR   c                	       sN   e Zd Z			ddededee def fdd	Zd
e	j
defddZ  ZS )rW   r   NrS   r   rV   rT   rU   c                    sv   t    || _t|j|j||j|| dd| _t|j|j	|j
|| dd| _t|j|j	|j|| dd| _d S )Nz
.attention)r   num_attention_headsrV   r(   rT   rU   z.intermediate)r   intermediate_size
hidden_actrT   rU   .output)r   rd   r(   rT   rU   )r   r   rV   BertAttentionr   rc   r(   	attentionBertIntermediaterd   re   intermediate
BertOutputoutput)r0   r   rV   rT   rU   r1   r3   r4   r      s0   
	zBertLayer.__init__rO   r7   c                 C   s&   |  ||}| |}| ||}|S rJ   )rh   rj   rl   )r0   rO   r7   attn_outputintermediate_outputrl   r3   r3   r4   rA      s   
zBertLayer.forwardr   NrS   )rC   rD   rE   rF   intr   r   rb   r   r*   rG   r   rA   rH   r3   r3   r1   r4   rW      s    $rW   c                       s\   e Zd Z			ddededededee d	ef fd
dZde	j
dede	j
fddZ  ZS )rg   r   NrS   r   rc   r(   rV   rT   rU   c                    s@   t    t||||| dd| _t|||| dd| _d S )Nrf   )r   rc   rV   rT   rU   )r   r(   rT   rU   )r   r   BertSelfAttention	self_attnBertSelfOutputrl   )r0   r   rc   r(   rV   rT   rU   r1   r3   r4   r      s   
	zBertAttention.__init__rO   r7   r8   c                 C   s   |  ||}| ||S rJ   )rr   rl   )r0   rO   r7   self_outputr3   r3   r4   rA      s   zBertAttention.forwardro   )rC   rD   rE   rp   floatr   r   rb   r   r*   rG   r   rA   rH   r3   r3   r1   r4   rg      s0    rg   c                       sX   e Zd Z			ddedededee def
 fd	d
Zdej	de
dej	fddZ  ZS )rq   r   NrS   r   rc   rV   rT   rU   c              	      s   t    || _t }|| _| j| dksJ | j| | _| j| _| j| j | _| j| j | jks3J td| j| | _	| j| j | _
| j	| j | _| jd | _t| j| j| j| jd|| dd| _t| j| j| j| j	|| dtjd| _d S )	Nr   r   g      Tz	.qkv_proj)r   	head_sizetotal_num_headstotal_num_kv_headsbiasrT   rU   z.attn)	num_headshead_dimscalingnum_kv_headsrV   rU   	attn_type)r   r   r   r   rw   rz   rx   r{   maxr}   q_sizekv_sizer|   r
   qkv_projr   r   ENCODER_ONLYattn)r0   r   rc   rV   rT   rU   tp_sizer1   r3   r4   r      s>   

zBertSelfAttention.__init__rO   r7   r8   c           	      C   sB   |  |\}}|j| j| j| jgdd\}}}| ||||}|S )N)dim)r   splitr   r   r   )	r0   rO   r7   qkv_qkvrl   r3   r3   r4   rA     s    zBertSelfAttention.forwardro   )rC   rD   rE   rp   r   r   rb   r   r*   rG   r   rA   rH   r3   r3   r1   r4   rq      s,    -rq   c                	       sT   e Zd Z		ddededee def fddZd	e	j
d
e	j
de	j
fddZ  ZS )rs   NrS   r   r(   rT   rU   c                    s8   t    t||d|| dd| _tj||d| _d S NT.dense
input_sizeoutput_sizery   rT   rU   r   r   r   r   rL   r   r'   )r0   r   r(   rT   rU   r1   r3   r4   r     s   
zBertSelfOutput.__init__rO   input_tensorr8   c                 C       |  |\}}| || }|S rJ   rL   r'   r0   rO   r   r   r3   r3   r4   rA        zBertSelfOutput.forwardra   rC   rD   rE   rp   ru   r   r   rb   r   r*   rG   rA   rH   r3   r3   r1   r4   rs     s&    rs   c                       sR   e Zd Z		ddedededee def
 fdd	Zd
ej	dej	fddZ
  ZS )ri   NrS   r   rd   re   rT   rU   c                    s2   t    t||d|| dd| _t|| _d S )NTr   r   )r   r   r	   rL   r   intermediate_act_fn)r0   r   rd   re   rT   rU   r1   r3   r4   r   (  s   
zBertIntermediate.__init__rO   r8   c                 C   s   |  |\}}| |}|S rJ   )rL   r   )r0   rO   r   r3   r3   r4   rA   :  s   
zBertIntermediate.forwardra   )rC   rD   rE   rp   rb   r   r   r   r*   rG   rA   rH   r3   r3   r1   r4   ri   &  s    ri   c                       sX   e Zd Z		ddedededee def
 fdd	Zd
e	j
de	j
de	j
fddZ  ZS )rk   NrS   r   rd   r(   rT   rU   c                    s8   t    t||d|| dd| _tj||d| _d S r   r   )r0   r   rd   r(   rT   rU   r1   r3   r4   r   B  s   
zBertOutput.__init__rO   r   r8   c                 C   r   rJ   r   r   r3   r3   r4   rA   V  r   zBertOutput.forwardra   r   r3   r3   r1   r4   rk   @  s*    rk   c                       s   e Zd Zdddddedee dedef fd	d
Ze	
 		dde	jde	jdede	jdede	jfddZdeeee	jf  dee fddZ  ZS )	BertModelNFrS   )rT   use_bert_poolerrU   r   rT   r   rU   c                   sp   t    || _|| _t|| _t||td|d| _t	 j
r"tjntj}| jr/t|| _d S t|dd| _d S )Nencoder)r   rT   rU   T)pooling_type	normalize)r   r   r   r   r   r@   rR   r   r   r   is_embeddingr   CLSLASTrI   r   pooler)r0   r   rT   r   rU   r   r1   r3   r4   r   `  s$   


zBertModel.__init__r5   r6   r7   input_embedsget_embeddingr8   c                 C   s@   |dksJ | j |||d}| j||d}| js| ||}|S )NT)r5   r6   r7   )r7   )r@   r   r   r   r0   r5   r6   r7   r   r   rO   r3   r3   r4   rA   |  s   	zBertModel.forwardweightsc                 C   s   g d}t |  }|D ]V\}}|dd}| jsd|v rq|D ](\}}}||vr*q |||}|dr:||vr:q || }	|	j}
|
|	||  n|drS||vrSq|| }	t|	dt}
|
|	| qd S )N))r   queryr   )r   keyr   )r   valuer   r0   rr   r   z.biasweight_loader)dictnamed_parametersreplacer   endswithr   getattrr   )r0   r   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamr   r3   r3   r4   load_weights  s.   
zBertModel.load_weightsNF)rC   rD   rE   rF   r   r   boolrb   r   r*   no_gradrG   r   rA   r   r   r   r   rH   r3   r3   r1   r4   r   ^  s<    ,r   c                   @   s   e Zd ZdS )
ContrieverN)rC   rD   rE   r3   r3   r3   r4   r     s    r   c                       s   e Zd Zddddedee def fddZd	ee	ee
jf  fd
dZ		dde
jde
jdede
jdede
jfddZ  ZS )BertForSequenceClassificationNrS   )rT   rU   r   rT   rU   c                   sT   t    |j| _t||dtd|d| _t|j|j| _	t
|| j	| jj| _d S )NTbert)r   rT   r   rU   )r   r   
num_labelsr   r   r   r   rK   r   
classifierr   r   r`   r1   r3   r4   r     s   
z&BertForSequenceClassification.__init__r   c                    sf   g   fdd}| j |  t|  } D ]\}}|dr0|| }t|dt}||| qd S )Nc                  3   sD    D ]\} }|  dr| tdd  |fV  q | |f qd S )Nzbert.)
startswithlenappend)r   weightself_weightsr   r3   r4   weight_filter  s   
zABertForSequenceClassification.load_weights.<locals>.weight_filterr   r   )r   r   r   r   r   r   r   )r0   r   r   r   r   r   r   r   r3   r   r4   r     s   

z*BertForSequenceClassification.load_weightsFr5   r6   r7   r   r   r8   c                 C   s,   |dksJ | j |||||d}| ||S )NT)r5   r6   r7   r   r   )r   r   r   r3   r3   r4   rA     s   z%BertForSequenceClassification.forwardr   )rC   rD   rE   rF   r   r   rb   r   r   r   r*   rG   r   r   r   rA   rH   r3   r3   r1   r4   r     s4    r   )1typingr   r   r   r   r*   r   sglang.srt.distributedr   sglang.srt.layers.activationr   sglang.srt.layers.linearr	   r
   r   sglang.srt.layers.poolerr   r   r   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   r   *sglang.srt.layers.vocab_parallel_embeddingr   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.server_argsr   sglang.srt.utilsr   rF   Moduler   rI   rR   rW   rg   rq   rs   ri   rk   r   r   r   
EntryClassr3   r3   r3   r4   <module>   s8   6.#8Y=