o
    پia*                     @   s.  d Z ddlmZmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZm Z  ddl!m"Z" G dd dej#Z$G dd dej#Z%G dd dej#Z&G dd dej#Z'G dd dej#Z(e(Z)dS )z?Inference-only GPT-J model compatible with HuggingFace weights.    )IterableOptionalTupleN)nn)
GPTJConfig)$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loadermaybe_remap_kv_scale_name)
add_prefixc                	       X   e Zd Z		ddededee def fddZd	e	j
d
e	j
dede	j
fddZ  ZS )GPTJAttentionN layer_idconfigquant_configprefixc              	      s   t    |j}|j}|| }t|||d|td|d| _t||d|td|d| _t	 }|| dks5J || }	|d }
t
|ddsEJ |jd	 dksNJ t
|d
d}t
|dd}t||j||dd| _t|	||
|	||d| _d S )NFqkv_proj)biasr   r   out_projr   g      rotaryT   
rope_thetai'  max_position_embeddingsi    )
rotary_dimmax_positionbaseis_neox_style)scalingnum_kv_headsr   r   )super__init__num_attention_headshidden_sizer
   r   r   r   r   r   getattrr$   r   
rotary_embr   attn)selfr   r   r   r   total_num_headsr-   head_dim tensor_model_parallel_world_size	num_headsr(   r"   r#   	__class__ K/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/gpt_j.pyr+   2   sT   
zGPTJAttention.__init__	positionshidden_statesforward_batchreturnc           
      C   sV   |  |\}}|jddd\}}}| |||\}}| ||||}	| |	\}	}|	S )N   )chunksdim)r   chunkr/   r0   r   )
r1   r:   r;   r<   qkv_qkvattn_outputr8   r8   r9   forwardg   s   zGPTJAttention.forwardNr   __name__
__module____qualname__intr   r   r   strr+   torchTensorr   rI   __classcell__r8   r8   r6   r9   r   0   s*    5r   c                	       sN   e Zd Z		ddededee def fddZd	e	j
d
e	j
fddZ  ZS )GPTJMLPNr   intermediate_sizer   r   r   c                    sP   t    |j}t|||td|d| _t|||td|d| _t|j	| _
d S )Nfc_inr   r   fc_out)r*   r+   n_embdr	   r   rV   r   rX   r   activation_functionact)r1   rU   r   r   r   r-   r6   r8   r9   r+   w   s   
zGPTJMLP.__init__r;   r=   c                 C   s*   |  |\}}| |}| |\}}|S N)rV   r[   rX   )r1   r;   rD   r8   r8   r9   rI      s   
zGPTJMLP.forwardrJ   )rL   rM   rN   rO   r   r   r   rP   r+   rQ   rR   rI   rS   r8   r8   r6   r9   rT   u   s    rT   c                	       r   )	GPTJBlockNr   r   r   r   r   c                    sl   t    |jd u rd|j n|j}tj|j|jd| _t|||t	d|d| _
t|||t	d|d| _d S )N   epsr0   r   mlprW   )r*   r+   n_innerrY   r   	LayerNormlayer_norm_epsilonln_1r   r   r0   rT   rb   )r1   r   r   r   r   	inner_dimr6   r8   r9   r+      s   
zGPTJBlock.__init__r:   r;   r<   r=   c                 C   s8   |}|  |}| j|||d}| |}|| | }|S )N)r:   r;   r<   )rf   r0   rb   )r1   r:   r;   r<   residualrH   
mlp_outputr8   r8   r9   rI      s   

zGPTJBlock.forwardrJ   rK   r8   r8   r6   r9   r]      s*    r]   c                       sx   e Zd Z		ddedee def fddZdej	d	ej	fd
dZ
	ddej	dej	dedeej	 d	ej	f
ddZ  ZS )	GPTJModelNr   r   r   r   c                    sX   t     j}t j|| _t fddt j	D | _
tj| jd| _d S )Nc              	      s(   g | ]}t | td | dqS )zh.rW   )r]   r   ).0ir   r   r   r8   r9   
<listcomp>   s    z&GPTJModel.__init__.<locals>.<listcomp>r_   )r*   r+   rY   r   
vocab_sizewter   
ModuleListrangen_layerhrd   re   ln_f)r1   r   r   r   	embed_dimr6   rm   r9   r+      s   
zGPTJModel.__init__	input_idsr=   c                 C   s
   |  |S r\   )rp   )r1   rw   r8   r8   r9   get_input_embeddings   s   
zGPTJModel.get_input_embeddingsr:   r<   inputs_embedsc                 C   s>   |d ur|}n|  |}| jD ]}||||}q| |}|S r\   )rx   rt   ru   )r1   rw   r:   r<   ry   r;   layerr8   r8   r9   rI      s   


zGPTJModel.forwardrJ   r\   )rL   rM   rN   r   r   r   rP   r+   rQ   rR   rx   r   rI   rS   r8   r8   r6   r9   rj      s.    rj   c                       s~   e Zd Z		ddedee def fddZ	ddej	d	ej	d
e
deej	 dej	f
ddZdeeeej	f  fddZ  ZS )GPTJForCausalLMNr   r   r   r   c                    sT   t    |jr
J || _t||td|d| _t|j|j	d|d| _
t|| _d S )Ntransformerra   T)r   r   )r*   r+   tie_word_embeddingsr   rj   r   r|   r   ro   rY   lm_headr   logits_processor)r1   r   r   r   r6   r8   r9   r+      s   

zGPTJForCausalLM.__init__rw   r:   r<   ry   r=   c                 C   s"   |  ||||}| ||| j|S r\   )r|   r   r~   )r1   rw   r:   r<   ry   r;   r8   r8   r9   rI   	  s   
zGPTJForCausalLM.forwardweightsc                 C   s&  g d}t |  }|D ]\}}d|v sd|v rq| jd urB| j| }rB|| }t|dt}| dkr8|n|d }||| q|D ](\}	}
}|
|vrNqD||
|	}|dr^||vr^qD|| }|j	}||||  n#t
||}|d u rwq|dr||vrq|| }t|dt}||| qd S )N))r   q_projrE   )r   k_projrF   )r   v_projrG   z	attn.biaszattn.masked_biasweight_loaderr   z.bias)dictnamed_parametersr   get_cache_scaler.   r   rA   replaceendswithr   r   )r1   r   stacked_params_mappingparams_dictnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idr8   r8   r9   load_weights  sD   



zGPTJForCausalLM.load_weightsrJ   r\   )rL   rM   rN   r   r   r   rP   r+   rQ   rR   r   rI   r   r   r   rS   r8   r8   r6   r9   r{      s.    
$r{   )*__doc__typingr   r   r   rQ   r   transformersr   %sglang.srt.distributed.parallel_stater   sglang.srt.layers.activationr   sglang.srt.layers.linearr	   r
   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   r   sglang.srt.utilsr   Moduler   rT   r]   rj   r{   
EntryClassr8   r8   r8   r9   <module>   s,   E!+0U