o
    
۾i1                     @   sp  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z. G dd dej/Z0G dd dej/Z1G dd dej/Z2e
G dd dej/Z3G dd dej/e(Z4dS ) z?Inference-only GPT-J model compatible with HuggingFace weights.    )Iterable)isliceN)nn)
GPTJConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)	Attention)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                	       Z   e Zd Z			ddededB dedB def fddZd	ej	d
ej	dej	fddZ
  ZS )GPTJAttentionN configcache_configquant_configprefixc           	         s  t    |j| _|j| _| j| j | _t|j| j| jd|| dd| _t|j|jd|| dd| _	t
 }| j| dks@J | j| | _| jd }t|ddsSJ |jd	 dks\J t|d
i }|j| j |d< t|dd}t| j||dd| _t| j| j|||| dd| _d S )NFz	.qkv_projbiasr$   r%   z	.out_projr   g      rotaryT   rope_parameterspartial_rotary_factormax_position_embeddingsi    )max_positionr*   is_neox_style.attn)r#   r$   r%   )super__init__num_attention_headstotal_num_headshidden_size	head_sizer   qkv_projr   out_projr
   	num_headsgetattr
rotary_dimr   
rotary_embr   attn)	selfr"   r#   r$   r%   tp_world_sizescalingr*   r,   	__class__ T/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/gpt_j.pyr1   ?   sT   

zGPTJAttention.__init__position_idshidden_statesreturnc           	      C   sT   |  |\}}|jddd\}}}| |||\}}| |||}| |\}}|S )N   )chunksdim)r6   chunkr;   r<   r7   )	r=   rD   rE   qkv_qkvattn_outputrB   rB   rC   forwardt   s   zGPTJAttention.forwardNNr!   __name__
__module____qualname__r   r   r   strr1   torchTensorrR   __classcell__rB   rB   r@   rC   r    >   s(    5r    c                	       sN   e Zd Z		ddedededB def fddZd	ej	d
ej	fddZ
  ZS )GPTJMLPNr!   intermediate_sizer"   r$   r%   c                    sP   t    |j}t|||| dd| _t|||| dd| _t|j| _	d S )Nz.fc_in)r$   r%   z.fc_out)
r0   r1   n_embdr   fc_inr   fc_outr   activation_functionact)r=   r]   r"   r$   r%   r4   r@   rB   rC   r1      s   
zGPTJMLP.__init__rE   rF   c                 C   s*   |  |\}}| |}| |\}}|S N)r_   rb   r`   )r=   rE   rM   rB   rB   rC   rR      s   
zGPTJMLP.forward)Nr!   )rU   rV   rW   intr   r   rX   r1   rY   rZ   rR   r[   rB   rB   r@   rC   r\      s    r\   c                	       r   )	GPTJBlockNr!   r"   r#   r$   r%   c                    sl   t    |jd u rd|j n|j}tj|j|jd| _t|||| dd| _	t
|||| dd| _d S )N   epsr/   r%   z.mlp)r0   r1   n_innerr^   r   	LayerNormlayer_norm_epsilonln_1r    r<   r\   mlp)r=   r"   r#   r$   r%   	inner_dimr@   rB   rC   r1      s   
zGPTJBlock.__init__rD   rE   rF   c                 C   s6   |}|  |}| j||d}| |}|| | }|S )N)rD   rE   )rm   r<   rn   )r=   rD   rE   residualrQ   
mlp_outputrB   rB   rC   rR      s   

zGPTJBlock.forwardrS   rT   rB   rB   r@   rC   re      s(    re   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )	GPTJModelr!   ri   vllm_configr%   c                   s   t    |jj|j |j| _| _j| _t	j
| j| _tj fdd| dd\| _| _| _tj| jjd| _tdgj| _d S )Nc                    s   t  | dS )Nri   )re   ri   r#   r"   r$   rB   rC   <lambda>   s    z$GPTJModel.__init__.<locals>.<lambda>z.hri   rg   rE   )r0   r1   model_config	hf_configr#   r$   r"   r^   	embed_dimr   
vocab_sizewter   n_layerstart_layer	end_layerhr   rk   rl   ln_fr   make_empty_intermediate_tensors)r=   rs   r%   r@   rt   rC   r1      s(   

zGPTJModel.__init__	input_idsrF   c                 C   s
   |  |S rc   )rz   r=   r   rB   rB   rC   embed_input_ids   s   
zGPTJModel.embed_input_idsNrD   intermediate_tensorsinputs_embedsc                 C   sn   t  jr|d ur|}n
| |}n|d }t| j| j| jD ]}|||}qt  js0td|iS | 	|}|S )NrE   )
r	   is_first_rankr   r   r~   r|   r}   is_last_rankr   r   )r=   r   rD   r   r   rE   layerrB   rB   rC   rR      s   
zGPTJModel.forwardweightsc                 C   sX  g d}t |  }t }|D ]\}}d|v sd|v rq| jd urJ| j| }rJ|| }t|dt}	| dkr;|n|d }|	|| || q|D ].\}
}}||vrVqL|	||
}|
drf||vrfqLt|| rlqL|| }|j}	|	|||  n)t||}|d u rq|
dr||vrqt|| rq|| }t|dt}	|	|| || q|S )N))r6   q_projrN   )r6   k_projrO   )r6   v_projrP   )gate_up_proj	gate_projr   )r   up_projr   z	attn.biaszattn.masked_biasweight_loaderr   z.bias)dictnamed_parameterssetr$   get_cache_scaler9   r   rJ   addreplaceendswithr   r   r   )r=   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idrB   rB   rC   load_weights   sP   






zGPTJModel.load_weightsrc   )rU   rV   rW   r   rX   r1   rY   rZ   r   r   rR   r   tupler   r   r[   rB   rB   r@   rC   rr      s     
,rr   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB deje	B f
ddZ
dejdejdB fddZdeeeejf  dee fddZ  ZS )GPTJForCausalLMr!   ri   rs   r%   c                   sz   t    |jj}|j}|| _|| _|jrJ t|t|dd| _	t
|j|jd|t|dd| _t|j| _| j	j| _d S )Ntransformer)rs   r%   Tlm_headr&   )r0   r1   rv   rw   r$   r"   tie_word_embeddingsrr   r   r   r   ry   r^   r   r   logits_processorr   )r=   rs   r%   r"   r$   r@   rB   rC   r1   ,  s&   


zGPTJForCausalLM.__init__r   rF   c                 C   s   | j |S rc   )r   r   r   rB   rB   rC   r   B  s   zGPTJForCausalLM.embed_input_idsN	positionsr   r   c                 C   s   |  ||||}|S rc   )r   )r=   r   r   r   r   rE   rB   rB   rC   rR   E  s   zGPTJForCausalLM.forwardrE   c                 C   s   |  | j|| jj}|S rc   )r   r   r'   )r=   rE   logitsrB   rB   rC   compute_logitsQ  s   zGPTJForCausalLM.compute_logitsr   c                 C   s   t | }||S rc   )r   r   )r=   r   loaderrB   rB   rC   r   X  s   
zGPTJForCausalLM.load_weights)NN)rU   rV   rW   r   rX   r1   rY   rZ   r   r   rR   r   r   r   r   r   r[   rB   rB   r@   rC   r   +  s,    

,r   )5__doc__collections.abcr   	itertoolsr   rY   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler    r\   re   rr   r   rB   rB   rB   rC   <module>   s4   	C j