o
    -i^6                     @   s  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z. G dd dej/Z0G dd dej/Z1G dd dej/Z2eG dd dej/Z3G dd dej/e(Z4G d d! d!ej/e'Z5d"ee6e7ej8f  d#ee6e7ej8f  fd$d%Z9dS )&z?Inference-only GPT-2 model compatible with HuggingFace weights.    )Iterable)isliceN)nn)
GPT2Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)DispatchPooler)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsCrossEncoding
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                	       T   e Zd Z			ddededB dedB def fddZd	ej	d
ej	fddZ
  ZS )GPT2AttentionN configcache_configquant_configprefixc                    s   t    |j| _|j}t }|| dksJ || | _| j| | _| jd | _t| j| j|d|| dd| _	t
| j| jd|| dd| _t| j| j| j||| dd| _d S )	Nr   g      Tz.c_attnbiasr$   r%   .c_proj.attn)scaler#   r$   r%   )super__init__hidden_sizenum_attention_headsr   	num_headshead_dimr*   r   c_attnr   c_projr   attn)selfr"   r#   r$   r%   total_num_heads tensor_model_parallel_world_size	__class__ \/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/gpt2.pyr,   @   s>   

zGPT2Attention.__init__hidden_statesreturnc                 C   sB   |  |\}}|jddd\}}}| |||}| |\}}|S )N   )chunksdim)r1   chunkr3   r2   )r4   r;   qkv_qkvattn_outputr9   r9   r:   forwardh   s
   zGPT2Attention.forwardNNr!   __name__
__module____qualname__r   r   r   strr,   torchTensorrH   __classcell__r9   r9   r7   r:   r    ?   s$    (r    c                	       sN   e Zd Z		ddedededB def fddZd	ej	d
ej	fddZ
  ZS )GPT2MLPNr!   intermediate_sizer"   r$   r%   c                    sT   t    |j}t||d|| dd| _t||d|| dd| _t|j| _	d S )NTz.c_fcr&   r(   )
r+   r,   r-   r   c_fcr   r2   r   activation_functionact)r4   rS   r"   r$   r%   r-   r7   r9   r:   r,   t   s"   
zGPT2MLP.__init__r;   r<   c                 C   s*   |  |\}}| |}| |\}}|S N)rT   rV   r2   )r4   r;   rC   r9   r9   r:   rH      s   
zGPT2MLP.forward)Nr!   )rK   rL   rM   intr   r   rN   r,   rO   rP   rH   rQ   r9   r9   r7   r:   rR   s   s    rR   c                	       r   )	GPT2BlockNr!   r"   r#   r$   r%   c                    s   t    |j}|jd ur|jnd| }tj||jd| _t|||| dd| _	tj||jd| _
t|||| dd| _d S )N   epsr)   r%   z.mlp)r+   r,   r-   n_innerr   	LayerNormlayer_norm_epsilonln_1r    r3   ln_2rR   mlp)r4   r"   r#   r$   r%   r-   	inner_dimr7   r9   r:   r,      s   
zGPT2Block.__init__r;   r<   c                 C   sF   |}|  |}| j|d}|| }|}| |}| |}|| }|S )N)r;   )ra   r3   rb   rc   )r4   r;   residualrG   feed_forward_hidden_statesr9   r9   r:   rH      s   


zGPT2Block.forwardrI   rJ   r9   r9   r7   r:   rY      s$    rY   c                       s   e Zd Zdddedef fddZdejdejfd	d
Zdejdejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )	GPT2Modelr!   r]   vllm_configr%   c                   s   t    |jj|j |j| _jrJ jrJ j	r!J j
| _tj| j| dd| _tj| j| _tj fdd| dd\| _| _| _tj| jjd| _tdgj| _d S )	Nz.wter$   r%   c                    s   t  | dS )Nr]   )rY   r]   r#   r"   r$   r9   r:   <lambda>   s    z$GPT2Model.__init__.<locals>.<lambda>z.hr]   r[   r;   )r+   r,   model_config	hf_configr#   r$   r"   add_cross_attentionscale_attn_by_inverse_layer_idxreorder_and_upcast_attnr-   	embed_dimr   
vocab_sizewter   	Embeddingmax_position_embeddingswper   num_hidden_layersstart_layer	end_layerhr_   r`   ln_fr   n_embdmake_empty_intermediate_tensors)r4   rh   r%   r7   rj   r:   r,      s2   




zGPT2Model.__init__	input_idsr<   c                 C   s
   |  |S rW   )rs   r4   r~   r9   r9   r:   embed_input_ids   s   
zGPT2Model.embed_input_idsposition_idsintermediate_tensorsNinputs_embedsc                 C   s   t  jr|d u r| |}| |}|| }n
|d usJ |d }t| j| j| jD ]}||}q*t  js;t	d|iS | 
|}|S )Nr;   )r
   is_first_rankr   rv   r   rz   rx   ry   is_last_rankr   r{   )r4   r~   r   r   r   position_embedsr;   layerr9   r9   r:   rH      s   




zGPT2Model.forwardweightsc           	      C   s   t | jdd}t }|D ];\}}d|v sd|v rqt|| r q|| }dD ]}||vr-q&|ds3q&| }q&t|dt}||| || q|S )NF)remove_duplicatez
.attn.biasz.attn.masked_bias)r1   r2   rT   z.weightweight_loader)	dictnamed_parameterssetr   endswithtgetattrr   add)	r4   r   params_dictloaded_paramsnameloaded_weightparamconv1d_weight_namer   r9   r9   r:   load_weights   s$   



zGPT2Model.load_weights)rK   rL   rM   r	   rN   r,   rO   rP   r   r   rH   r   tupler   r   rQ   r9   r9   r7   r:   rg      s    
,rg   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdejde	dB dejdB deje	B f
ddZ
dejdejdB fddZdeeeejf  dee fddZ  ZS )GPT2LMHeadModelr!   r]   rh   r%   c                   s   t    |jj}|j}|| _|| _t|t|dd| _t	| jj
| jj|| dd| _| jjr9| j| jj| _t|j
| _| jj| _d S )Ntransformerrh   r%   z.lm_headri   )r+   r,   rl   rm   r$   r"   rg   r   r   r   rr   r-   lm_headtie_word_embeddingstie_weightsrs   r   logits_processorr}   )r4   rh   r%   r"   r$   r7   r9   r:   r,     s&   

zGPT2LMHeadModel.__init__r~   r<   c                 C      | j |S rW   r   r   r   r9   r9   r:   r   (     zGPT2LMHeadModel.embed_input_idsN	positionsr   r   c                 C   s   |  ||||}|S rW   r   r4   r~   r   r   r   r;   r9   r9   r:   rH   +  s   zGPT2LMHeadModel.forwardr;   c                 C   s   |  | j|}|S rW   )r   r   )r4   r;   logitsr9   r9   r:   compute_logits7  s   zGPT2LMHeadModel.compute_logitsr   c                 C   s   t | }t|}||S rW   )r   _add_transformer_prefixr   r4   r   loaderr9   r9   r:   r   >  s   
zGPT2LMHeadModel.load_weightsNN)rK   rL   rM   r	   rN   r,   rO   rP   r   r   rH   r   r   r   r   r   rQ   r9   r9   r7   r:   r     s,    

,r   c                       s   e Zd ZdZdZdddedef fddZd	ej	d
ej	fddZ
deeeej	f  fddZ		dd	ej	dej	dedB dej	dB d
ej	f
ddZ  ZS )GPT2ForSequenceClassificationaq  GPT2 Model for sequence classification.

    This class expands GPT2Model with pooling and score functions - last token
    is being used for classification.

    Attributes:
        transformer: An instance of GPT2Model used for forward operations.
        score: A layer for calculating logits.
        _pooler: An instance of Pooler used for pooling operations.
    Tr!   r]   rh   r%   c                   sl   t    |jj}t|t|dd| _tj|j	|j
d|jjd| _|jj}|d us+J tj|| jd| _d S )Ngpt2r   F)r'   dtype)
classifier)r+   r,   rl   rm   rg   r   r   r   Linearr|   
num_labels
head_dtypescorepooler_configr   for_seq_clspooler)r4   rh   r%   r"   r   r7   r9   r:   r,   R  s   

z&GPT2ForSequenceClassification.__init__r~   r<   c                 C   r   rW   r   r   r9   r9   r:   r   d  r   z-GPT2ForSequenceClassification.embed_input_idsr   c                 C   s   t | }||S rW   )r   r   r   r9   r9   r:   r   g  s   
z*GPT2ForSequenceClassification.load_weightsNr   r   r   c                 C   s   | j ||||d}|S )N)r~   r   r   r   r   r   r9   r9   r:   rH   k  s   z%GPT2ForSequenceClassification.forwardr   )rK   rL   rM   __doc__is_pooling_modelr	   rN   r,   rO   rP   r   r   r   r   r   rH   rQ   r9   r9   r7   r:   r   D  s&    r   r   r<   c                 c   s:    | D ]\}}| ds| dsd| }||fV  qd S )Nztransformer.r   )
startswith)r   r   tensorr9   r9   r:   r   {  s   r   ):r   collections.abcr   	itertoolsr   rO   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributed.parallel_stater
   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   !vllm.model_executor.layers.poolerr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   Moduler    rR   rY   rg   r   r   r   rN   rP   r   r9   r9   r9   r:   <module>   s@   	4!%V47