o
    iV;                     @  s  d Z ddlmZ ddlZddlmZ ddlZddlmZ ddlm	Z	m
Z
 ddlmZmZmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* ddl+m,Z, ddl-m.Z. g dddgdZ/d(ddZ0G dd dej1Z2G d d! d!ej1Z3G d"d# d#ej1Z4G d$d% d%ej1Z5G d&d' d'ej1e#Z6dS ))z4Shared Step decoder blocks and the Step1 text model.    )annotationsN)Iterable)nn)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
SiluAndMul)	Attention)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefix)IntermediateTensors)AttentionType)q_projk_projv_proj	gate_projup_proj)qkv_projgate_up_projtotal_num_headsintreturntorch.Tensorc                 C  s   dt t |  }tjdd|  tjd}t|tjdd| tjd}|| krTtjdd|  tjd}| | }tjddd|  dtjd}tj	|t||gdd}|S )z+Reference ALiBi slopes used by Step models.   g       )dtype   g      r   dim)
mathfloorlog2torchtensorfloat32powarangeint32cat)r%   closest_power_of_2baseslopes
extra_basenum_remaining_headsextra_powers r>   V/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/step1.py_get_step_alibi_slopes4   s4   


r@   c                      s0   e Zd Z			dd fd	d
ZdddZ  ZS )StepAttentionN cache_configCacheConfig | Nonequant_configQuantizationConfig | Noneprefixstrc                   s  t    |j| _t }|j| _| j| dksJ | j| | _| j| j | _t|dt|dd}|d u s8|dkr:d}|| _	| j	|krL| j	| dksKJ n	|| j	 dksUJ t
d| j	| | _t| j| j| j| j	t|dd|| dd| _| j| j | _| j| j | _t| j| j | jt|dd|| d	d
| _t }|| j }|d | j }	t| j||	 }
|
 }
| jd | _t| j| j| j| j|||
| ddtjd
| _d S )Nr   num_attention_groupsnum_key_value_headsr+   attention_biasF	.qkv_proj)hidden_size	head_sizer%   total_num_kv_headsbiasrE   rG   z.o_proj
input_sizeoutput_sizerP   rE   rG   g      z.attnT)num_kv_headsrC   rE   alibi_slopesrG   use_alibi_sqrt	attn_type)super__init__rM   r	   num_attention_headsr%   	num_headshead_dimgetattrrO   maxrT   r   r#   q_sizekv_sizer   o_projr   r@   tolistscaler   r   DECODERattn)selfconfigrC   rE   rG   tp_sizerO   tp_rank
head_starthead_endrU   	__class__r>   r?   rY   S   sj   


	


zStepAttention.__init__hidden_statesr(   r'   c           	      C  sN   |  |\}}|j| j| j| jgdd\}}}| |||}| |\}}|S )Nr,   )r#   splitr_   r`   re   ra   )	rf   rn   qkv_qkvattn_outputoutputr>   r>   r?   forward   s
    zStepAttention.forward)NNrB   )rC   rD   rE   rF   rG   rH   )rn   r(   r'   r(   __name__
__module____qualname__rY   rx   __classcell__r>   r>   rl   r?   rA   R   s    CrA   c                      s0   e Zd Z			dd fddZdddZ  ZS )StepMLPNrB   FrM   r&   intermediate_sizerE   rF   rG   rH   rP   boolc                   sN   t    t|||g||| dd| _t||||| dd| _t | _d S )N.gate_up_proj)rR   output_sizesrP   rE   rG   z
.down_projrQ   )rX   rY   r   r$   r   	down_projr
   act_fn)rf   rM   r   rE   rG   rP   rl   r>   r?   rY      s    
zStepMLP.__init__xr(   r'   c                 C  s*   |  |\}}| |}| |\}}|S N)r$   r   r   )rf   r   rr   r>   r>   r?   rx      s   
zStepMLP.forward)NrB   F)
rM   r&   r   r&   rE   rF   rG   rH   rP   r   )r   r(   r'   r(   ry   r>   r>   rl   r?   r~      s    r~   c                      s4   e Zd Zdd fddZdddZdddZ  ZS )StepDecoderLayerrB   vllm_configr   rG   rH   c              	     s   t    |jj}|j}|j}|j| _t|||| dd| _t	| j|j
|| dt|ddd| _t| j|jd| _t| j|jd| _d S )Nz
.self_attn)rg   rC   rE   rG   z.mlpmlp_biasF)rM   r   rE   rG   rP   eps)rX   rY   model_config	hf_configrC   rE   rM   rA   	self_attnr~   r   r]   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)rf   r   rG   rg   rC   rE   rl   r>   r?   rY      s4   

zStepDecoderLayer.__init__	positionsr(   rn   residualtorch.Tensor | Noner'   !tuple[torch.Tensor, torch.Tensor]c                 C  sV   |d u r|}|  |}n|  ||\}}| j|d}| ||\}}| |}||fS )N)rn   )r   r   r   r   )rf   r   rn   r   r>   r>   r?   rx      s   
zStepDecoderLayer.forwardweights"Iterable[tuple[str, torch.Tensor]]set[str]c                 C  s   g d}t |  }t }|D ]Y\}}|D ].\}}}	||vrq|||}|dr/||vr/qt|| r5q|| }
|
j}||
||	  n|drN||vrNqt|| rTq|| }
t|
dt}||
| |	| q|S )N))rL   z.q_projrs   )rL   z.k_projrt   )rL   z.v_projru   )r   z
.gate_projr   )r   z.up_projr+   z.biasweight_loader)
dictnamed_parameterssetreplaceendswithr   r   r]   r   add)rf   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   r>   r>   r?   load_weights   s2   


zStepDecoderLayer.load_weights)rB   r   r   rG   rH   )r   r(   rn   r(   r   r   r'   r   r   r   r'   r   )rz   r{   r|   rY   rx   r   r}   r>   r>   rl   r?   r      s    
r   c                      s<   e Zd Zddd fddZdddZ	ddddZ  ZS )StepDecoderModelrB   rG   r   r   rG   rH   c                  s   t     jj} j}|| _|| _t js|jr(t j	r(t
|j|j|d| _nt | _t|j fddt|dd\| _| _| _t j	rOt|j|jd| _nt | _t|dd| _td	d
g|j| _d S )N)rE   c                   s   t  | dS )Nr   rG   )r   r   r   r>   r?   <lambda>*  s    z+StepDecoderModel.__init__.<locals>.<lambda>layersr   r   aux_hidden_state_layersr>   rn   r   )rX   rY   r   r   rE   rg   r   is_first_ranktie_word_embeddingsis_last_rankr   
vocab_sizerM   embed_tokensr   r   num_hidden_layersr   start_layer	end_layerr   r   r   normr]   r   r   make_empty_intermediate_tensorsrf   r   rG   rg   rE   rl   r   r?   rY     s>   



zStepDecoderModel.__init__	input_idsr(   r'   c                 C  s
   |  |S r   )r   rf   r   r>   r>   r?   embed_input_ids:  s   
z StepDecoderModel.embed_input_idsNr   r   intermediate_tensorsIntermediateTensors | Noneinputs_embedsLtorch.Tensor | IntermediateTensors | tuple[torch.Tensor, list[torch.Tensor]]c                 C  s   t  jr|d ur|}n|d usJ | |}d }n|d usJ |d }|d }g }t| j| j| j D ]"\}}	|| jv rN|d u rG|| n|||  |	|||\}}q4t  j	sbt
||dS | ||\}}
|rp||fS |S )Nrn   r   )rn   r   )r   r   r   	enumerater   r   r   r   appendr   r   r   )rf   r   r   r   r   rn   r   aux_hidden_statesidxlayerrr   r>   r>   r?   rx   =  s0   

zStepDecoderModel.forwardr   r   r(   r'   r(   r   )
r   r   r   r(   r   r   r   r   r'   r   )rz   r{   r|   rY   r   rx   r}   r>   r>   rl   r?   r     s
    
$r   c                      sT   e Zd ZeZddd  fddZd!ddZ	d"d#ddZd$ddZd%ddZ	  Z
S )&Step1ForCausalLMrB   r   r   r   rG   rH   c                  s   t    |jj}|j}|| _|| _t|t|dd| _t	 j
rDt|j|j|t|dd| _t|ddr=| j| jj| _t|j| _nt | _d | _| jj| _d S )Nmodelr   lm_head)rE   rG   r   T)rX   rY   r   r   rE   rg   r   r   r   r   r   r   r   rM   r   r]   tie_weightsr   r   logits_processorr   r   r   rl   r>   r?   rY   g  s.   
zStep1ForCausalLM.__init__r   r(   r'   c                 C  s   | j |S r   )r   r   r   r>   r>   r?   r     s   z Step1ForCausalLM.embed_input_idsNtorch.LongTensor | Noner   r   r   r   r   r   c                 C  s   | j ||||dS )N)r   )r   )rf   r   r   r   r   r>   r>   r?   rx     s   zStep1ForCausalLM.forwardrn   c                 C  s   t  jsd S | | j|S r   )r   r   r   r   )rf   rn   r>   r>   r?   compute_logits  s   zStep1ForCausalLM.compute_logitsr   r   r   c                 C  s   t | }||S r   )r   r   )rf   r   loaderr>   r>   r?   r     s   
zStep1ForCausalLM.load_weightsr   r   r   )
r   r   r   r(   r   r   r   r   r'   r   )rn   r(   r'   r   r   )rz   r{   r|   STEP_PACKED_MODULES_MAPPINGpacked_modules_mappingrY   r   rx   r   r   r}   r>   r>   rl   r?   r   d  s    

r   )r%   r&   r'   r(   )7__doc__
__future__r   r.   collections.abcr   r1   r   vllm.configr   r   vllm.distributedr   r   r	   %vllm.model_executor.layers.activationr
   $vllm.model_executor.layers.attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   %vllm.model_executor.models.interfacesr    vllm.model_executor.models.utilsr   r   r   r   r   r   vllm.sequencer   vllm.v1.attention.backendr   r   r@   ModulerA   r~   r   r   r   r>   r>   r>   r?   <module>   s:    
O!SO