o
    
۾iM                     @   s  d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZmZmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8 ee9Z:G dd dej;Z<G dd dej;Z=G dd dej;Z>G d d! d!ej;Z?e
G d"d# d#ej;Z@G d$d% d%ej;e2ZAdS )&zInference-only Jurassic model.    )Iterable)islice)AnyN)nn)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)init_logger)
SiluAndMul)	Attention)FusedMoE)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors)Step3TextConfig   )
SupportsPP)PPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sJ   e Zd Z		ddededB def fddZdejd	ejfd
dZ	  Z
S )FusedMoEBlockN configquant_configprefixc              
      s   t    t | _| j|jkrtd| j d|j dt|j|j|j|j	d|j
|| dd| _t|j|jdd | dd| _d S )	NzTensor parallel size z' is greater than the number of experts .Fz.experts)num_expertstop_khidden_sizeintermediate_sizereduce_resultsrenormalizer(   r)   z.gatebiasr(   r)   )super__init__r   tp_sizemoe_num_experts
ValueErrorr   	moe_top_kr-   moe_intermediate_sizenorm_expert_weightexpertsr   gate)selfr'   r(   r)   	__class__ Y/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/step3_text.pyr4   6   s2   


zFusedMoEBlock.__init__hidden_statesreturnc                 C   sT   |j }|j d }|d|}| |\}}| j||d}| jdkr%t|}||S )N)rB   router_logitsr   )shapeviewr<   r;   r5   r   )r=   rB   
orig_shape
hidden_dimrE   _final_hidden_statesr@   r@   rA   forwardW   s   


zFusedMoEBlock.forwardNr&   )__name__
__module____qualname__r   r   strr4   torchTensorrL   __classcell__r@   r@   r>   rA   r%   5   s    !r%   c                       sV   e Zd Z		ddededededB deddf fd	d
ZdejdejfddZ	  Z
S )Step3TextMLPNr&   r-   r.   
hidden_actr(   r)   rC   c                    sn   t    t||gd d|| dd| _t||d|| dd| _|dkr.td| dt | _|| _	d S )	N   F.gate_up_projr1   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)
r3   r4   r   gate_up_projr   	down_projr7   r   act_fnr-   )r=   r-   r.   rV   r(   r)   r>   r@   rA   r4   h   s*   


zStep3TextMLP.__init__rB   c                 C   s*   |  |\}}| |}| |\}}|S N)rZ   r\   r[   )r=   rB   gate_uprJ   intermediate_actoutputr@   r@   rA   rL      s   
zStep3TextMLP.forwardrM   )rN   rO   rP   intrQ   r   r4   rR   rS   rL   rT   r@   r@   r>   rA   rU   g   s"    rU   c                       s   e Zd Z						ddedededed	eeef d
edB dedededB de	dB def fddZ
dejdejdejfddZ  ZS )Step3TextAttentionN       r&   r-   	num_headsnum_kv_headsnorm_epsrope_parametersshare_q_dimmax_position_embeddinghead_dimcache_configr(   r)   c                    s:  t    || _t }|| _| j| dksJ | j| | _|dkr)td| d|| _|| _| j| j | _	|r:|n| j| _
t|| j
| j	d  d|
| dd| _t| j| j |d|
| d	d| _t| j
|d
| _t| j
| j| j d|
| dd| _t| j||d| _| jd }t| j| j|| j|	| dd| _d S )Nr   r   z3Step3TextAttention num_kv_heads must be 1, but got r*   rW   F	.qkv_projr1   z.o_projepsz.wq)max_positionrh   g      z.attn)rl   r)   )r3   r4   r-   r   total_num_headsre   r7   rf   rk   kv_sizeq_sizer   qkv_projr   o_projr   
inter_normr   wqr   
rotary_embr   attn)r=   r-   re   rf   rg   rh   ri   rj   rk   rl   r(   r)   r5   scalingr>   r@   rA   r4      sd   




zStep3TextAttention.__init__	positionsrB   rC   c           
      C   sx   |  |\}}|j| j| j| jgdd\}}}| |}| |d }| |||\}}| |||}| |\}	}|	S )NrD   )dimr   )	rt   splitrs   rr   rv   rw   rx   ry   ru   )
r=   r{   rB   qkvrJ   qkvattn_outputresidualr@   r@   rA   rL      s    
zStep3TextAttention.forward)Nrc   rd   NNr&   )rN   rO   rP   ra   floatdictrQ   r   r   r   r4   rR   rS   rL   rT   r@   r@   r>   rA   rb      sJ    
	
Frb   c                       sr   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )Step3TextDecoderLayerNr&   r'   rl   r(   r)   rC   c                    s6  t    |j| _t| j|jd|||j|j|j|j|j	| dd| _
t|dd dd }t|dd }|d urHdd	 | d
D }ndd	 td|jD }||v rut||| dd| _t| j|jd|| dd| _d| _nt|j|jd|| dd| _d| _t|j|jd| _t|j|jd| _d S )Nr   z
.self_attn)r-   re   rf   rl   r(   rg   rj   rk   ri   rh   r)   zlayers.r*   r   moe_layers_enumc                 S   s   g | ]}t |qS r@   )ra   .0ir@   r@   rA   
<listcomp>       z2Step3TextDecoderLayer.__init__.<locals>.<listcomp>,c                 S   s   g | ]}|qS r@   r@   r   r@   r@   rA   r      s    z.moe)r'   r(   r)   rY   z.share_expert)r-   r.   rV   r(   r)   Tz.mlpFrn   )r3   r4   r-   rb   num_attention_headsrms_norm_epsrj   rk   ri   rh   	self_attnra   r}   getattrstriprangenum_hidden_layersr%   moerU   share_expert_dimshare_expertuse_moer.   mlpr   input_layernormpost_attention_layernorm)r=   r'   rl   r(   r)   	layer_idxr   moe_layers_idxr>   r@   rA   r4      sX   
zStep3TextDecoderLayer.__init__r{   rB   r   c                 C   s   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| jr8| |}| |}|| }||fS | |}||fS )N)r{   rB   )r   r   r   r   r   r   r   )r=   r{   rB   r   share_output
moe_outputr@   r@   rA   rL     s    


zStep3TextDecoderLayer.forward)NNr&   )rN   rO   rP   r   r   r   rQ   r4   rR   rS   tuplerL   rT   r@   r@   r>   rA   r      s0    :r   c                       sz   e Zd Zddededdf fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB dejf
ddZ
  ZS )Step3TextModelr&   vllm_configr)   rC   Nc                    s   t    |jj|j |jj| _| _t j	s!j
r*t jr*t| jj| _nt | _tj fdd| dd\| _| _| _t jrStjjd| _nt | _tdgj| _d S )Nc                    s   t  | dS )N)r'   rl   r(   r)   )r   r)   rl   r'   r(   r@   rA   <lambda>O  s    z)Step3TextModel.__init__.<locals>.<lambda>z.layersr   rn   rB   )r3   r4   model_config	hf_configrl   r(   
vocab_sizer'   r
   is_first_ranktie_word_embeddingsis_last_rankr   r-   embed_tokensr    r#   r   start_layer	end_layerlayersr   r   normr"   make_empty_intermediate_tensors)r=   r   r)   r>   r   rA   r4   ;  s6   


zStep3TextModel.__init__	input_idsc                 C   s
   |  |S r]   )r   r=   r   r@   r@   rA   embed_input_ids`  s   
zStep3TextModel.embed_input_idsr{   intermediate_tensorsinputs_embedsc           	      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS | 	||\}}|S )NrB   r   )rB   r   )
r
   r   r   r   r   r   r   r   r   r   )	r=   r   r{   r   r   rB   r   layerrJ   r@   r@   rA   rL   c  s$   
zStep3TextModel.forward)r&   NN)rN   rO   rP   r	   rQ   r4   rR   rS   r   r   rL   rT   r@   r@   r>   rA   r   9  s     %r   c                
       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB fddZ
dejdejfddZdeeeejf  dee fddZ  ZS )Step3TextForCausalLMr&   r   r   r)   c                   sr   t    |jj}|| _|| _t||d| _t j	r.t
|j|jt|dd| _t|j| _nt | _| jj| _d S )N)r   r)   lm_headr   )r3   r4   r   r   r'   r   r   modelr
   r   r   r   r-   r$   r   r   logits_processorr    r   )r=   r   r)   r'   r>   r@   rA   r4     s   
zStep3TextForCausalLM.__init__r   rC   c                 C   s   | j |S r]   )r   r   r   r@   r@   rA   r     s   z$Step3TextForCausalLM.embed_input_idsNr{   r   r   c                 C   s   |  ||||}|S r]   )r   )r=   r   r{   r   r   rB   r@   r@   rA   rL     s   zStep3TextForCausalLM.forwardrB   c                 C   s   |  | j|}|S r]   )r   r   )r=   rB   logitsr@   r@   rA   compute_logits  s   z#Step3TextForCausalLM.compute_logitsweightsc              
      s  ddd| j j| j j| j jd   fdd| j j| j j| j jd   | j j| j j | j j| j jd   fdd| j j| j j | j j| j jd   | j j| j jd  | j j| j jd   fg}ddg}t|  }t }g d	}d
d |D }|D ]\ }|D ]5\}	}
}|
 vrq{t fdd|D rq{ |
|	 t | rq{|  }|j	}|||| |
   n|D ]K}|\}	}
}|
 vrq |
|	 t | rq dsՈ drڈ |vrq|  }|j	}t|jd D ]}|| }||| ||d q|
   nd|D ]F\}	}
}}|
 vrq |
|	 t | rq|  }|j|j }t|| }t|| }||j||| }|| |
   nt | rOqu|  }t|dt}||| |
  qu|S )Nrm   z.q_projr   rW   z.k_projz.v_proj)rX   z
.gate_projr   )rX   z.up_projr   )).moe.experts.w13_weightz.moe.gate_proj.weightw1)r   z.moe.up_proj.weightw3)z.moe.experts.w2_weightz.moe.down_proj.weightw2c                 S   s   g | ]}|d  qS )r   r@   )r   datar@   r@   rA   r     r   z5Step3TextForCausalLM.load_weights.<locals>.<listcomp>c                 3   s    | ]}| v V  qd S r]   r@   )r   disable_moe_stacked_paramnamer@   rA   	<genexpr>  s
    
z4Step3TextForCausalLM.load_weights.<locals>.<genexpr>z.bias_bias)shard_id	expert_idweight_loader)r'   ri   rk   r   named_parameterssetanyreplacer!   r   addendswithr   rF   
output_dimra   narrowcopy_r   r   )r=   r   qkv_params_mappingstacked_params_mappingparams_dictloaded_paramsexpert_params_mappingdisable_moe_stacked_paramsloaded_weight
param_nameweight_namer   paramr   mappingr   loaded_weight_expert	start_idxend_idxr|   	begin_idxparam_slicer@   r   rA   load_weights  s   	









z!Step3TextForCausalLM.load_weightsr   )rN   rO   rP   r	   rQ   r4   rR   rS   r   r   rL   r   r   r   r   r   rT   r@   r@   r>   rA   r     s*    
,r   )B__doc__collections.abcr   	itertoolsr   typingr   rR   r   vllm.compilation.decoratorsr   vllm.configr   r   r	   vllm.distributedr
   r   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   (vllm.transformers_utils.configs.step3_vlr   
interfacesr   utilsr    r!   r"   r#   r$   rN   loggerModuler%   rU   rb   r   r   r   r@   r@   r@   rA   <module>   s@   2&TXJ