o
    
۾i0                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ d dlmZ ddlmZmZ ddlmZ e
eZG dd dejZ G dd dejZ!G dd dejZ"G dd dejZ#dS )    )IterableN)PretrainedConfig)
VllmConfig)init_logger)GemmaRMSNorm)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )Step3p5DecoderLayer#get_spec_layer_idx_from_weight_name)maybe_prefixc                       sH   e Zd Z	d
dededB ddf fddZdejdejfdd	Z  Z	S )
SharedHeadNconfigquant_configreturnc                    s2   t    t|j|j| _t|j|j|d| _d S )N)r   )	super__init__r   hidden_sizerms_norm_epsnormr	   
vocab_sizehead)selfr   r   	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/step3p5_mtp.pyr      s
   

zSharedHead.__init__hidden_statesc                 C   
   |  |S N)r   )r   r!   r   r   r    forward'      
zSharedHead.forwardr#   )
__name__
__module____qualname__r   r   r   torchTensorr$   __classcell__r   r   r   r    r      s    r   c                       s`   e Zd Zdededdf fddZ		ddejd	ejd
ejdejdB dedejfddZ	  Z
S ) Step3p5AMultiTokenPredictorLayervllm_configprefixr   Nc                    sx   t    |jj}|j}t|j|j| _t|j|j| _	t
j|jd |jdd| _t||d| _t|| dd| _d S )N   F)bias)r   r   z
.mtp_blockr.   )r   r   model_config	hf_configr   r   r   r   enormhnormnnLineareh_projr   shared_headr   	mtp_block)r   r-   r.   r   r   r   r   r    r   ,   s   
z)Step3p5AMultiTokenPredictorLayer.__init__r   	input_ids	positionsprevious_hidden_statesinputs_embedsspec_step_indexc                 C   sJ   |d usJ |  |}| |}| tj||gdd}| j||d}|S )N)dim)r<   r!   )r4   r5   r8   r)   catr:   )r   r;   r<   r=   r>   r?   r!   r   r   r    r$   =   s   

z(Step3p5AMultiTokenPredictorLayer.forwardNr   )r&   r'   r(   r   strr   r)   r*   intr$   r+   r   r   r   r    r,   +   s.    r,   c                       s   e Zd Zdddedef fddZ		dd	ejd
ejdejdejdB dedejfddZ		ddejdedejfddZ
d	ejdejfddZ  ZS )Step3p5AMultiTokenPredictor r1   r-   r.   c                   sp   t    jj}t|j|j| _|j| _	|j
| _tj fddt| j	| j	| j D | _t|j| _d S )Nc                    s&   i | ]}t |t  d | qS )z.layers.)rD   r,   ).0idxr.   r-   r   r    
<dictcomp>]   s    z8Step3p5AMultiTokenPredictor.__init__.<locals>.<dictcomp>)r   r   r2   r3   r
   r   r   embed_tokensnum_hidden_layersmtp_start_layer_idxnum_nextn_predict_layersnum_mtp_layersr)   r6   
ModuleDictrangelayersr   logits_processor)r   r-   r.   r   r   rJ   r    r   R   s"   

z$Step3p5AMultiTokenPredictor.__init__Nr   r;   r<   r=   r>   spec_step_idxr   c                 C   s<   |d u r	|  |}|| j }| jt| j|  |||||S r#   )rL   rP   rS   rD   rN   )r   r;   r<   r=   r>   rU   current_step_idxr   r   r    r$   k   s   

z#Step3p5AMultiTokenPredictor.forwardr!   c                 C   s8   || j  }| jt| j|  }| |jj||}|S r#   )rP   rS   rD   rN   rT   r9   r   )r   r!   rU   rV   	mtp_layerlogitsr   r   r    compute_logits~   s   
z*Step3p5AMultiTokenPredictor.compute_logitsc                 C   r"   r#   )rL   r   r;   r   r   r    embed_input_ids   r%   z+Step3p5AMultiTokenPredictor.embed_input_idsrC   r   )r&   r'   r(   r   rD   r   r)   r*   rE   r$   rY   r[   r+   r   r   r   r    rF   Q   s4    

rF   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z			ddejdejdejde	dB dejdB de
dejfddZ	ddejde
dejdB fddZdeeeejf  dee fddZde
dedefddZ  ZS )
Step3p5MTPrG   r1   r-   r.   c                   s2   t    |jj| _|| _t|t|dd| _d S )Nmodel)r-   r.   )	r   r   r2   r3   r   r-   rF   r   r^   )r   r-   r.   r   r   r    r      s   


zStep3p5MTP.__init__r;   r   c                 C   s   | j |S r#   )r^   r[   rZ   r   r   r    r[      s   zStep3p5MTP.embed_input_idsNr   r<   r!   intermediate_tensorsr>   rU   c                 C   s   |  |||||}|S r#   )r^   )r   r;   r<   r!   r_   r>   rU   r   r   r    r$      s   	
zStep3p5MTP.forwardc                 C   s   | j ||S r#   )r^   rY   )r   r!   rU   r   r   r    rY      s   zStep3p5MTP.compute_logitsweightsc              
   C   sb  g d}g d}t |  }t }|D ]\}}d|v rqt| j|}d|vr+|d u r+q| ||}|D ]:\}	}
}|
|vr=q3d|v rF||vrFq3d|v sNd|v rOq3||
|	}|dr_||vr_q3|| }|j}||||  n|D ]E}|\}	}
}|
|vr|qp||
|	}|ds|d	r||vrqp|| }|j}t	|j
d
 D ]}|| }||||||d q||  nI|dr||vsd|v rq|d urd|v r|dd}d|v r|dd}d|v rt| jdr| jjd
ksJ d}|| }t|dt}||| || qt| }dd | D }||8 }||kr/t|| }|d
 }td| d|S )N))qkv_projq_projq)ra   k_projk)ra   v_projv)gate_up_proj	gate_projr   )rh   up_projr   )).moe.experts.w13_weightz.moe.gate_proj.weightw1)rk   z.moe.up_proj.weightw3)z.moe.experts.w2_weightz.moe.down_proj.weightw2zrotary_emb.inv_freqrL   zmlp.experts.expertsmoez.bias_biasr   )shard_id	expert_idtok_embeddingsz.transformer..r9   zshared_head.outputzshared_head.headrO   zmodel.embed_tokens.weightweight_loaderc                 S   sD   h | ]\}}| d r t|ddd  dkr t|dddu r|qS ))z.k_scalez.v_scalez.q_scalez.prob_scalenumelc                   S   s   dS rC   r   r   r   r   r    <lambda>  s    z3Step3p5MTP.load_weights.<locals>.<setcomp>.<lambda>r   requires_gradF)endswithgetattr)rH   nameparamr   r   r    	<setcomp>  s    z*Step3p5MTP.load_weights.<locals>.<setcomp>zSome parameters like zE are not in the checkpoint and will falsely use random initialization)dictnamed_parameterssetr   r   _rewrite_spec_layer_namereplacerz   rv   rR   shapeaddhasattrrO   r{   r   keysitemslistRuntimeError)r   r`   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsr|   loaded_weight
spec_layer
param_nameweight_namerr   r}   rv   mappingrs   loaded_weight_expertparams_need_to_loadoptional_paramsmissing_paramsparam_name_exampler   r   r    load_weights   s   	




zStep3p5MTP.load_weightsr   r|   c                 C   sJ   g d}d}|D ]
}||v rd} nq|s#| d| dd| d}|S )z
        Rewrite the weight name to match the format of the original model.
        Add .mtp_block for modules in transformer layer block for spec layer
        )rL   r4   r5   r8   r9   FTzmodel.layers.ru   z.mtp_block.)r   )r   r   r|   spec_layer_weight_namesspec_layer_weightr   r   r   r    r   %  s   z#Step3p5MTP._rewrite_spec_layer_name)NNr   r\   )r&   r'   r(   r   rD   r   r)   r*   r[   r   rE   r$   rY   r   tupler   r   r   r+   r   r   r   r    r]      s>    

$vr]   )$collections.abcr   r)   torch.nnr6   transformersr   vllm.configr   vllm.loggerr   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr	   r
   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   step3p5r   r   utilsr   r&   loggerModuler   r,   rF   r]   r   r   r   r    <module>   s&   &=