o
    
۾ik(                     @   s   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZ d dlmZ d dlmZ ddlmZ G dd deZG dd deZeG dd dejZ dS )    )IterableN)support_torch_compile)
VllmConfig)FusedMoE)RMSNorm)LogitsProcessor)VocabParallelEmbedding)default_weight_loader)DeepSeekMultiTokenPredictor DeepSeekMultiTokenPredictorLayer
SharedHead)maybe_prefix)IntermediateTensors   )OpenPanguDecoderLayerc                   @   s"   e Zd ZdededdfddZdS )!OpenPanguMultiTokenPredictorLayervllm_configprefixreturnNc                 C   s   t j|  |jjj}|| _|j}t|j	|j
d| _t|j	|j
d| _t j|j	d |j	dd| _t||t|dd| _t|||| _d S )N)eps   F)biasshared_head)configquant_configr   )nnModule__init__speculative_configdraft_model_config	hf_configr   r   r   hidden_sizerms_norm_epsenormhnormLineareh_projr   r   r   r   	mtp_block)selfr   r   r   r    r)   \/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/openpangu_mtp.pyr   2   s   
z*OpenPanguMultiTokenPredictorLayer.__init____name__
__module____qualname__r   strr   r)   r)   r)   r*   r   1   s    r   c                   @   s$   e Zd ZdddedefddZdS )OpenPanguMultiTokenPredictor r   r   r   c                   sr   t j|  jj}|j| _|j| _t	j 
 fddt| j| j| j D | _t|j|j| _t|j| _d S )Nc                    s&   i | ]}t |t  d | qS )z.layers.)r/   r   ).0idxr   r   r)   r*   
<dictcomp>L   s    z9OpenPanguMultiTokenPredictor.__init__.<locals>.<dictcomp>)r   r   r   model_configr    num_hidden_layersmtp_start_layer_idxnum_nextn_predict_layersnum_mtp_layerstorch
ModuleDictrangelayersr   
vocab_sizer!   embed_tokensr   logits_processor)r(   r   r   r   r)   r5   r*   r   E   s"   
z%OpenPanguMultiTokenPredictor.__init__Nr+   r)   r)   r)   r*   r0   D   s    r0   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z			ddejdB dejdejde	dB dejdB de
dejfddZ	d dejde
dejdB fddZdd Zdeeeejf  dee fddZde
dedefddZ  ZS )!OpenPanguMTPr1   r2   r   r   c                   s,   t    |jj| _t|t|dd| _d S )Nmodel)r   r   )superr   r7   r    r   r0   r   rD   )r(   r   r   	__class__r)   r*   r   _   s
   


zOpenPanguMTP.__init__	input_idsr   c                 C   s   | j |S N)rD   embed_input_ids)r(   rH   r)   r)   r*   rJ   f   s   zOpenPanguMTP.embed_input_idsNr   	positionshidden_statesintermediate_tensorsinputs_embedsspec_step_idxc                 C   s   |  |||||}|S rI   )rD   )r(   rH   rK   rL   rM   rN   rO   r)   r)   r*   forwardi   s   	zOpenPanguMTP.forwardc                 C   s   | j ||S rI   )rD   compute_logits)r(   rL   rO   r)   r)   r*   rQ   {   s   zOpenPanguMTP.compute_logitsc                 C   sd   d|v r0t | jdr0| jjdkr0t|dd dd }|| jj }|dkr0|| jjk r0|S d S )Nr?   r:   r   zlayers..)hasattrr   r:   intsplitr8   )r(   name	layer_idxmtp_idxr)   r)   r*   get_spec_layer   s   
zOpenPanguMTP.get_spec_layerweightsc              	   C   s  g d}t j| ddd| jjd}t|  }t }|D ]\}}d|v r$q| |}|d u r.q| ||}|D ]<\}	}
}|
|vr@q6d|v rI||vrIq6|	|
|	}|	dkrX||vrXq6|}|
d	rd||vrdq6|| }|j}||||  nK|D ]$}|\}	}
}}|
|vrqu|	|
|	}|| }|j}||||||d
  n$|
d	r||vrq|| jjkrd|vrq|| }t|dt}||| || q|S )N))gate_up_proj	gate_projr   )r\   up_projr   )fused_qkv_a_projq_a_projr   )r_   kv_a_proj_with_mqar   r]   	down_projr^   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namenum_expertszrotary_emb.inv_freqzmlp.experts.r_   z.bias)shard_id	expert_idz.layersweight_loader)r   make_expert_params_mappingr   n_routed_expertsdictnamed_parameterssetrZ   _rewrite_spec_layer_namereplaceendswithri   rD   r9   getattrr	   add)r(   r[   stacked_params_mappingexpert_params_mappingparams_dictloaded_paramsrW   loaded_weight
spec_layer
param_nameweight_namerg   name_mappedparamri   mappingrh   r)   r)   r*   load_weights   sx   

zOpenPanguMTP.load_weightsry   rW   c                 C   s|   g d}dg}d}d}|D ]}||v rd}||v rd} nq|s0| d| dd| d}|S |r<| d| dd}|S )	z
        Rewrite the weight name to match the format of the original model.
        Add .mtp_block for modules in transformer layer block for spec layer
        and rename shared layer weights to be top level.
        )rA   r#   r$   r&   r   rA   FTzmodel.layers.rS   z.mtp_block.zmodel.)rp   )r(   ry   rW   spec_layer_weight_namesshared_weight_namesspec_layer_weightshared_weightr{   r)   r)   r*   ro      s&   z%OpenPanguMTP._rewrite_spec_layer_name)NNr   )r   )r,   r-   r.   r   r/   r   r<   TensorrJ   r   rU   rP   rQ   rZ   r   tuplern   r   ro   __classcell__r)   r)   rF   r*   rC   ]   s@    

$]rC   )!collections.abcr   r<   torch.nnr   vllm.compilation.decoratorsr   vllm.configr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr	   'vllm.model_executor.models.deepseek_mtpr
   r   r    vllm.model_executor.models.utilsr   vllm.sequencer   	openpangur   r   r0   r   rC   r)   r)   r)   r*   <module>   s$   