o
    
۾i                     @   s   d Z ddlmZ ddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZmZ ddlmZmZ ddlmZ ddlmZmZmZ ee Z!e	ddddddG dd deZ"G dd deej#Z$dS )z>Inference-only MiMo model compatible with HuggingFace weights.    )Iterable)isliceN)support_torch_compile)
VllmConfig)get_pp_group)init_logger)LogitsProcessor)ParallelLMHead)default_weight_loadermaybe_remap_kv_scale_name)Qwen2ForCausalLM
Qwen2Model)IntermediateTensors   )PPMissingLayeris_pp_missing_parametermaybe_prefix)	input_ids	positionsintermediate_tensorsinputs_embeds)dynamic_arg_dimsc                   @   sl   e Zd Z		ddejdB dejdedB dejdB dejeB f
ddZd	eee	ejf  de
e	 fd
dZdS )	MiMoModelNr   r   r   r   returnc                 C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS || }|S )Nhidden_statesresidual)r   r   )	r   is_first_rankembed_input_idsr   layersstart_layer	end_layeris_last_rankr   )selfr   r   r   r   r   r   layer r%   S/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/mimo.pyforward>   s(   

zMiMoModel.forwardweightsc                 C   s^  g d}t | jdd}t }|D ]\}}d|v rqd|v rq| jd urM| j| }rM|| }t|dt}	| dkr>|n|d }|	|| || q|D ].\}
}}||vrYqO|	||
}|
dri||vriqOt|| roqO|| }|j}	|	|||  n)|
dr||vrqt||}|d u rqt|| rq|| }t|dt}	|	|| || q|S )	N))qkv_projq_projq)r)   k_projk)r)   v_projv)gate_up_proj	gate_projr   )r0   up_projr   F)remove_duplicate
mtp_layerszrotary_emb.inv_freqweight_loaderr   z.bias)dictnamed_parameterssetquant_configget_cache_scalegetattrr
   dimaddreplaceendswithr   r5   r   )r#   r(   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr5   
param_nameweight_nameshard_idr%   r%   r&   load_weights\   sT   






zMiMoModel.load_weights)NN)__name__
__module____qualname__torchTensorr   r'   r   tuplestrr8   rJ   r%   r%   r%   r&   r   5   s    
(r   c                   @   s>   e Zd ZdddedefddZdejdejd	B fd
dZd	S )MiMoForCausalLM )prefixvllm_configrT   c                C   s   t j|  |jj}|j}|| _|| _t|t|dd| _	t
 jr9|jr*| j	j| _nt|j|j|t|dd| _nt | _t|j| _| j	j| _d S )Nmodel)rU   rT   lm_head)r9   rT   )nnModule__init__model_config	hf_configr9   configr   r   rV   r   r"   tie_word_embeddingsembed_tokensrW   r	   
vocab_sizehidden_sizer   r   logits_processormake_empty_intermediate_tensors)r#   rU   rT   r]   r9   r%   r%   r&   rZ      s*   

zMiMoForCausalLM.__init__r   r   Nc                 C   s   | j |}| | j|}|S )N)rV   normrb   rW   )r#   r   logitsr%   r%   r&   compute_logits   s   zMiMoForCausalLM.compute_logits)	rK   rL   rM   r   rQ   rZ   rN   rO   rf   r%   r%   r%   r&   rR      s     rR   )%__doc__collections.abcr   	itertoolsr   rN   torch.nnrX   vllm.compilation.decoratorsr   vllm.configr   vllm.distributedr   vllm.loggerr   +vllm.model_executor.layers.logits_processorr   3vllm.model_executor.layers.vocab_parallel_embeddingr	   -vllm.model_executor.model_loader.weight_utilsr
   r    vllm.model_executor.models.qwen2r   r   vllm.sequencer   utilsr   r   r   rK   loggerr   rY   rR   r%   r%   r%   r&   <module>   s2   X