o
    پi                     @   s   d dl mZmZmZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ dZG dd deZG dd dejZeZdS )    )IterableOptionalTupleN)nn)LogitsProcessor)PoolerPoolingType)QuantizationConfig)ParallelLMHead)ForwardBatch)default_weight_loader)Qwen2DecoderLayer
Qwen2Model)
add_prefixc                	       s8   e Zd Z		d	dedee deddf fddZ  ZS )
	MiMoModelN configquant_configprefixreturnc                    s   t  j|||td d S )N)r   r   r   decoder_layer_type)super__init__r   selfr   r   r   	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/mimo.pyr      s   
zMiMoModel.__init__Nr   )	__name__
__module____qualname__
MiMoConfigr   r	   strr   __classcell__r   r   r   r   r      s    r   c                       s   e Zd Zg dZddddddZ			d$d
edee deddf fddZ	de
jde
jfddZe
 		d%de
jde
jdede
jdede
jfddZdeeee
jf  fddZdd Zdd  Zd!eddfd"d#Z  ZS )&MiMoForCausalLM)z.gate_proj.z.down_proj.z	.up_proj.z.q_proj.z.k_proj.z.v_proj.z.o_proj.)qkv_projr   )r'      )r'      )gate_up_projr   )r*   r(   )q_projk_projv_proj	gate_projup_projNr   r   r   r   r   c                    sx   t    || _|| _t||td|d| _|jr| jj| _	nt
|j|j|td|d| _	t|| _ttjdd| _d S )Nmodel)r   r   lm_headT)pooling_type	normalize)r   r   r   r   r   r   r0   tie_word_embeddingsembed_tokensr1   r
   
vocab_sizehidden_sizer   logits_processorr   r   LASTpoolerr   r   r   r   r   7   s    

zMiMoForCausalLM.__init__	input_idsc                 C   s   | j |S N)r0   get_input_embeddings)r   r;   r   r   r   r=   O   s   z$MiMoForCausalLM.get_input_embeddingsF	positionsforward_batchinput_embedsget_embeddingc                 C   s2   |  ||||}|s| ||| j|S | ||S r<   )r0   r8   r1   r:   )r   r;   r>   r?   r@   rA   hidden_statesr   r   r   forwardR   s   	
zMiMoForCausalLM.forwardweightsc                 C   s   g d}t |  }|D ]q\}}d|v sd|v sd|v rqd|v s%d|v r&q| jjr/d|v r/q|dr9||vr9q|D ](\}}}||vrEq;|||}|d	rU||vrUq;|| }	|	j}
|
|	||  n|d	rn||vrnq|| }	t|	d
t	}
|
|	| qd S )N))r'   r+   q)r'   r,   k)r'   r-   v)r*   r.   r   )r*   r/   r(   zrotary_emb.inv_freq	projector
mtp_layerszrotary_emb.cos_cachedzrotary_emb.sin_cachedzlm_head.weightzmodel.vision_towerz.biasweight_loader)
dictnamed_parametersr   r4   
startswithreplaceendswithrJ   getattrr   )r   rD   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamrJ   r   r   r   load_weightsc   s<   	
zMiMoForCausalLM.load_weightsc                 C   s   | j jj| jjfS r<   )r0   r5   weightr1   )r   r   r   r   get_embed_and_head   s   z"MiMoForCausalLM.get_embed_and_headc                 C   s8   | j j`| j`|| j j_|| j_tj  tj  d S r<   )r0   r5   rZ   r1   torchcudaempty_cachesynchronize)r   embedheadr   r   r   set_embed_and_head   s   

z"MiMoForCausalLM.set_embed_and_headquantization_param_pathc                 C   s   | j | d S r<   )r0   load_kv_cache_scales)r   rc   r   r   r   rd      s   z$MiMoForCausalLM.load_kv_cache_scalesr   )NF)r    r!   r"   #default_bitsandbytes_target_modules#bitsandbytes_stacked_params_mappingr#   r   r	   r$   r   r\   Tensorr=   no_gradr   boolrC   r   r   rY   r[   rb   rd   r%   r   r   r   r   r&   #   sP    .r&   )typingr   r   r   r\   r   "sglang.srt.layers.logits_processorr   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr	   *sglang.srt.layers.vocab_parallel_embeddingr
   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.qwen2r   r   sglang.srt.utilsr   r#   r   Moduler&   
EntryClassr   r   r   r   <module>   s   }