o
    -icQ                     @   s  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 G dd dej8Z9G dd dej8Z:G dd dej8Z;G dd dej8Z<eG d d! d!ej8Z=G d"d# d#ej8e0e/Z>dS )$zInference-only dots1 model.    )Iterable)isliceN)nn)Dots1Config)	Attention)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)
SiluAndMul)SharedFusedMoE)RMSNorm)MergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sN   e Zd Z			ddededededB ded	ed
df fddZdd Z  Z	S )Dots1MLPNT hidden_sizeintermediate_size
hidden_actquant_configreduce_resultsprefixreturnc                    sj   t    t||gd d|| dd| _t||d||| dd| _|dkr/td| d	t | _d S )
N   Fz.gate_up_projbiasr+   r-   z
.down_proj)r1   r+   r,   r-   siluUnsupported activation: !. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr(   r)   r*   r+   r,   r-   	__class__ ]/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/dots1.pyr6   M   s*   
	
zDots1MLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r7   r:   r8   )r;   xgate_up_r>   r>   r?   forwardl   s   
zDots1MLP.forward)NTr'   )
__name__
__module____qualname__intstrr   boolr6   rD   __classcell__r>   r>   r<   r?   r&   L   s(    r&   c                       sJ   e Zd Z		ddededB def fddZdejd	ejfd
dZ	  Z
S )Dots1MoENr'   configr+   r-   c                    s  t    t | _|j| _|j| _|jdkrtd|j dt|j	|j
dd | dd| _|jdkr?tt|j
| j_nd | j_|jd ur_|j|j }t|j	||j|d| dd	| _nd | _t| j|j
|j|j	|jd|j|d
|j|j| d|jd| jjd| _d S )Nr2   r3   r4   Fz.gater0   noaux_tcz.shared_experts)r(   r)   r*   r+   r,   r-   Tz.expertsg      ?)shared_expertsnum_expertstop_kr(   r)   r,   renormalizer+   use_grouped_topknum_expert_group
topk_groupr-   scoring_funcrouted_scaling_factore_score_correction_bias)r5   r6   r   tp_sizerW   n_shared_expertsr*   r9   r   r(   n_routed_expertsgatetopk_methodr   	ParametertorchemptyrX   moe_intermediate_sizer&   rO   r   num_experts_per_toknorm_topk_probn_grouprU   rV   experts)r;   rM   r+   r-   r)   r<   r>   r?   r6   t   s`   






	zDots1MoE.__init__hidden_statesr.   c           	      C   sx   |j \}}|d|}| |\}}| j||d\}}| jd ur(|| | j }n|| j }| jdkr6t|}|||S )N)rf   router_logitsr   )shapeviewr\   re   rO   rW   rY   r   )	r;   rf   
num_tokens
hidden_dimrh   rC   
shared_out
routed_outfinal_hidden_statesr>   r>   r?   rD      s   




zDots1MoE.forward)Nr'   )rE   rF   rG   r   r   rI   r6   r_   TensorrD   rK   r>   r>   r<   r?   rL   s   s    ?rL   c                       sp   e Zd Z				ddededededed	edB d
edB deddf fddZde	j
de	j
de	j
fddZ  ZS )Dots1Attention    Nr'   r(   	num_headsnum_kv_headsrM   max_position_embeddingscache_configr+   r-   r.   c	              	      sr  t    || _t }	|| _| j|	 dksJ | j|	 | _|| _| j|	kr/| j|	 dks.J n	|	| j dks8J td| j|	 | _t	|d|| j | _
| j| j
 | _| j| j
 | _| j
d | _|| _|j}
t|| j
| j| j|
|| dd| _t| j| j
 |d|| dd| _t| j
||jd	| _t| j| j
| j| j||| d
d| _t| j
|jd| _t| j
|jd| _d S )Nr   r   head_dimg      z	.qkv_projr0   Fz.o_proj)max_positionrope_parametersz.attn)rt   rv   r+   r-   eps)r5   r6   r(   r   total_num_headsrs   total_num_kv_headsmaxrt   getattrrw   q_sizekv_sizescalingru   attention_biasr   qkv_projr   o_projr   ry   
rotary_embr   attnr   rms_norm_epsq_normk_norm)r;   r(   rs   rt   rM   ru   rv   r+   r-   rY   r   r<   r>   r?   r6      sb   



	zDots1Attention.__init__	positionsrf   c           
      C   s   |  |\}}|j| j| j| jgdd\}}}| |d| j| j|j}| 	|d| j
| j|j}| |||\}}| |||}| |\}	}|	S )Nrg   )dim)r   splitr   r   r   reshapers   rw   ri   r   rt   r   r   r   )
r;   r   rf   qkvrC   qkvattn_outputoutputr>   r>   r?   rD     s     zDots1Attention.forward)rr   NNr'   )rE   rF   rG   rH   r   r   r   rI   r6   r_   rp   rD   rK   r>   r>   r<   r?   rq      s>    	
Erq   c                       sj   e Zd Z		ddededededB dedB ddf fdd	Zd
e	j
de	j
de	j
dB de	j
fddZ  ZS )Dots1DecoderLayerNrM   r-   model_configrv   r+   r.   c              
      s   t    |j| _t|dd}t|jddd }|| _t| j|j|j	||||| dd| _
|jd urL||jkrL||j dkrLt||| d	d
| _nt|j|j|j|| d	d| _t|j|jd| _t|j|jd| _|j| _d S )Nru   rr   .)seprg   z
.self_attn)r(   rs   rt   rM   ru   rv   r+   r-   r   z.mlp)rM   r+   r-   )r(   r)   r*   r+   r-   rz   )r5   r6   r(   r   rH   r   	layer_idxrq   num_attention_headsnum_key_value_heads	self_attnr[   first_k_dense_replacemoe_layer_freqrL   mlpr&   r)   r*   r   r   input_layernormpost_attention_layernormrW   )r;   rM   r-   r   rv   r+   ru   r   r<   r>   r?   r6     sB   



zDots1DecoderLayer.__init__r   rf   residualc                 C   sX   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| |}||fS )N)r   rf   )r   r   r   r   )r;   r   rf   r   r>   r>   r?   rD   J  s   
zDots1DecoderLayer.forwardNN)rE   rF   rG   r   rI   r	   r   r   r6   r_   rp   rD   rK   r>   r>   r<   r?   r     s2    .r   c                       s   e Zd ZdZdddedef fddZdejd	ejfd
dZ		ddejdejde
dB dejdB d	eje
B f
ddZd	eeeeeef  fddZdeeeejf  d	ee fddZ  ZS )
Dots1ModelFr'   r-   vllm_configr-   c                   s   t    |jj|j|j |j| _j| _t j	r,t
jj| dd| _nt | _tj fdd| dd\| _| _| _t jrVtjjd| _nt | _tdd	gj| _d S )
Nz.embed_tokensr+   r-   c                    s   t |  dS )N)r   rv   r+   )r   r   rv   rM   r   r+   r>   r?   <lambda>v  s    z%Dots1Model.__init__.<locals>.<lambda>z.layersr   rz   rf   r   )r5   r6   r   	hf_configrv   r+   rM   
vocab_sizer   is_first_rankr   r(   embed_tokensr!   r$   num_hidden_layersstart_layer	end_layerlayersis_last_rankr   r   normr#   make_empty_intermediate_tensors)r;   r   r-   r<   r   r?   r6   _  s4   



zDots1Model.__init__	input_idsr.   c                 C   s
   |  |S r@   )r   r;   r   r>   r>   r?   embed_input_ids     
zDots1Model.embed_input_idsNr   intermediate_tensorsinputs_embedsc           	      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS | 	||\}}|S )Nrf   r   )rf   r   )
r   r   r   r   r   r   r   r   r   r   )	r;   r   r   r   r   rf   r   layerrC   r>   r>   r?   rD     s(   

zDots1Model.forwardc                 C   s   t j| ddd| jjdS )N	gate_projr8   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerP   )r   make_expert_params_mappingrM   r[   r;   r>   r>   r?   get_expert_mapping  s   zDots1Model.get_expert_mappingweightsc              	   C   sh  g d}t |  }t }|  }|D ]\}}d|v rq|D ]7\}}	}
|	|vr(qd|v r1||vr1q||	|}|drA||vrAqt|| rGq|| }|j}||||
  nV|D ]*}|\}}	}}
|	|vreqX||	|}t|| rqqX|| }|j}|||||
|d  n)|dr||vrqt||}|d u rqt|| rq|| }t	|dt
}||| || q|S )N))r   q_projr   )r   k_projr   )r   v_projr   )r7   r   r   )r7   r   r   zrotary_emb.inv_freqzmlp.experts.z.bias)shard_id	expert_idweight_loader)dictnamed_parameterssetr   replaceendswithr"   r   r   r   r   add)r;   r   stacked_params_mappingparams_dictloaded_paramsexpert_params_mappingnameloaded_weight
param_nameweight_namer   paramr   mappingr   r>   r>   r?   load_weights  sh   




zDots1Model.load_weightsr@   )rE   rF   rG   fall_back_to_pt_during_loadr
   rI   r6   r_   rp   r   r   rD   listtuplerH   r   r   r   r   rK   r>   r>   r<   r?   r   [  s$    )
,	r   c                       s   e Zd Zg dddgdZdddedef fd	d
ZdejdejfddZ			ddejdejde
dB dejdB deje
B f
ddZdejdejdB fddZdeeeejf  dee fddZdeeeeeef  fddZ  ZS )Dots1ForCausalLM)r   r   r   r   r   )r   r7   r'   r   r   r-   c                   s   t    |jj}|j}|| _|| _t|t|dd| _t	 j
r/t|j|j|t|dd| _nt | _t|j| _| jj| _d S )Nmodel)r   r-   lm_headr   )r5   r6   r   r   r+   rM   r   r%   r   r   r   r   r   r(   r   r!   r   logits_processorr   )r;   r   r-   rM   r+   r<   r>   r?   r6     s&   


zDots1ForCausalLM.__init__r   r.   c                 C   s   | j |S r@   )r   r   r   r>   r>   r?   r     s   z Dots1ForCausalLM.embed_input_idsNr   r   r   c                 C   s   |  ||||}|S r@   )r   )r;   r   r   r   r   rf   r>   r>   r?   rD     s   zDots1ForCausalLM.forwardrf   c                 C   s   |  | j|}|S r@   )r   r   )r;   rf   logitsr>   r>   r?   compute_logits*  s   zDots1ForCausalLM.compute_logitsr   c                 C   s   t | }||S r@   )r    r   )r;   r   loaderr>   r>   r?   r   1  s   
zDots1ForCausalLM.load_weightsc                 C   s
   | j  S r@   )r   r   r   r>   r>   r?   r   5  r   z#Dots1ForCausalLM.get_expert_mappingr   )rE   rF   rG   packed_modules_mappingr
   rI   r6   r_   rp   r   r   rD   r   r   r   r   r   r   rH   r   rK   r>   r>   r<   r?   r     s8    

$&r   )?__doc__collections.abcr   	itertoolsr   r_   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   r
   vllm.distributedr   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr    r!   r"   r#   r$   r%   Moduler&   rL   rq   r   r   r   r>   r>   r>   r?   <module>   s<    
'SU@ 