o
    izE                     @   sz  d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 G dd dej1Z2G dd dej1Z3G dd dej1Z4eG dd dej1Z5G dd dej1e(e)Z6dS ) z?Inference-only Solar model compatible with HuggingFace weights.    )IterableN)nn)PretrainedConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)	Attention)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sN   e Zd Z			ddededededB ded	ed
df fddZdd Z  Z	S )SolarMLPNF hidden_sizeintermediate_size
hidden_actquant_configbiasprefixreturnc                    sh   t    t||gd ||| dd| _t||||| dd| _|dkr.td| dt | _d S )	N   .gate_up_proj)
input_sizeoutput_sizesr'   r&   r(   z
.down_projr,   output_sizer'   r&   r(   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr
   act_fn)selfr#   r$   r%   r&   r'   r(   	__class__ V/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/solar.pyr2   E   s(   
	
zSolarMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r3   r6   r4   )r7   xgate_up_r:   r:   r;   forwardc   s   
zSolarMLP.forward)NFr"   )
__name__
__module____qualname__intstrr   boolr2   r@   __classcell__r:   r:   r8   r;   r!   D   s(    r!   c                       sv   e Zd Z					ddedededed	ed
edB dededB deddf fddZ	de
jde
jde
jfddZ  ZS )SolarAttention    NFr"   configr#   	num_headsnum_kv_headsmax_position_embeddingsr&   r'   cache_configr(   r)   c
              	      sZ  t    || _t }
|| _| j|
 dksJ | j|
 | _|| _| j|
kr/| j|
 dks.J n	|
| j dks8J td| j|
 | _t	|dd | _
| j
d u rT| j| j | _
| j| j
 | _| j| j
 | _| j
d | _|| _t|| j
| j| j|||	 dd| _t| j| j
 ||||	 dd| _t| j
||jd	| _t| j| j
| j| j|||	 d
d| _d S )Nr   r   head_dimg      	.qkv_proj)r#   	head_sizetotal_num_headstotal_num_kv_headsr'   r&   r(   z.o_projr.   )max_positionrope_parametersz.attn)rL   rN   r&   r(   )r1   r2   r#   r	   rR   rK   rS   maxrL   getattrrO   q_sizekv_sizescalingrM   r   qkv_projr   o_projr   rU   
rotary_embr   attn)r7   rJ   r#   rK   rL   rM   r&   r'   rN   r(   tp_sizer8   r:   r;   r2   k   s`   


	
zSolarAttention.__init__	positionshidden_statesc           
      C   s`   |  |\}}|j| j| j| jgdd\}}}| |||\}}| |||}| |\}	}|	S )N)dim)r[   splitrX   rY   r]   r^   r\   )
r7   r`   ra   qkvr?   qkvattn_outputoutputr:   r:   r;   r@      s    zSolarAttention.forward)rI   NFNr"   )rA   rB   rC   r   rD   r   rF   r   rE   r2   torchTensorr@   rG   r:   r:   r8   r;   rH   j   sD    	
ErH   c                       sr   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )SolarDecoderLayerNr"   rJ   rN   r&   r(   r)   c                    s   t    |j| _t|dd}t|ddpt|dd}t|| j|jt|d|j||||| dd	| _t| j|j|j	|t|d	d| d
d| _
t|j|jd| _t|j|jd| _d S )NrM   rI   attention_biasFr'   num_key_value_headsz
.self_attn)	rJ   r#   rK   rL   rM   r&   r'   rN   r(   mlp_biasz.mlp)r#   r$   r%   r&   r'   r(   eps)r1   r2   r#   rW   rH   num_attention_heads	self_attnr!   r$   r%   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)r7   rJ   rN   r&   r(   rM   rn   r8   r:   r;   r2      s>   

zSolarDecoderLayer.__init__r`   ra   residualc                 C   sX   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| |}||fS )N)r`   ra   )rw   rt   rx   ru   )r7   r`   ra   ry   r:   r:   r;   r@      s   
zSolarDecoderLayer.forward)NNr"   )rA   rB   rC   r   r   r   rE   r2   rk   rl   tupler@   rG   r:   r:   r8   r;   rm      s0    )rm   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )
SolarModelr"   r(   vllm_configr(   c                   s   t    |jj|j |j| _| _j| _t j	s$j
r-t jr-t| jj| _nt | _tj fdd| dd\| _| _| _t jrVtjjd| _nt | _tddgj| _d S )Nc                    s   t  | dS )N)rJ   rN   r&   r(   )rm   r|   rN   rJ   r&   r:   r;   <lambda>  s    z%SolarModel.__init__.<locals>.<lambda>z.layersr|   rq   ra   ry   )r1   r2   model_config	hf_configrN   r&   rJ   
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr   r#   embed_tokensr   r   num_hidden_layersstart_layer	end_layerlayersr   rv   normr   make_empty_intermediate_tensors)r7   r}   r(   r8   r~   r;   r2      s8   



zSolarModel.__init__	input_idsr)   c                 C   s
   |  |S r<   )r   r7   r   r:   r:   r;   embed_input_ids(  s   
zSolarModel.embed_input_idsNr`   intermediate_tensorsinputs_embedsc                 C   sh  t  jr|d ur|}n| |}d }n|d usJ |d }|d }d }d }d }	d }
| jr2| jjd n| jjd }t| j| jD ]_}|| jj	v rO|
 }|
 }	|| jjv r]|
 }|
 }
|| jjv rw|| |d|   }|	| |d|   }|| jjv r|| |d|   }|
| |d|   }| j| }||||\}}q?t  jst||dS | ||\}}|S )Nra   ry   r   r   )ra   ry   )r   r   r   trainingrJ   bskcn_tvranger   r   bskcn_1clonebskcn_2bskcn_3bskcn_4r   r   r   r   )r7   r   r`   r   r   ra   ry   	bskcn_h_1	bskcn_h_2	bskcn_r_1	bskcn_r_2r   ilayerr?   r:   r:   r;   r@   +  sL   


zSolarModel.forwardweightsc                 C   sF  g d}t |  }t }|D ]\}}| jd urA| j| }rA|| }t|dt}	| dkr2|n|d }|	|| || q|D ].\}
}}||vrMqC|	||
}|
dr]||vr]qCt|| rcqC|| }|j}	|	|||  n)|
dr|||vr|qt||}|d u rqt|| rq|| }t|dt}	|	|| || q|S )N))rP   z.q_projrf   )rP   z.k_projrg   )rP   z.v_projrh   )r+   z
.gate_projr   )r+   z.up_projr   weight_loaderr   z.bias)dictnamed_parameterssetr&   get_cache_scalerW   r   rc   addreplaceendswithr   r   r   )r7   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idr:   r:   r;   load_weights_  sL   






zSolarModel.load_weightsr<   )rA   rB   rC   r   rE   r2   rk   rl   r   r   r@   r   rz   r   r   rG   r:   r:   r8   r;   r{      s     (
,4r{   c                       s   e Zd Zg dddgdZdddZdd	d
edef fddZdej	dej	fddZ
		ddej	dB dej	dedB dej	dB dej	eB f
ddZdej	dej	fddZdeeeej	f  dee fddZ  ZS )SolarForCausalLM)q_projk_projv_proj	gate_projup_proj)r[   r3   input_embeddingsoutput_embeddings)r   lm_headr"   r|   r}   r(   c                   s   t    |jj}|j}|| _|| _t|t|dd| _t	 j
rGt|j|j|t|dd| _|jr8| jjj| j_t|dd}t|j|d| _nt | _| jj| _d S )Nmodel)r}   r(   r   )r&   r(   logit_scaleg      ?)scale)r1   r2   r   r   r&   rJ   r{   r    r   r   r   r   r   r#   r   r   r   weightrW   r   logits_processorr   r   )r7   r}   r(   rJ   r&   r   r8   r:   r;   r2     s2   

zSolarForCausalLM.__init__r   r)   c                 C   s   | j |S r<   )r   r   r   r:   r:   r;   r     s   z SolarForCausalLM.embed_input_idsNr`   r   r   c                 C   s   |  ||||}|S r<   )r   )r7   r   r`   r   r   model_outputr:   r:   r;   r@     s   zSolarForCausalLM.forwardra   c                 C   s   |  | j|}|S r<   )r   r   )r7   ra   logitsr:   r:   r;   compute_logits  s   zSolarForCausalLM.compute_logitsr   c                 C   s   t | }||S r<   )r   r   )r7   r   loaderr:   r:   r;   r     s   
zSolarForCausalLM.load_weights)NN)rA   rB   rC   packed_modules_mappingembedding_modulesr   rE   r2   rk   rl   r   r   r@   r   r   rz   r   r   rG   r:   r:   r8   r;   r     s4    "
,r   )7__doc__collections.abcr   rk   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr   r	   %vllm.model_executor.layers.activationr
   $vllm.model_executor.layers.attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   r    Moduler!   rH   rm   r{   r   r:   r:   r:   r;   <module>   s6    
&SA 