o
    
۾iE                     @   s  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ	 ddl
mZ ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z) ddl*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9 ee:Z;G dd dej<Z=G dd dej<Z>G dd dej<Z?edd ddd!d"G d#d$ d$ej<Z@G d%d& d&ej<e1e2ZAdS )'zAInference-only SeedOss model compatible with HuggingFace weights.    )Iterable)isliceN)nn)PretrainedConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)init_logger)
SiluAndMul)	Attention)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)set_default_rope_theta)AttentionType   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sH   e Zd Z		ddededededB deddf fd	d
Zdd Z  ZS )
SeedOssMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    sh   t    t||gd d|| dd| _t||d|| dd| _|dkr.td| dt | _d S )	N   Fz.gate_up_projbiasr*   r+   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr'   r(   r)   r*   r+   	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/seed_oss.pyr2   J   s(   

zSeedOssMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r3   r6   r4   )r7   xgate_up_r:   r:   r;   forwardg   s   
zSeedOssMLP.forward)Nr&   )	__name__
__module____qualname__intstrr   r2   r@   __classcell__r:   r:   r8   r;   r%   I   s"    r%   c                       s|   e Zd Zddddejfdededededed	ed
edB dedB de	de	ddf fddZ
dejdejdejfddZ  ZS )SeedOssAttentioni   Nr&   r'   	num_headsnum_kv_headshead_dimrope_parametersmax_positioncache_configr*   r+   	attn_typer,   c              
      s4  t    || _t }|| _| j| dksJ | j| | _|| _|| _| j|kr2| j| dks1J n	|| j dks;J td| j| | _	| j| j | _
| j	| j | _| jd | _t|| j| j| jd||	 dd| _t| j| j |d||	 dd| _t| j||d	| _t| j| j| j| j	|||
|	 d
d| _d S )Nr   r   g      Tz	.qkv_projr.   Fz.o_proj)rL   rK   z.attn)rI   rM   r*   rN   r+   )r1   r2   r'   r
   total_num_headsrH   total_num_kv_headsrJ   maxrI   q_sizekv_sizescalingr   qkv_projr   o_projr   
rotary_embr   attn)r7   r'   rH   rI   rJ   rK   rL   rM   r*   r+   rN   tp_sizer8   r:   r;   r2   o   s\   

	
zSeedOssAttention.__init__	positionshidden_statesc           
      C   s`   |  |\}}|j| j| j| jgdd\}}}| |||\}}| |||}| |\}	}|	S )N)dim)rU   splitrR   rS   rW   rX   rV   )
r7   rZ   r[   qkvr?   qkvattn_outputoutputr:   r:   r;   r@      s    zSeedOssAttention.forward)rA   rB   rC   r   DECODERrD   dictr   r   rE   r2   torchTensorr@   rF   r:   r:   r8   r;   rG   n   sH    	
CrG   c                       sr   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )SeedOssDecoderLayerNr&   configrM   r*   r+   r,   c                    s   t    |j| _t|dd t|ddrtj}ntj}t| j|j	|j
|j|j|||j| d|d
| _t| j|j|j|| dd| _t|j|jd	| _t|j|jd	| _d S )
Ni@B )default_theta	is_causalTz
.self_attn)
r'   rH   rL   rI   rJ   rM   r*   rK   r+   rN   z.mlp)r'   r(   r)   r*   r+   eps)r1   r2   r'   r   getattrr   re   ENCODER_ONLYrG   num_attention_headsmax_position_embeddingsnum_key_value_headsrJ   rK   	self_attnr%   r(   r)   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)r7   rj   rM   r*   r+   rN   r8   r:   r;   r2      s:   
zSeedOssDecoderLayer.__init__rZ   r[   residualc                 C   sX   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| |}||fS )N)rZ   r[   )rw   rt   rx   ru   )r7   rZ   r[   ry   r:   r:   r;   r@      s   
zSeedOssDecoderLayer.forward)NNr&   )rA   rB   rC   SeedOssConfigr   r   rE   r2   rg   rh   tupler@   rF   r:   r:   r8   r;   ri      s0    ,ri   r\   )	input_idsrZ   intermediate_tensorsinputs_embeds)dynamic_arg_dimsc                       s   e Zd Zdeddededeej f fddZ	de
jd	e
jfd
dZ		dde
jdB de
jdedB de
jdB d	e
jeB f
ddZdeeee
jf  d	ee fddZ  ZS )SeedOssModelr&   )r+   decoder_layer_typevllm_configr+   r   c                   s  t    |jj|j |j jd ur)tdr)jj	ks)J d
jj	| _| _j| _t js>jrMt jrMtjj| dd| _nt | _pTttj	 fdd| dd\| _| _| _td	d
gj| _t jrtjjd| _d S t | _d S )Nmax_window_layerszSliding window for some but all layers is not supported. This model uses sliding window but `max_window_layers` = {} is less than `num_hidden_layers` = {}. Please open an issue to discuss this feature.z.embed_tokensr*   r+   c                    s    | dS )N)rj   rM   r*   r+   r:   r+   rM   rj   r   r*   r:   r;   <lambda>;  s    z'SeedOssModel.__init__.<locals>.<lambda>z.layersr   r[   ry   rm   )r1   r2   model_config	hf_configrM   r*   sliding_windowhasattrr   num_hidden_layersformatrj   
vocab_sizer	   is_first_ranktie_word_embeddingsis_last_rankr   r'   embed_tokensr    ri   r#   start_layer	end_layerlayersr"   make_empty_intermediate_tensorsr   rv   norm)r7   r   r+   r   r8   r   r;   r2     sR   



zSeedOssModel.__init__r|   r,   c                 C   s
   |  |S r<   )r   r7   r|   r:   r:   r;   embed_input_idsL  s   
zSeedOssModel.embed_input_idsNrZ   r}   r~   c           	      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS | 	||\}}|S )Nr[   ry   )r[   ry   )
r	   r   r   r   r   r   r   r   r   r   )	r7   r|   rZ   r}   r~   r[   ry   layerr?   r:   r:   r;   r@   O  s(   

zSeedOssModel.forwardweightsc                 C   sT  g d}t | jdd}t }|D ]\}}d|v rq| jd urH| j| }rH|| }t|dt}	| dkr9|n|d }|	|| || q|D ].\}
}}||vrTqJ|	||
}|
drd||vrdqJt|| rjqJ|| }|j}	|	|||  n)|
dr||vrqt||}|d u rqt|| rq|| }t|dt}	|	|| || q|S )N))rU   q_projr`   )rU   k_projra   )rU   v_projrb   )r3   	gate_projr   )r3   up_projr   F)remove_duplicatezrotary_emb.inv_freqweight_loaderr   z.bias)rf   named_parameterssetr*   get_cache_scalero   r   r]   addreplaceendswithr!   r   r   )r7   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idr:   r:   r;   load_weightsm  sP   






zSeedOssModel.load_weightsNN)rA   rB   rC   ri   r   rE   typer   Moduler2   rg   rh   r   r   r@   r   r{   r   r   rF   r:   r:   r8   r;   r     s2    @
,r   c                       s   e Zd Zg dddgdZdddedef fd	d
ZdejdejfddZ			ddejdB dejde
dB dejdB deje
B f
ddZdejdejdB fddZdeeeejf  dee fddZ  ZS )SeedOssForCausalLM)r   r   r   r   r   )rU   r3   r&   r   r   r+   c                   s   t    |jj}|j}|| _|| _t|t|dd| _t	 j
r8|jr)| jj| _nt|j|j|t|dd| _nt | _t|j| _| jj| _d S )Nmodel)r   r+   lm_headr   )r1   r2   r   r   r*   rj   r   r$   r   r	   r   r   r   r   r   r   r'   r    r   logits_processorr   )r7   r   r+   rj   r*   r8   r:   r;   r2     s*   


zSeedOssForCausalLM.__init__r|   r,   c                 C   s   | j |S r<   )r   r   r   r:   r:   r;   r     s   z"SeedOssForCausalLM.embed_input_idsNrZ   r}   r~   c                 C   s   |  ||||}|S r<   )r   )r7   r|   rZ   r}   r~   r[   r:   r:   r;   r@     s   zSeedOssForCausalLM.forwardr[   c                 C   s   |  | j|}|S r<   )r   r   )r7   r[   logitsr:   r:   r;   compute_logits  s   z!SeedOssForCausalLM.compute_logitsr   c                 C   s$   t | | jjr	dgnd d}||S )Nzlm_head.)skip_prefixes)r   rj   r   r   )r7   r   loaderr:   r:   r;   r     s
   
zSeedOssForCausalLM.load_weightsr   )rA   rB   rC   packed_modules_mappingr   rE   r2   rg   rh   r   r   r@   r   r   r{   r   r   rF   r:   r:   r8   r;   r     s6    

,r   )B__doc__collections.abcr   	itertoolsr   rg   r   transformersr   rz   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.transformers_utils.configr   vllm.v1.attention.backendr   
interfacesr   r   utilsr   r    r!   r"   r#   r$   rA   loggerr   r%   rG   ri   r   r   r:   r:   r:   r;   <module>   sL    	%QD 