o
    -i'Q                     @   s  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7 G dd dej8Z9G dd dej8Z:G dd dej8Z;eG dd  d ej8Z<G d!d" d"ej8e.e/Z=dS )#zAInference-only Apertus model compatible with HuggingFace weights.    )Iterable)isliceN)nn)ApertusConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)XIELU)EncoderOnlyAttention)RMSNorm)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)AttentionType   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sT   e Zd Z				ddededededB d	ed
ededdf fddZdd Z  Z	S )
ApertusMLPNF Thidden_sizeintermediate_size
hidden_actquant_configbiasprefixreduce_resultsreturnc                    sd   t    t||||| dd| _t|||||| dd| _|dkr,td| dt | _d S )Nz.up_proj
input_sizeoutput_sizer+   r*   r,   z
.down_proj)r0   r1   r+   r*   r-   r,   xieluzUnsupported activation: z". Only xIELU is supported for now.)	super__init__r   up_projr   	down_proj
ValueErrorr   act_fn)selfr'   r(   r)   r*   r+   r,   r-   	__class__ _/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/apertus.pyr4   L   s*   


zApertusMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r5   r8   r6   )r9   x_r<   r<   r=   forwardm   s   
zApertusMLP.forward)NFr&   T)
__name__
__module____qualname__intstrr   boolr4   rA   __classcell__r<   r<   r:   r=   r%   K   s.    	!r%   c                       s   e Zd Zddddddejfdedededed	ed
edB dedede	dB de
de
ddf fddZdejdejdejfddZded
edB ddfddZ  ZS )ApertusAttention    NFr&   configr'   	num_headsnum_kv_headsmax_position_embeddingsr*   r+   bias_o_projcache_configr,   	attn_typer.   c                    s  t    t|
}|| _t }|| _| j| dksJ | j| | _|| _| j|kr3| j| dks2J n	|| j dks<J td| j| | _	t
|dd }|d u rU| j| j }|| _| j| j | _| j	| j | _| jd | _|| _t|| j| j| j|||
 dd| _t| j| j ||||
 dd| _| j||d	 d }t
|d
d  }r|| dk}|r|j}|tjkrtnt}|| j| j| j| j	|	||||
 dd	| _t| j|jd| _t| j|jd| _d S )Nr   r   head_dimg      	.qkv_proj)r'   	head_sizetotal_num_headstotal_num_kv_headsr+   r*   r,   z.o_projr/   r*   layer_typessliding_attentionz.attn)rM   rP   r*   per_layer_sliding_windowrQ   r,   eps)r3   r4   r    r'   r   rU   rL   rV   maxrM   getattrrR   q_sizekv_sizescalingrN   r   qkv_projr   o_proj_init_rotary_embsliding_windowr   ENCODER_ONLYr   r   attnr   rms_norm_epsq_normk_norm)r9   rK   r'   rL   rM   rN   r*   r+   rO   rP   r,   rQ   	layer_idxtp_sizerR   re   rX   
is_slidingattn_clsr:   r<   r=   r4   u   sv   




zApertusAttention.__init__	positionshidden_statesc           
      C   s   |  |\}}|j| j| j| jgdd\}}}| | d| j|}| 	| d| j|}| 
|||\}}| |||}| |\}	}|	S )N)dim)rb   splitr_   r`   ri   
contiguousviewrR   view_asrj   
rotary_embrg   rc   )
r9   ro   rp   qkvr@   qkvattn_outputoutputr<   r<   r=   rA      s    zApertusAttention.forwardc                 C   sB   d}|o	|  dk}|r|jdkrd}t| j| j|j|d| _d S )NTggufapertusF)max_positionrope_parametersis_neox_style)get_name
model_typer   rR   rN   r   rw   )r9   rK   r*   r   is_ggufr<   r<   r=   rd      s   z!ApertusAttention._init_rotary_emb)rB   rC   rD   r   DECODERr   rE   r   rG   r   rF   r4   torchTensorrA   rd   rH   r<   r<   r:   r=   rI   t   s^    	
X
rI   c                       sr   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )ApertusDecoderLayerNr&   rK   rP   r*   r,   r.   c           	         s   t    |j| _t|dd}t|ddpt|dd}|}t|dr%|j}t|ddr/tj}ntj}t	|| j|j
t|d	|j
|||||| d
|d| _t| j|j|j|t|dd| dd| _t|j|jd| _t|j|jd| _d S )NrN   rJ   attention_biasFr+   qkv_bias	is_causalTnum_key_value_headsz
.self_attn)rK   r'   rL   rM   rN   r*   r+   rO   rP   r,   rQ   mlp_biasz.mlp)r'   r(   r)   r*   r+   r,   r[   )r3   r4   r'   r^   hasattrr   r   r   rf   rI   num_attention_heads	self_attnr%   r(   r)   mlpr   rh   attention_layernormfeedforward_layernorm)	r9   rK   rP   r*   r,   rN   r   rO   rQ   r:   r<   r=   r4      sN   


zApertusDecoderLayer.__init__ro   rp   residualc                 C   sX   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| |}||fS )N)ro   rp   )r   r   r   r   )r9   ro   rp   r   r<   r<   r=   rA   '  s   
zApertusDecoderLayer.forward)NNr&   )rB   rC   rD   r   r   r   rF   r4   r   r   tuplerA   rH   r<   r<   r:   r=   r      s0    9r   c                       s   e Zd Zdeddededeej f fddZ	de
jd	e
jfd
dZ	dde
jdB de
jdedB de
jdB d	e
jeB ee
jee
j f B f
ddZdeeee
jf  d	ee fddZ  ZS )ApertusModelr&   r,   
layer_typevllm_configr,   r   c                   s   t    |jj|j |j| _| _j| _t j	s$j
r/t jr/t| jjd| _nt | _tj fdd| dd\| _| _| _t jrYtjjd| _nt | _ttdf  | _tdd	gj| _d S )
NrW   c                    s    | dS )N)rK   rP   r*   r,   r<   r,   rP   rK   r   r*   r<   r=   <lambda>[  s    z'ApertusModel.__init__.<locals>.<lambda>z.layersr   r[   .rp   r   )r3   r4   model_config	hf_configrP   r*   rK   
vocab_sizer
   is_first_ranktie_word_embeddingsis_last_rankr   r'   embed_tokensr   r#   num_hidden_layersstart_layer	end_layerlayersr   rh   normr   rE   aux_hidden_state_layersr"   make_empty_intermediate_tensorsr9   r   r,   r   r:   r   r=   r4   =  s<   




zApertusModel.__init__	input_idsr.   c                 C   s
   |  |S r>   )r   r9   r   r<   r<   r=   embed_input_idsn  s   
zApertusModel.embed_input_idsNro   intermediate_tensorsinputs_embedsc                 C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }g }tt| j| j| jD ]\}}	|| jv r>|	||  |	|||\}}q.t  j
sRt||dS | ||\}}
t|dkrd||fS |S )Nrp   r   )rp   r   r   )r
   r   r   	enumerater   r   r   r   r   appendr   r   r   len)r9   r   ro   r   r   rp   r   aux_hidden_statesidxlayerr@   r<   r<   r=   rA   q  s.   

zApertusModel.forwardweightsc                 C   s  g d}t |  }|  D ]\}}|ds|dr |||< qt }|D ]\}}d|v r/q&d|v s7d|v r8q&| jd urf| j| }rf|| }	t|	dt}
|	 dkrW|n|d }|
|	| |
| q&d	|v snd
|v rxt||}|d u rxq&|D ].\}}}||vrqz|||}|dr||vrqzt|| rqz|| }	|	j}
|
|	||  n|dr||vrq&t|| rq&|| }	t|	dt}
|
|	| |
| q&|S )N))rS   z.q_projry   )rS   z.k_projrz   )rS   z.v_projr{   z.betaz.epszrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   scale
zero_pointz.bias)dictnamed_parametersnamed_buffersendswithsetr*   get_cache_scaler^   r   rr   addr   replacer!   r   )r9   r   stacked_params_mappingparams_dictnamebufferloaded_paramsloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idr<   r<   r=   load_weights  s^   






zApertusModel.load_weightsr>   )rB   rC   rD   r   r	   rF   typer   Moduler4   r   r   r   r   r   listrA   r   r   r   rH   r<   r<   r:   r=   r   ;  s0    1
,%r   c                       s,  e Zd Zdg diZdddZdedded	ed
ee	j
 f fddZdeedf ddfddZdeedf fddZdefded	ed
ee	j
 fddZdejdejfddZ		d%dejdejdedB dejdB dejeB f
ddZdejdejdB fd d!Zd"eeeejf  dee fd#d$Z  ZS )&ApertusForCausalLMrb   )q_projk_projv_projinput_embeddingsoutput_embeddings)r   lm_headr&   r   r   r,   r   c                   s   t    |jj}|j}|| _| j|t|d|d| _t	 j
rHt|j|j|t|dd| _|jr9| j| jj| _t|dd}t|j|d| _nt | _| jj| _d S )Nmodelr   r,   r   r   )r*   r,   logit_scaleg      ?)r   )r3   r4   r   r   r*   rK   _init_modelr$   r   r
   r   r   r   r'   r   r   tie_weightsr   r^   r   logits_processorr   r   )r9   r   r,   r   rK   r*   r   r:   r<   r=   r4     s2   

zApertusForCausalLM.__init__r   .r.   Nc                 C   s   || j _d S r>   )r   r   )r9   r   r<   r<   r=   set_aux_hidden_state_layers     z.ApertusForCausalLM.set_aux_hidden_state_layersc                 C   s   t | jj}d|d |d fS )N      )r   r   r   )r9   
num_layersr<   r<   r=   "get_eagle3_aux_hidden_state_layers  s   z5ApertusForCausalLM.get_eagle3_aux_hidden_state_layersc                 C   s   t |||dS )Nr   )r   r   r<   r<   r=   r     s   zApertusForCausalLM._init_modelr   c                 C   s   | j |S r>   )r   r   r   r<   r<   r=   r     r   z"ApertusForCausalLM.embed_input_idsro   r   r   c                 C   s   |  ||||}|S r>   )r   )r9   r   ro   r   r   model_outputr<   r<   r=   rA     s   zApertusForCausalLM.forwardrp   c                 C   s   |  | j|}|S r>   )r   r   )r9   rp   logitsr<   r<   r=   compute_logits+  s   z!ApertusForCausalLM.compute_logitsr   c                 C   s$   t | | jjr	dgnd d}||S )Nzlm_head.)skip_prefixes)r   rK   r   r   )r9   r   loaderr<   r<   r=   r   2  s
   
zApertusForCausalLM.load_weights)NN)rB   rC   rD   packed_modules_mappingembedding_modulesr   r	   rF   r   r   r   r4   r   rE   r   r   r   r   r   r   r   rA   r   r   r   r   rH   r<   r<   r:   r=   r     sZ    	'



,r   )>__doc__collections.abcr   	itertoolsr   r   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   %vllm.model_executor.layers.activationr   ;vllm.model_executor.layers.attention.encoder_only_attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.v1.attention.backendr   
interfacesr   r   utilsr   r   r    r!   r"   r#   r$   r   r%   rI   r   r   r   r<   r<   r<   r=   <module>   s<   $)yN  