o
    
۾iH                     @   sr  d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZ ddlmZmZ dd	lmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/ G dd dej0Z1G dd dej0Z2G dd dej0Z3eG dd dej0Z4G dd dej0e&e'Z5dS )z?Inference-only Jais2 model compatible with HuggingFace weights.    )IterableN)nn)Jais2Config)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)ReLUSquaredActivation)	Attention)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sN   e Zd Z			ddededededB ded	ed
df fddZdd Z  Z	S )Jais2MLPNF hidden_sizeintermediate_size
hidden_actquant_configbiasprefixreturnc                    sJ   t    t||||| dd| _t||||| dd| _t | _d S )Nz.up_proj
input_sizeoutput_sizer'   r&   r(   z
.down_proj)super__init__r   up_projr   	down_projr
   act_fn)selfr#   r$   r%   r&   r'   r(   	__class__ T/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/jais2.pyr.   I   s    
	zJais2MLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r/   r1   r0   )r2   x_r5   r5   r6   forwardc   s   
zJais2MLP.forward)NFr"   )
__name__
__module____qualname__intstrr   boolr.   r:   __classcell__r5   r5   r3   r6   r!   H   s(    r!   c                       sv   e Zd Z					ddedededed	ed
edB dededB deddf fddZ	de
jde
jde
jfddZ  ZS )Jais2Attention    NFr"   configr#   	num_headsnum_kv_headsmax_position_embeddingsr&   r'   cache_configr(   r)   c
              
      s  t    t|	}
|| _t }|| _| j| dksJ | j| | _|| _| j|kr3| j| dks2J n	|| j dks<J td| j| | _	t
|d| j| j | _| j| j | _| j	| j | _| jd | _|| _t|| j| j| j|||	 dd| _t| j| j ||||	 dd| _d	}|d ur| d
krd}t| j|t
|dd |d| _t|dr|j}t|tr|}nt|tr|
t| }|| }ntt| dd }t| j| j| j| j	||||	 dd| _ d S )Nr   r   head_dimg      	.qkv_proj)r#   	head_sizetotal_num_headstotal_num_kv_headsr'   r&   r(   z.o_projr*   TggufFrope_parameters)max_positionrO   is_neox_styleinterleaved_sliding_windowz is not supported.z.attn)rF   rH   r&   per_layer_sliding_windowr(   )!r-   r.   r   r#   r	   rL   rE   rM   maxrF   getattrrI   q_sizekv_sizescalingrG   r   qkv_projr   o_projget_namer   
rotary_embhasattrrR   
isinstancer>   listlen
ValueErrortyper   attn)r2   rD   r#   rE   rF   rG   r&   r'   rH   r(   	layer_idxtp_sizerQ   rR   sliding_windowsw_idxr3   r5   r6   r.   k   s   








zJais2Attention.__init__	positionshidden_statesc           
      C   s`   |  |\}}|j| j| j| jgdd\}}}| |||\}}| |||}| |\}	}|	S )N)dim)rY   splitrV   rW   r\   rc   rZ   )
r2   rh   ri   qkvr9   qkvattn_outputoutputr5   r5   r6   r:      s    zJais2Attention.forward)rC   NFNr"   )r;   r<   r=   r   r>   r   r@   r   r?   r.   torchTensorr:   rA   r5   r5   r3   r6   rB   j   sD    	
\rB   c                
       sx   e Zd Z	ddedededdf fddZd	ejd
ejdejdB de	ejejf fddZ
dededB fddZ  ZS )Jais2DecoderLayerr"   vllm_configrD   r(   r)   Nc                    s   t    |p
|jj}|j}| |}|j| _t|dd}t|ddp(t|dd}t|| j|j	t|d|j	||||| dd	| _
t| j|j|j|t|d	d| d
d| _tj|j|jd| _tj|j|jd| _d S )NrG   rC   attention_biasFr'   num_key_value_headsz
.self_attn)	rD   r#   rE   rF   rG   r&   r'   rH   r(   mlp_biasz.mlp)r#   r$   r%   r&   r'   r(   eps)r-   r.   model_config	hf_configrH   get_quant_configr#   rU   rB   num_attention_heads	self_attnr!   r$   r%   mlpr   	LayerNormlayer_norm_epsinput_layernormpost_attention_layernorm)r2   rv   rD   r(   rH   r&   rG   rw   r3   r5   r6   r.      sH   


zJais2DecoderLayer.__init__rh   ri   residualc                 C   sh   |d u r|}|  |}n|  || || }}| j||d}| || || }}| |}||fS )N)rh   ri   )r   r   r   r   )r2   rh   ri   r   r5   r5   r6   r:     s   
zJais2DecoderLayer.forwardc                 C   s   |j S )z?Get quantization config for this layer. Override in subclasses.)r&   )r2   rv   r5   r5   r6   r~      s   z"Jais2DecoderLayer.get_quant_configr"   )r;   r<   r=   r   r   r?   r.   rs   rt   tupler:   r   r~   rA   r5   r5   r3   r6   ru      s*    /
ru   c                       s   e Zd Zdefdededeej f fddZ	de
jde
jfd	d
Z	dde
jdB de
jdedB de
jdB de
jeB ee
jee
j f B f
ddZdeeee
jf  dee fddZ  ZS )
Jais2Modelr"   rv   r(   
layer_typec                    s   t    jj j} | _|| _ j| _ j| _ j| _	t
 js) jr6t
 jr6t| j j j|d| _nt | _t j fdd| dd\| _| _| _t
 jr`tj j jd| _nt | _tddg j| _d S )	N)org_num_embeddingsr&   c                    s    | dS )N)rD   rv   r(   r5   r(   rD   r   rv   r5   r6   <lambda>E  s
    z%Jais2Model.__init__.<locals>.<lambda>z.layersr   rz   ri   r   )r-   r.   r|   r}   r&   rD   pad_token_idpadding_idx
vocab_sizeorg_vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr   r#   embed_tokensr   r   num_hidden_layersstart_layer	end_layerlayersr   r   r   normr   make_empty_intermediate_tensors)r2   rv   r(   r   r&   r3   r   r6   r.   '  s>   

	

zJais2Model.__init__	input_idsr)   c                 C   s
   |  |S r7   )r   r2   r   r5   r5   r6   embed_input_idsU  s   
zJais2Model.embed_input_idsNrh   intermediate_tensorsinputs_embedsc           
      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| jD ]}| j| }||||\}}q(t  jsCt||dS | 	|| |}}	|S )Nri   r   )ri   r   )
r   r   r   ranger   r   r   r   r   r   )
r2   r   rh   r   r   ri   r   ilayerr9   r5   r5   r6   r:   X  s"   

zJais2Model.forwardweightsc                 C   sn  g d}t |  }t }|D ]\}}d|v rqd|v s d|v r!q| jd urG| j| }rG|| }t|dt}	|d }|	|| || qd|v rUt||}|d u rUq|D ].\}
}}||vraqW|	||
}|
drq||vrqqWt|| rwqW|| }|j}	|	|||  n)|
dr||vrqt||}|d u rqt|| rq|| }t|dt}	|	|| || q|S )	N))rJ   z.q_projrn   )rJ   z.k_projro   )rJ   z.v_projrp   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   scalez.bias)dictnamed_parameterssetr&   get_cache_scalerU   r   addr   replaceendswithr   r   )r2   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idr5   r5   r6   load_weightsv  sZ   







zJais2Model.load_weightsr7   )r;   r<   r=   ru   r   r?   rb   r   Moduler.   rs   rt   r   r   r   r_   r:   r   r   r   rA   r5   r5   r3   r6   r   %  s0    .
,r   c                       s   e Zd Zdg diZdddZddded	ef fd
dZdded	efddZde	j
de	j
fddZ		dde	j
dB de	j
dedB de	j
dB de	j
eB f
ddZde	j
de	j
dB fddZdeeee	j
f  dee fddZ  ZS ) Jais2ForCausalLMrY   )q_projk_projv_projinput_embeddingsoutput_embeddings)r   lm_headr"   r   rv   r(   c                   s   t    |jj}|j}|| _| j|t|dd| _t	 j
rGt|j|j|t|dd| _|jr8| j| jj| _t|dd}t|j|d| _nt | _| jj| _d S )Nmodelrv   r(   r   )r&   r(   logit_scaleg      ?)r   )r-   r.   r|   r}   r&   rD   _init_modelr    r   r   r   r   r   r#   r   r   tie_weightsr   rU   r   logits_processorr   r   )r2   rv   r(   rD   r&   r   r3   r5   r6   r.     s.   


zJais2ForCausalLM.__init__c                 C   s   t ||dS )Nr   )r   )r2   rv   r(   r5   r5   r6   r        zJais2ForCausalLM._init_modelr   r)   c                 C   s   | j |S r7   )r   r   r   r5   r5   r6   r     r   z Jais2ForCausalLM.embed_input_idsNrh   r   r   c                 C   s   |  ||||}|S r7   )r   )r2   r   rh   r   r   model_outputr5   r5   r6   r:     s   zJais2ForCausalLM.forwardri   c                 C   s   |  | j|}|S r7   )r   r   )r2   ri   logitsr5   r5   r6   compute_logits  s   zJais2ForCausalLM.compute_logitsr   c                 C   s$   t | | jjr	dgnd d}||S )Nzlm_head.)skip_prefixes)r   rD   r   r   )r2   r   loaderr5   r5   r6   r     s
   
zJais2ForCausalLM.load_weightsr   )NN)r;   r<   r=   packed_modules_mappingembedding_modulesr   r?   r.   r   rs   rt   r   r   r:   r   r   r   r   r   rA   r5   r5   r3   r6   r     s8    

,r   )6__doc__collections.abcr   rs   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr   r	   %vllm.model_executor.layers.activationr
   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   r   r    r   r!   rB   ru   r   r   r5   r5   r5   r6   <module>   s4   $"jQ 