o
    i7                     @   sv  d Z ddlZddlmZ ddlmZ ddlZddlmZ ddlm	Z	 ddl
mZmZ ddlmZmZmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)m*Z* G dd dej+Z,dd Z-G dd dej+Z.G dd dej+Z/G dd dej+Z0e	G dd dej+Z1G d d! d!ej+e%Z2dS )"z>Inference-only Jais model compatible with HuggingFace weights.    N)Iterable)islice)nn)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)	Attention)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors)
JAISConfig   )
SupportsPP)is_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                   @   s(   e Zd ZdejdejdejfddZdS )SwiGLUActivationx1x2returnc                 C   s   |t j| S N)r   
functionalsilu)selfr   r    r$   U/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/jais.pyforward@   s   zSwiGLUActivation.forwardN)__name__
__module____qualname__torchTensorr&   r$   r$   r$   r%   r   ?   s     r   c                 C   s\   dd }t |  r|| S dt t |  }||td| dd d d | |   S )Nc                    s6   ddt | d       fddt| D S )N      c                    s   g | ]} |  qS r$   r$   ).0iratiostartr$   r%   
<listcomp>H   s    zD_get_alibi_slopes.<locals>.get_slopes_power_of_2.<locals>.<listcomp>)mathlog2range)nr$   r0   r%   get_slopes_power_of_2E   s   z0_get_alibi_slopes.<locals>.get_slopes_power_of_2r,   r   )r4   r5   
is_integerfloor_get_alibi_slopes)r7   r8   closest_power_of_2r$   r$   r%   r;   D   s    r;   c                	       T   e Zd Z			ddededB dedB def fddZd	ej	d
ej	fddZ
  ZS )JAISAttentionN configcache_configquant_configprefixc              	      s  t    |j| _|j}t }|| dksJ || | _| j| | _t|dr+|j|_	|j	r0dnd| _
| j| j
  | _t| j| j|d|| dd| _t| j| jd|| dd| _t }|| j }|d	 | j }	t|}
|
||	 }
t| j| j| j|
||| d
d| _d S )Nr   scale_qk_dot_by_dg      ?g      ?Tz.c_attnbiasrB   rC   .c_projr   .attn)scalealibi_slopesrA   rB   rC   )super__init__hidden_sizenum_attention_headsr
   	num_headshead_dimhasattrrD   mup_scale_qk_dot_by_dattn_scale_powerrI   r   c_attnr   c_projr	   r;   r   attn)r#   r@   rA   rB   rC   total_num_heads tensor_model_parallel_world_sizetp_rank
head_starthead_endrJ   	__class__r$   r%   rL   U   sP   



zJAISAttention.__init__hidden_statesr   c                 C   sB   |  |\}}|jddd\}}}| |||}| |\}}|S )Nr-   )chunksdim)rT   chunkrV   rU   )r#   r^   qkv_qkvattn_outputr$   r$   r%   r&      s
   zJAISAttention.forwardNNr?   r'   r(   r)   r   r   r   strrL   r*   r+   r&   __classcell__r$   r$   r\   r%   r>   T   s$    2r>   c                	       sN   e Zd Z		ddedededB def fddZd	ej	d
ej	fddZ
  ZS )JAISMLPNr?   intermediate_sizer@   rB   rC   c                    s   t    |j}|jdk| _t||d|| dd| _| jr*t||d|| ddnd | _t||d|| dd| _	t
 | _d S )NswigluTz.c_fcrE   z.c_fc2rG   )rK   rL   rM   activation_functionro   r   c_fcc_fc2r   rU   r   act)r#   rn   r@   rB   rC   rM   r\   r$   r%   rL      s8   
zJAISMLP.__init__r^   r   c                 C   sP   | j r
| |\}}| |\}}| j r| ||n| |}| |\}}|S r    )ro   rr   rq   rs   rU   )r#   r^   hidden_states2rd   r$   r$   r%   r&      s   zJAISMLP.forward)Nr?   )r'   r(   r)   intr   r   rk   rL   r*   r+   r&   rl   r$   r$   r\   r%   rm      s    &rm   c                	       r=   )	JAISBlockNr?   r@   rA   rB   rC   c                    s   t    |j}|jd ur|jnd| }tj||jd| _t|||| dd| _	tj||jd| _
t|||| dd| _d S )N   epsrH   rC   z.mlp)rK   rL   rM   n_innerr   	LayerNormlayer_norm_epsilonln_1r>   rV   ln_2rm   mlp)r#   r@   rA   rB   rC   rM   	inner_dimr\   r$   r%   rL      s   
zJAISBlock.__init__r^   r   c                 C   sF   |}|  |}| j|d}|| }|}| |}| |}|| }|S )N)r^   )r~   rV   r   r   )r#   r^   residualrh   feed_forward_hidden_statesr$   r$   r%   r&      s   


zJAISBlock.forwardri   rj   r$   r$   r\   r%   rv      s$    rv   c                       s~   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB de	ejB f
ddZ
  ZS )	JAISModelr?   rz   vllm_configrC   c                   s   t    |jj|j |j| _jrJ jrJ j	r!J j
| _tj| j| _jdkr:tj| jnd | _tdrGj| _nj| _tj fdd| dd\| _| _| _tj| jjd| _tdgj | _!d S )	Nalibiembeddings_scalec                    s   t  | dS )N)r@   rA   rB   rC   )rv   rz   rA   r@   rB   r$   r%   <lambda>  s    z$JAISModel.__init__.<locals>.<lambda>z.hrz   rx   r^   )"rK   rL   model_config	hf_configrA   rB   r@   add_cross_attentionscale_attn_by_inverse_layer_idxreorder_and_upcast_attnrM   	embed_dimr   
vocab_sizewteposition_embedding_typer   	Embeddingmax_position_embeddingswperQ   r   mup_embeddings_scaler   num_hidden_layersstart_layer	end_layerhr|   r}   ln_fr   n_embdmake_empty_intermediate_tensors)r#   r   rC   r\   r   r%   rL      s4   







zJAISModel.__init__	input_idsr   c                 C   s
   |  |S r    )r   r#   r   r$   r$   r%   embed_input_ids  s   
zJAISModel.embed_input_idsNposition_idsintermediate_tensorsinputs_embedsc                 C   s   t  jr,|d u r| |}| jd ur| |}|| }n|}|tjt| j|jd9 }n
|d us2J |d }t	| j
| j| jD ]}||}q?t  jsPtd|iS | |}|S )N)dtyper^   )r   is_first_rankr   r   r*   tensorfloatr   r   r   r   r   r   is_last_rankr   r   )r#   r   r   r   r   position_embedsr^   layerr$   r$   r%   r&     s$   






zJAISModel.forwardNN)r'   r(   r)   r   rk   rL   r*   r+   r   r   r&   rl   r$   r$   r\   r%   r      s     'r   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB de	ejB f
ddZ
dejdejdB fddZdeeeejf  dee fddZ  ZS )JAISLMHeadModelr?   rz   r   rC   c                   s   t    |jj}|j}|| _|| _t|t|dd| _| jj	r&| jj
| _nt| jj| jjt|dd| _t|dr?|j| _n|j|j | _t|j| jd| _| jj| _d S )Ntransformer)r   rC   lm_headrz   width_scale)r   rI   )rK   rL   r   r   rB   r@   r   r   r   tie_word_embeddingsr   r   r   r   rM   rQ   r   output_logits_scalemup_output_alphamup_width_scaler   logits_processorr   )r#   r   rC   r@   rB   r\   r$   r%   rL   :  s.   



zJAISLMHeadModel.__init__r   r   c                 C   s   | j |S r    )r   r   r   r$   r$   r%   r   V  s   zJAISLMHeadModel.embed_input_idsN	positionsr   r   c                 C   s   |  ||||}|S r    )r   )r#   r   r   r   r   r^   r$   r$   r%   r&   Y  s   zJAISLMHeadModel.forwardr^   c                 C   s   |  | j|}|S r    )r   r   )r#   r^   logitsr$   r$   r%   compute_logitse  s   zJAISLMHeadModel.compute_logitsweightsc           	      C   s   t | jdd}t }|D ]N\}}d|v rqd|v sd|v rqd|v r$q|ds-d| }t|| r3q|| }dD ]}||vr@q9|d	sFq9| }q9t|d
t}||| |	| q|S )NF)remove_duplicatezlm_head.weightz
.attn.biasz.attn.masked_biasrelative_peztransformer.)rT   rU   rq   z.weightweight_loader)
dictnamed_parametersset
startswithr   endswithtgetattrr   add)	r#   r   params_dictloaded_paramsnameloaded_weightparamconv1d_weight_namer   r$   r$   r%   load_weightsl  s0   




zJAISLMHeadModel.load_weightsr   )r'   r(   r)   r   rk   rL   r*   r+   r   r   r&   r   r   tupler   r   rl   r$   r$   r\   r%   r   9  s,    

,r   )3__doc__r4   collections.abcr   	itertoolsr   r*   r   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr   r	   r
   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   vllm.transformers_utils.configsr   
interfacesr   utilsr   r   r   r   Moduler   r;   r>   rm   rv   r   r   r$   r$   r$   r%   <module>   s6   >4'K