o
    -i4                     @   s\  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)m*Z*m+Z+ G dd dej,Z-G dd dej,Z.G dd dej,Z/G dd dej,Z0G dd dej,e%Z1dS )zmInference-only StableLM (https://github.com/Stability-AI/StableLM)
model compatible with HuggingFace weights.    )Iterable)isliceN)nn)StableLmConfig)	Attention)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                	       sN   e Zd Z		ddededB deddf fddZd	ejdejfd
dZ	  Z
S )StablelmMLPN configquant_configprefixreturnc                    sn   t    || _|j| _|j| _t|j|jgd d|| dd| _t|j|jd|| dd| _t	 | _
d S )N   Fz.gate_up_projbiasr    r!   z
.down_proj)super__init__r   hidden_sizeintermediate_sizer   gate_up_projr   	down_projr   act_fn)selfr   r    r!   	__class__ `/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/stablelm.pyr'   >   s&   

zStablelmMLP.__init__xc                 C   s*   |  |\}}| |}| |\}}|S N)r*   r,   r+   )r-   r2   gate_up_r0   r0   r1   forwardX   s   
zStablelmMLP.forward)Nr   )__name__
__module____qualname__r   r   strr'   torchTensorr6   __classcell__r0   r0   r.   r1   r   =   s    r   c                       s^   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	fddZ
  ZS )StablelmAttentionNr   r   cache_configr    r!   r"   c              	      s  t    || _|j| _t }|j| _| j| | _|j| _	| j	|kr,| j	| dks+J n	|| j	 dks5J t
d| j	| | _| j| j | _|j| _| jd | _| j| j | _| j| j | _t|dd| _| j| j | | jkr|td| j d| j dt| j| j| j| j	| j|| d	d
| _t| j| j | jd|| dd| _t| j| jj| jjd| _t| j| j| j| j||| dd| _d S )Nr   r   g      use_qkv_biasFz?hidden_size must be divisible by num_heads (got `hidden_size`: z and `num_heads`: z).z	.qkv_projr    r!   z.o_projr$   )max_positionrope_parametersz.attn)num_kv_headsr?   r    r!   )r&   r'   r   r(   r
   num_attention_headstotal_num_heads	num_headsnum_key_value_headstotal_num_key_value_headsmaxhead_dimmax_position_embeddingsscalingq_sizekv_sizegetattrqkv_bias
ValueErrorr   qkv_projr   o_projr   rC   
rotary_embr   attn)r-   r   r?   r    r!   tp_sizer.   r0   r1   r'   `   sn   

	
zStablelmAttention.__init__	positionshidden_statesc           
      C   s`   |  |\}}|j| j| j| jgdd\}}}| |||\}}| |||}| |\}	}|	S )N)dim)rS   splitrN   rO   rU   rV   rT   )
r-   rX   rY   qkvr5   qkvattn_outputoutputr0   r0   r1   r6      s    zStablelmAttention.forwardNNr   )r7   r8   r9   r   r   r   r:   r'   r;   r<   r6   r=   r0   r0   r.   r1   r>   _   s,    Dr>   c                       sh   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	de
ej	ej	f fddZ  ZS )StablelmDecoderLayerNr   r   r?   r    r!   r"   c                    st   t    t|||| dd| _t||| dd| _t|dt|dd}tj|j	|d| _
tj|j	|d| _d S )Nz
.self_attnr!   z.mlpnorm_epslayer_norm_epsh㈵>eps)r&   r'   r>   	self_attnr   mlprP   r   	LayerNormr(   input_layernormpost_attention_layernorm)r-   r   r?   r    r!   rf   r.   r0   r1   r'      s   
zStablelmDecoderLayer.__init__rX   rY   c                 C   sL   |}|  |}| j||d}|| }|}| |}| |}|| }||fS )N)rX   rY   )rn   rk   ro   rl   )r-   rX   rY   residualr0   r0   r1   r6      s   


zStablelmDecoderLayer.forwardrc   )r7   r8   r9   r   r   r   r:   r'   r;   r<   tupler6   r=   r0   r0   r.   r1   rd      s,    rd   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )StableLMEpochModelr   re   vllm_configr!   c                   s   t    |jj|j |jtjj| dd| _	t
j fdd| dd\| _| _| _tdtdd	}tjj|d
| _tdgj| _d S )Nz.embed_tokensrA   c                    s   t  | dS )Nre   )rd   re   r?   r   r    r0   r1   <lambda>   s    z-StableLMEpochModel.__init__.<locals>.<lambda>z.layersre   rf   rg   rh   ri   rY   )r&   r'   model_config	hf_configr?   r    r   
vocab_sizer(   embed_tokensr   num_hidden_layersstart_layer	end_layerlayersrP   r   rm   normr   make_empty_intermediate_tensors)r-   rs   r!   rf   r.   rt   r1   r'      s(   

zStableLMEpochModel.__init__	input_idsr"   c                 C   s
   |  |S r3   )ry   r-   r   r0   r0   r1   embed_input_ids   s   
z"StableLMEpochModel.embed_input_idsNrX   intermediate_tensorsinputs_embedsc                 C   s~   t  jr|d ur|}n| |}n
|d usJ |d }t| j| j| jD ]	}|||\}}q$t  js8td|iS | 	|}|S )NrY   )
r	   is_first_rankr   r   r}   r{   r|   is_last_rankr   r~   )r-   r   rX   r   r   rY   layerrp   r0   r0   r1   r6      s   
zStableLMEpochModel.forwardweightsc                 C   s   g d}t |  }t }|D ]Y\}}|D ].\}}}	||vrq|||}|dr/||vr/qt|| r5q|| }
|
j}||
||	  n|drN||vrNqt|| rTq|| }
t|
dt}||
| |	| q|S )N))rS   q_projr^   )rS   k_projr_   )rS   v_projr`   )r*   	gate_projr   )r*   up_projr   z.biasweight_loader)
dictnamed_parameterssetreplaceendswithr   r   rP   r   add)r-   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   r0   r0   r1   load_weights  s2   


zStableLMEpochModel.load_weightsr3   )r7   r8   r9   r   r:   r'   r;   r<   r   r   r6   r   rq   r   r   r=   r0   r0   r.   r1   rr      s     
,rr   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdejde	dB dejdB deje	B f
ddZ
dejdejdB fddZdeeeejf  dee fddZ  ZS )StablelmForCausalLMr   re   rs   r!   c                   s   t    |jj}|j}|| _|| _t|t|dd| _t	|j
|j|| dd| _| jjr5| jjj| j_t|j
| _| jj| _d S )Nmodel)rs   r!   z.lm_headrA   )r&   r'   rv   rw   r    r   rr   r   r   r   rx   r(   lm_headtie_word_embeddingsry   weightr   logits_processorr   )r-   rs   r!   r   r    r.   r0   r1   r'   4  s&   

zStablelmForCausalLM.__init__r   r"   c                 C   s   | j |S r3   )r   r   r   r0   r0   r1   r   J  s   z#StablelmForCausalLM.embed_input_idsNrX   r   r   c                 C   s   |  ||||}|S r3   )r   )r-   r   rX   r   r   rY   r0   r0   r1   r6   M  s   zStablelmForCausalLM.forwardrY   c                 C   s   |  | j|}|S r3   )r   r   )r-   rY   logitsr0   r0   r1   compute_logitsY  s   z"StablelmForCausalLM.compute_logitsr   c                 C   s   t | }||S r3   )r   r   )r-   r   loaderr0   r0   r1   r   `  s   
z StablelmForCausalLM.load_weights)NN)r7   r8   r9   r   r:   r'   r;   r<   r   r   r6   r   r   rq   r   r   r=   r0   r0   r.   r1   r   3  s,    

,r   )2__doc__collections.abcr   	itertoolsr   r;   r   transformersr   vllm.attention.layerr   vllm.configr   r   vllm.distributedr	   r
   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler   r>   rd   rr   r   r0   r0   r0   r1   <module>   s0   	"R(Z