o
    
۾i3'                     @   sb  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZmZ ddlmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/ e0ej1ej1f Z2G dd dej3Z4e
G dd dej3Z5G dd dej3e$e%e'e&Z6dS )zPyTorch MAMBA model.    )Iterable)isliceN)nn)MambaConfig)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group)RMSNorm)LogitsProcessor)
MambaMixer)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)HasInnerStateIsAttentionFreeSupportsMambaPrefixCaching
SupportsPP)IntermediateTensors   )AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sp   e Zd Z					ddededB dedB dedB dedB d	ed
df fddZ	de
jde
jdB fddZ  ZS )MambaDecoderLayerNF configmodel_configcache_configquant_configis_lora_enabledprefixreturnc                    s   t    || _|jdk| _|| _| jr|jnd }t|j|j	|j
|j|j|j|j| j| j ||j| j||| dd| _t|j|jd| _d S )Nfalcon_mambaz.mixer)hidden_sizessm_state_sizeconv_kernel_sizeintermediate_sizetime_step_rankuse_conv_biasuse_biasuse_rms_normrms_norm_has_weightrms_norm_eps
activationr'   r$   r%   r(   eps)super__init__r#   
model_typeis_falcon_mambar'   mixer_rms_epsr   r+   
state_sizeconv_kernelr.   r/   r0   r1   
hidden_actmixerr   layer_norm_epsilonnorm)selfr#   r$   r%   r&   r'   r(   r<   	__class__ T/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/mamba.pyr9   2   s.   
	zMambaDecoderLayer.__init__hidden_statesresidualc                 K   sF   |d u r|}|  |}n|  ||\}}t|}| || ||fS N)rB   torch
empty_liker@   )rC   rH   rI   kwargsoutputrF   rF   rG   forwardT   s   
zMambaDecoderLayer.forward)NNNFr"   )__name__
__module____qualname__r   r   r   r   boolstrr9   rK   TensorrO   __classcell__rF   rF   rD   rG   r!   1   s4    "r!   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB dejf
ddZ
deeeejf  dee fddZ  ZS )
MambaModelr"   r(   vllm_configr(   c                   s   t    |jj|j|j |j|j}t|| _j	| _	t
| j	j| _tj fdd| dd\| _| _| _tjjd| _tddgj| _d S )Nc                    s   t  | dS )N)r$   r%   r&   r'   r(   )r!   rX   r%   r#   r'   r$   r&   rF   rG   <lambda>|   s    z%MambaModel.__init__.<locals>.<lambda>z.layersrX   r6   rH   rI   )r8   r9   r$   	hf_configr%   r&   lora_configrS   r#   
vocab_sizer   r+   
embeddingsr   num_hidden_layersstart_layer	end_layerlayersr   rA   norm_fr   make_empty_intermediate_tensors)rC   rY   r(   r]   rD   rZ   rG   r9   g   s,   


zMambaModel.__init__	input_idsr)   c                 C   s
   |  |S rJ   )r_   rC   rf   rF   rF   rG   embed_input_ids   s   
zMambaModel.embed_input_idsN	positionsintermediate_tensorsinputs_embedsc           	      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]}||||d\}}q*t  jsAt||dS | 	||\}}|S )NrH   rI   )ri   rH   rI   )rH   rI   )
r
   is_first_rankrh   r   rc   ra   rb   is_last_rankr   rd   )	rC   rf   ri   rj   rk   rH   rI   layer_rF   rF   rG   rO      s$   
zMambaModel.forwardweightsc                 C   s   t |  }t }|D ]2\}}d|v r|dd}|dr#||vr#qt|| r)q|| }t|dt}||| || q|S )NA_logAz.biasweight_loader)	dictnamed_parameterssetreplaceendswithr   getattrr   add)rC   rp   params_dictloaded_paramsnameloaded_weightparamrs   rF   rF   rG   load_weights   s   

zMambaModel.load_weightsNN)rP   rQ   rR   r	   rT   r9   rK   rU   rh   r   rO   r   tuplerv   r   rV   rF   rF   rD   rG   rW   e   s"    %
,rW   c                
       s.  e Zd Zdddedef fddZdejdejfd	d
Z		d#dejdB dejde	dB dejdB fddZ
edddeejejf fddZedddeeeef eeef f fddZedeeef fddZdd ZdefddZdejdejfddZd eeeejf  dee fd!d"Z  ZS )$MambaForCausalLMr"   rX   rY   r(   c                   s   |j j}|j| _t   || _|| _|j | _ t|t|dd| _	|j
r*| j	j| _nt|j|jt|dd| _t|j| _| j	j| _d S )Nbackbone)rY   r(   lm_headrX   )r$   r\   scheduler_configr8   r9   r#   rY   rW   r    r   tie_word_embeddingsr_   r   r   r^   r+   r   logits_processorre   )rC   rY   r(   r#   rD   rF   rG   r9      s&   

zMambaForCausalLM.__init__rf   r)   c                 C      | j |S rJ   )r   rh   rg   rF   rF   rG   rh         z MambaForCausalLM.embed_input_idsNri   rj   rk   c                 K   s   |  ||||}|S rJ   )r   )rC   rf   ri   rj   rk   rM   rH   rF   rF   rG   rO      s   zMambaForCausalLM.forwardr	   c                 C   s   t |jj|jj|jjS rJ   )r   mamba1_state_dtyper$   dtyper%   mamba_cache_dtypemamba_ssm_cache_dtype)clsrY   rF   rF   rG   !get_mamba_state_dtype_from_config   s
   z2MambaForCausalLM.get_mamba_state_dtype_from_configc                 C   s(   |j }|jj}tj|j|j|j|jdS )N)tp_world_sizer.   r=   r>   )	parallel_configr$   r\   r   mamba1_state_shapetensor_parallel_sizer.   r=   r>   )r   rY   r   r\   rF   rF   rG   !get_mamba_state_shape_from_config   s   z2MambaForCausalLM.get_mamba_state_shape_from_configc                 C   s   t  S rJ   )r   mamba1_state_copy_func)r   rF   rF   rG   get_mamba_state_copy_func
  s   z*MambaForCausalLM.get_mamba_state_copy_funcc                 K   s   | j j|fi |S rJ   )mamba_cachecopy_inputs_before_cuda_graphs)rC   input_buffersrM   rF   rF   rG   r     s   z/MambaForCausalLM.copy_inputs_before_cuda_graphs
batch_sizec                 C   r   rJ   )r   "get_seqlen_agnostic_capture_inputs)rC   r   rF   rF   rG   r     r   z3MambaForCausalLM.get_seqlen_agnostic_capture_inputsrH   c                 C   s   |  | j|}|S rJ   )r   r   )rC   rH   logitsrF   rF   rG   compute_logits  s   zMambaForCausalLM.compute_logitsrp   c                 C   s   t | }||S rJ   )r   r   )rC   rp   loaderrF   rF   rG   r     s   
zMambaForCausalLM.load_weightsr   )rP   rQ   rR   r	   rT   r9   rK   rU   rh   r   rO   classmethodr   r   r   intr   r   r   r   r   r   r   rv   r   rV   rF   rF   rD   rG   r      s@    

,r   )7__doc__collections.abcr   	itertoolsr   rK   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   r	   vllm.distributed.parallel_stater
   $vllm.model_executor.layers.layernormr   +vllm.model_executor.layers.logits_processorr   ,vllm.model_executor.layers.mamba.mamba_mixerr   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   %vllm.model_executor.models.interfacesr   r   r   r   vllm.sequencer   utilsr   r   r   r   r    r   rU   KVCacheModuler!   rW   r   rF   rF   rF   rG   <module>   s4   4
Z