o
    
۾i(                     @   sP  d Z ddlmZ ddlZddlmZ ddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZmZmZmZ ddlmZ ddlmZmZ ddlm Z  ddl!m"Z"m#Z#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z, e-ej.ej.f Z/G dd dej0Z1eG dd dej0Z2G dd dej0e"e#e$Z3dS )zPyTorch MAMBA2 model.    )IterableN)nn)MambaConfig)support_torch_compile)CacheConfigModelConfig
VllmConfig)get_pp_group)RMSNorm)LogitsProcessor)MambaMixer2)MambaStateCopyFuncMambaStateCopyFuncCalculatorMambaStateDtypeCalculatorMambaStateShapeCalculator)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)HasInnerStateIsAttentionFreeSupportsMambaPrefixCaching)IntermediateTensors   )AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sf   e Zd Z				ddededB dedB dedB deddf fd	d
Zde	j
de	j
dB fddZ  ZS )Mamba2DecoderLayerN configmodel_configcache_configquant_configprefixreturnc                    sx   t    || _t|j|j|jt|d|j|j |j	|j
|j|j|j|j|j|||| dd| _t|j|jd| _d S )Nintermediate_sizez.mixer)hidden_sizessm_state_sizeconv_kernel_sizer'   use_conv_biasuse_biasn_groups	num_headshead_dimrms_norm_eps
activationr"   r#   r$   r%   eps)super__init__r!   r   r(   
state_sizeconv_kernelgetattrexpandr+   r,   r-   r.   r/   layer_norm_epsilon
hidden_actmixerr
   norm)selfr!   r"   r#   r$   r%   	__class__ U/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/mamba2.pyr5   0   s,   
zMamba2DecoderLayer.__init__hidden_statesresidualc                 K   s:   |d u r|}|  |}n|  ||\}}| |}||fS N)r=   r<   )r>   rC   rD   kwargsoutputrA   rA   rB   forwardP   s   
zMamba2DecoderLayer.forward)NNNr    )__name__
__module____qualname__r   r   r   r   strr5   torchTensorrH   __classcell__rA   rA   r?   rB   r   /   s.     r   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB dejf
ddZ
deeeejf  dee fddZ  ZS )Mamba2Modelr    r%   vllm_configr%   c                   s   t    |jj|j|j |j|j}t|}|rJ | _j	| _	t
| j	j| _tj fdd| dd\| _| _| _tjjd| _tddgj| _d S )Nc                    s   t  | dS )N)r"   r#   r$   r%   )r   rQ   r#   r!   r"   r$   rA   rB   <lambda>x   s    z&Mamba2Model.__init__.<locals>.<lambda>z.layersrQ   r2   rC   rD   )r4   r5   r"   	hf_configr#   r$   lora_configboolr!   
vocab_sizer   r(   
embeddingsr   num_hidden_layersstart_layer	end_layerlayersr
   r:   norm_fr   make_empty_intermediate_tensors)r>   rR   r%   rV   is_lora_enabledr?   rS   rB   r5   b   s.   


zMamba2Model.__init__	input_idsr&   c                 C   s
   |  |S rE   )rY   r>   ra   rA   rA   rB   embed_input_ids   s   
zMamba2Model.embed_input_idsN	positionsintermediate_tensorsinputs_embedsc           
      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| jD ]\}}||||d\}}q&t  js?t||dS | ||\}}	|S )NrC   rD   )rd   rC   rD   )rC   rD   )r	   is_first_rankrc   	enumerater]   is_last_rankr   r^   )
r>   ra   rd   re   rf   rC   rD   ilayer_rA   rA   rB   rH      s$   
zMamba2Model.forwardweightsc                 C   s   t |  }t }|D ]2\}}d|v r|dd}|dr#||vr#qt|| r)q|| }t|dt}||| || q|S )NA_logAz.biasweight_loader)	dictnamed_parameterssetreplaceendswithr   r8   r   add)r>   rm   params_dictloaded_paramsnameloaded_weightparamrp   rA   rA   rB   load_weights   s   

zMamba2Model.load_weightsNN)rI   rJ   rK   r   rL   r5   rM   rN   rc   r   rH   r   tuplers   r|   rO   rA   rA   r?   rB   rP   `   s"    %
, rP   c                
       s0  e Zd Zedddeejejf fddZedddeeeef eeeef f fddZ	edee
e
f fdd	Zd
ddedef fddZdejdejfddZ		d#dejdB dejdedB dejdB fddZdd ZdefddZdejdejfddZd eeeejf  dee fd!d"Z  ZS )$Mamba2ForCausalLMrR   r   r&   c                 C   s   t |jj|jj|jjS rE   )r   mamba2_state_dtyper"   dtyper#   mamba_cache_dtypemamba_ssm_cache_dtype)clsrR   rA   rA   rB   !get_mamba_state_dtype_from_config   s
   z3Mamba2ForCausalLM.get_mamba_state_dtype_from_configc              	   C   s>   |j }|jj}|j|j }tj||j|j|j	|j
|j|jdS )a3  Calculate shapes for Mamba's convolutional and state caches.

        Args:
            vllm_config: vLLM config

        Returns:
            Tuple containing:
            - conv_state_shape: Shape for convolutional state cache
            - temporal_state_shape: Shape for state space model cache
        )r'   tp_world_sizer-   r.   r/   r6   r7   )parallel_configr"   rU   r9   r(   r   mamba2_state_shapetensor_parallel_sizer-   r.   r/   r6   r7   )r   rR   r   rU   r'   rA   rA   rB   !get_mamba_state_shape_from_config   s   z3Mamba2ForCausalLM.get_mamba_state_shape_from_configc                 C   s   t  S rE   )r   mamba2_state_copy_func)r   rA   rA   rB   get_mamba_state_copy_func   s   z+Mamba2ForCausalLM.get_mamba_state_copy_funcr    rQ   r%   c                   s   |j j}|j}t   || _|| _|| _|j | _ t|t|dd| _	t
|j|jt|dd| _|jr<| j| j	j| _t|j| _| j	j| _d S )Nbackbone)rR   r%   lm_headrQ   )r"   rU   scheduler_configr4   r5   r!   rR   rP   r   r   r   rX   r(   r   tie_word_embeddingstie_weightsrY   r   logits_processorr_   )r>   rR   r%   r!   r   r?   rA   rB   r5      s(   

zMamba2ForCausalLM.__init__ra   c                 C      | j |S rE   )r   rc   rb   rA   rA   rB   rc   	     z!Mamba2ForCausalLM.embed_input_idsNrd   re   rf   c                 K   s   |  ||||}|S rE   )r   )r>   ra   rd   re   rf   rF   rC   rA   rA   rB   rH     s   zMamba2ForCausalLM.forwardc                 K   s   | j j|fi |S rE   )mamba_cachecopy_inputs_before_cuda_graphs)r>   input_buffersrF   rA   rA   rB   r     s   z0Mamba2ForCausalLM.copy_inputs_before_cuda_graphs
batch_sizec                 C   r   rE   )r   "get_seqlen_agnostic_capture_inputs)r>   r   rA   rA   rB   r     r   z4Mamba2ForCausalLM.get_seqlen_agnostic_capture_inputsrC   c                 C   s   |  | j|}|S rE   )r   r   )r>   rC   logitsrA   rA   rB   compute_logits   s   z Mamba2ForCausalLM.compute_logitsrm   c                 C   s   t | }||S rE   )r   r|   )r>   rm   loaderrA   rA   rB   r|   $  s   
zMamba2ForCausalLM.load_weightsr}   )rI   rJ   rK   classmethodr~   rM   r   r   intr   r   r   r   rL   r5   rN   rc   r   rH   r   r   r   r   rs   r|   rO   rA   rA   r?   rB   r      s@    

,r   )4__doc__collections.abcr   rM   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   r   vllm.distributed.parallel_stater	   $vllm.model_executor.layers.layernormr
   +vllm.model_executor.layers.logits_processorr   -vllm.model_executor.layers.mamba.mamba_mixer2r   ,vllm.model_executor.layers.mamba.mamba_utilsr   r   r   r   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   %vllm.model_executor.models.interfacesr   r   r   vllm.sequencer   utilsr   r   r   r   r   r~   rN   KVCacheModuler   rP   r   rA   rA   rA   rB   <module>   s2   1
]
