o
    5ti]                     @   sL   d dl Z d dlZd dlZd dlmZ d dlmZ edG dd deZdS )    N)register_model)HFLM	mamba_ssmc                       sr   e Zd Z		ddeddf fddZdeddf fd	d
Z	ddedeejB dB ddf fddZ	dd Z
  ZS )MambaLMWrapperstate-spaces/mamba-130mFis_hfreturnNc              	      s`   d|v r|d dksJ |p| d| _t jd
||dd|dd|ddd| d	S )a8  
        Mamba (via the `mamba_ssm` package) supports the following args:
        ```
        d_model: int,
        n_layer: int,
        vocab_size: int,
        initializer_cfg=None,
        pad_vocab_size_multiple: int = 1,
        ssm_cfg=None,
        norm_epsilon: float = 1e-5,
        rms_norm: bool = False,
        initializer_cfg=None,
        fused_add_norm=False,
        residual_in_fp32=False,
        ```

        See https://github.com/state-spaces/mamba/blob/main/mamba_ssm/models/mixer_seq_simple.py#L175 for more info.
        The above can all be passed via `--model_args` or to this __init__() directly
        but we recommend placing many of these within the config.json file uploaded alongside your
        Mamba model to the HF Hub instead.
        All other HuggingFace from_pretrained() kwargs
        such as those related to
        `parallelize=True`, PEFT, autoGPTQ,
        or any sub-configurations of these advanced args,
        are unsupported by the `mamba_ssm` package.

        The HFLM arguments

        `backend`, `tokenizer`, `truncation`, `max_length`,
        `device`, `dtype`, `batch_size`, `max_batch_size`, `trust_remote_code`, `use_fast_tokenizer`

        Are all supported by Mamba where they do not conflict
        with Mamba-specific restrictions such as causal LMs only.
        backendcausalhf	tokenizerzEleutherAI/gpt-neox-20b
max_lengthi   )
pretrainedr	   r   r   N )endswithr   super__init__pop)selfr   r   kwargs	__class__r   K/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/models/mamba_lm.pyr      s   *



zMambaLMWrapper.__init__r   c              
      sd   | j rt j|fi | d S zddlm} W n ty* } zt|d|d }~ww ||| _d S )Nr   )load_config_hfattempted to use 'mamba_ssm' LM type, but package `mamba_ssm` is not installed.     please install mamba via `pip install lm-eval[mamba]` or `pip install -e .[mamba]`)r   r   _get_configmamba_ssm.utils.hfr   ModuleNotFoundErrortype_config)r   r   r   r   	exceptionr   r   r   r   B   s   zMambaLMWrapper._get_configfloat16dtypec              
      s   | j rt j|fd|i| d S zddlm} W n ty, } zt|d|d }~ww |j|| j|dkr9t	j
ntjj|d| _d S )Nr"   r   )MambaLMHeadModelr   auto)devicer"   )r   r   _create_model!mamba_ssm.models.mixer_seq_simpler#   r   r   from_pretrained_devicetorchr!   lm_evalmodelsutils_hf	get_dtype_model)r   r   r"   r   r#   r    r   r   r   r&   T   s&   	zMambaLMWrapper._create_modelc           	      K   s   | j rdgnddg}|D ]}||v r|| q| j s'| jjd||d|S tjj| j||j	d |j	d }|
dd|d< |
d}|
ddkrV|d u rVd |d< }|du rf|
ddkrf|d | jjd|||| jjd	d
|S )Nattention_mask	do_sample)	input_idsr      r   temperatureg        FT)r2   r   stopping_criteriapad_token_id	use_cacher   )r   r   modelgenerater+   r,   r-   stop_sequences_criteriar   shapegetr6   )	r   contextr   stopgeneration_kwargs
remove_argkeyr5   r1   r   r   r   _model_generater   sD   
	

zMambaLMWrapper._model_generate)r   F)r!   )__name__
__module____qualname__boolr   strr   r*   r"   r&   rB   __classcell__r   r   r   r   r   	   s,    7r   )	r*   lm_eval.models.utilsr+   lm_eval.models.utils_hflm_eval.api.registryr   lm_eval.models.huggingfacer   r   r   r   r   r   <module>   s    