o
    5tie                     @  sz   d Z ddlmZ ddlZddlmZmZ ddlZddlm	Z	 ddl
mZ er*ddlZeeZe	dG dd	 d	eZdS )
a  
Mistral3 model adapter for lm-evaluation-harness.

This adapter enables evaluation of Ministral-3 models (3B, 8B, 14B) which use
Mistral3ForConditionalGeneration instead of AutoModelForCausalLM.

Usage:
    lm_eval --model hf-mistral3         --model_args pretrained=mistralai/Ministral-3-3B-Instruct-2512-BF16,dtype=bfloat16         --tasks hellaswag         --device cuda:0         --batch_size 8
    )annotationsN)TYPE_CHECKINGLiteral)register_model)HFLMzhf-mistral3c                      sR   e Zd ZdZdZ fddZ		ddddZ		ddddZedddZ	  Z
S ) 
Mistral3LMz
    Model adapter for Mistral3 models (Ministral-3 family).

    These models use Mistral3ForConditionalGeneration which is a vision-language
    model class, but can be used for text-only evaluation by ignoring the vision
    encoder.
    Nc                   sF   zddl m} || _W n ty   tdd w t jdi | d S )Nr   ) Mistral3ForConditionalGenerationzMistral3ForConditionalGeneration not found in transformers. Please install transformers >= 5.0.0 or from main: pip install git+https://github.com/huggingface/transformers )transformersr   AUTO_MODEL_CLASSImportErrorsuper__init__)selfkwargsr   	__class__r	   K/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/models/mistral3.pyr   ,   s   
zMistral3LM.__init__defaultFconfig7transformers.PretrainedConfig | transformers.AutoConfigbackend'Literal['default', 'causal', 'seq2seq']trust_remote_codebool | NonereturnNonec                 C  s   d| _ td dS )z
        Override to force causal backend for Mistral3 models.

        Mistral3 models are decoder-only despite using a conditional generation class.
        causalz)Using backend 'causal' for Mistral3 modelN)r   eval_loggerinfo)r   r   r   r   r	   r	   r   _get_backend;   s   zMistral3LM._get_backendinpstorch.Tensor	attn_masktorch.Tensor | Nonelabelsc              	   C  s   t  4 t j| jj| j| jdud | |jW  d   W  d   S 1 s+w   Y  W d   dS 1 s;w   Y  dS )a  
        Override to handle Mistral3 model output format.

        Mistral3ForConditionalGeneration returns logits in the same format as
        causal LMs, so we call the model directly but bypass the base class
        assertion that checks for AutoModelForCausalLM.
        N)device_typedtypeenabled)torchno_gradautocastdevicetypemixed_precision_dtypemodellogits)r   r!   r#   r%   r	   r	   r   _model_callJ   s   
	RzMistral3LM._model_callintc                 C  s   | j r| j S d}t| jjdr%| jjj}|D ]}t||r$t||  S q|D ]}t| jj|r9t| jj|  S q't| jdrJ| jjdk rJ| jjS | jS )z.Get the maximum sequence length for the model.)max_position_embeddingsn_positionsn_ctxtext_configmodel_max_lengthi ʚ;)	_max_lengthhasattrr/   r   r6   getattr	tokenizerr7   _DEFAULT_MAX_LENGTH)r   seqlen_config_attrsr6   attrr	   r	   r   
max_lengthb   s$   


zMistral3LM.max_length)r   F)r   r   r   r   r   r   r   r   )NN)r!   r"   r#   r$   r%   r$   r   r"   )r   r2   )__name__
__module____qualname____doc__r   r   r    r1   propertyr?   __classcell__r	   r	   r   r   r       s    r   )rC   
__future__r   loggingtypingr   r   r)   lm_eval.api.registryr   lm_eval.models.huggingfacer   r
   	getLoggerr@   r   r   r	   r	   r	   r   <module>   s    
