o
    ̳iP                     @   sx  d dl Z d dlZd dlmZmZmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZmZmZ d d
lmZmZmZmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/ G dd deZ0G dd deZ1G dd de-Z2ej3deddfddZ4e5dkre 6e4  dS dS )    N)DictListTupleUnion)evaluate)HFMultimodalLM)HFLM)get_task_dictTaskManager)
make_table)
DictConfig)configtrainingutils)format_content_with_imagesleft_pad_sequenceMessage$padded_collate_tiled_images_and_mask)generatesample)TransformerDecoder)local_kv_cache)DeepFusionModel)	Transform)ModelTokenizer)EvalRecipeInterface)FullModelTorchTuneCheckpointerc                   @   sP  e Zd ZdZddejddddded	ed
ejde	de	dej
dedede	fddZedd Zedd Zedd Zedd Zedd Zedd Zedd  Zed!d" Zed#d$ Zed%d& Zed'd( Zd)ee	 fd*d+Zd9d)efd,d-Z	.d:d/ee d0eeejj  d1e	fd2d3Ze d4e eej!f d5e	d6ee fd7d8Z"d.S );_VLMEvalWrappera  An EvalWrapper for EleutherAI's eval harness based on gpt-fast's
    EvalWrapper: https://github.com/pytorch-labs/gpt-fast/blob/main/eval.py.

    Note:
        This is ONLY for vision-language models.

    Args:
        model (DeepFusionModel): The VLM to evaluate.
        transform (Transform): The transform (tokenizer) to use for preprocessing.
        device (torch.device): The device to use.
        max_seq_length (int): The maximum sequence length.
        batch_size (int): The batch size.
        dtype (torch.dtype): dtype for the model caches during generation.
        enable_kv_cache (bool): Whether to enable KV cache for generation.
        image_tag (str): The string to use for the image token. Default is "<image>", which
            is the default used by the MMMU dataset.
        max_images_per_sample (int): The maximum number of images per sample. Defaults to
            the max number of images in MMMU.
          Tz<image>   )max_seq_length
batch_sizedtypeenable_kv_cache	image_tagmax_images_per_samplemodel	transformdevicer!   r"   r#   r$   r%   r&   c          
      C   s:   || _ || _|| _|| _|| _|| _d| _|| _|	| _d S NT)	_model
_transform_device_max_seq_length_batch_size_dtype_enable_kv_cache
_image_tag_max_images_per_sample)
selfr'   r(   r)   r!   r"   r#   r$   r%   r&    r5   I/home/ubuntu/.local/lib/python3.10/site-packages/recipes/eleuther_eval.py__init__=   s   
z_VLMEvalWrapper.__init__c                 C   s   | j | j_| jS N)r0   r+   r#   r4   r5   r5   r6   r'   W   s   
z_VLMEvalWrapper.modelc                 C      | j S r8   )r,   r9   r5   r5   r6   model_transform^      z_VLMEvalWrapper.model_transformc                 C   r:   r8   r-   r9   r5   r5   r6   r)   b   r<   z_VLMEvalWrapper.devicec                 C   s   G dd d}| S )Nc                   @   s   e Zd Zdd ZdS )z2_VLMEvalWrapper.cache_hook.<locals>.DummyCacheHookc                 S   s   dd | _ d S )Nc                 S      dS r*   r5   )xyzr5   r5   r6   <lambda>k   s    zM_VLMEvalWrapper.cache_hook.<locals>.DummyCacheHook.__init__.<locals>.<lambda>)add_partialr9   r5   r5   r6   r7   j   s   z;_VLMEvalWrapper.cache_hook.<locals>.DummyCacheHook.__init__N)__name__
__module____qualname__r7   r5   r5   r5   r6   DummyCacheHooki   s    rG   r5   )r4   rG   r5   r5   r6   
cache_hookf   s   z_VLMEvalWrapper.cache_hookc                 C   r>   )Nr   r5   r9   r5   r5   r6   ranko      z_VLMEvalWrapper.rankc                 C   r>   )N   r5   r9   r5   r5   r6   
world_sizet   rJ   z_VLMEvalWrapper.world_sizec                 C   r:   r8   r/   r9   r5   r5   r6   r"   y   r<   z_VLMEvalWrapper.batch_sizec                 C   
   | j jjS r8   )r,   	tokenizereos_idr9   r5   r5   r6   eos_token_id}      
z_VLMEvalWrapper.eos_token_idc                 C   rN   r8   )r,   rO   eot_idr9   r5   r5   r6   eot_token_id   rR   z_VLMEvalWrapper.eot_token_idc                 C   r:   r8   r.   r9   r5   r5   r6   
max_length   r<   z_VLMEvalWrapper.max_lengthc                 C   r>   r*   r5   r9   r5   r5   r6   
truncation      z_VLMEvalWrapper.truncationreturnc                 K   s   | j jj|dddS )NF)add_bosadd_eos)r,   rO   encode)r4   stringkwargsr5   r5   r6   
tok_encode   s   z_VLMEvalWrapper.tok_encodec                 C   s"   t |tr|g}| jjj||dS )N)skip_special_tokens)
isinstanceintr,   rO   decode)r4   tokensr`   r5   r5   r6   
tok_decode   s
   
z_VLMEvalWrapper.tok_decodeN	all_texts
all_imagesleft_truncate_lenc                 O   s   g }t ||D ]D\}}g }	|D ]}
|
jdkr|
d}
|	|
 qg }t|| j|	d}|td|d |tddd | jd|idd	}|| qt|d
| j	| j
jd}t|| j |d|d< |d urx|d d d | d f |d< |S )NRGB)r%   imagesuser)rolecontent	assistant messagesT)	inferenceleft)pad_directionpad_max_imagespad_max_tilesrd   	input_ids)zipmodeconvertappendr   r2   r   r;   r   r3   r,   max_num_tilesr   batch_to_devicer)   pop)r4   rf   rg   rh   argsr^   all_encoded_messagestextrj   proper_imagesimagerp   rm   	tok_batchr5   r5   r6   tok_batch_multimodal_encode   s4   


z+_VLMEvalWrapper.tok_batch_multimodal_encodebatchrV   stopc              
   K   s  | d}|j\}}|dd}|dd}	|	s|dkr td|dkr,td| d	| jj| j }
| j t	
t	j| j| jft	jd
}t	| j}W d    n1 sVw   Y  |d d |f |d< |d d |f |d< t| j| j| j| j|
| jdr g }| j|fi |d d df }t|dd d}||  |d d d dd f }t|D ]:}| | jjv r n/| j||d |d d d f d ||d |f dd d df }t|dd d}||  |d7 }qW d    n1 sw   Y  t	j|t	jddS )Nrv   temperature        	do_sampleF9Any decoding strategy other than greedy is not supported.rK   zGot a batch size of 'zA'. Batch size > 1 is not yet supported for multimodal generation.)sizer#   	input_posmask)r"   r)   r#   encoder_max_seq_lendecoder_max_seq_len)r   top_kencoder_mask)r   encoder_inputr   r   )r#   r   )r}   shapegetRuntimeError
ValueErrorr;   image_seq_lenr3   r)   torchtrilonesrV   boolaranger   r'   r"   r0   r   rz   itemrangestop_tokenstensorint32	unsqueeze)r4   r   rV   r   generation_kwargspromptbszseq_lenr   r   r   causal_maskr   generated_tokenslogitstoken
cache_mask_r5   r5   r6   _model_multimodal_generate   sp   
	


		


 z*_VLMEvalWrapper._model_multimodal_generate)Tr8   )#rD   rE   rF   __doc__r   bfloat16r   r   r)   rb   r#   r   strr7   propertyr'   r;   rH   rI   rL   r"   rQ   rT   rV   rW   r   r_   re   PILImager   inference_moder   Tensorr   r5   r5   r5   r6   r   (   s    	












3r   c                       s6  e Zd ZdZddejdddededejd	e	d
e	dej
def fddZedd Zedd Zedd Zedd Zedd Zedd Zedd Zdedee	 fdd Z	!d.dee d"e	deejejf fd#d$Zd%eee	 e	f defd&d'Zd(ejdejfd)d*Ze d+ejdejfd,d-Z  ZS )/_LLMEvalWrappera  An EvalWrapper for EleutherAI's eval harness based on gpt-fast's
    EvalWrapper: https://github.com/pytorch-labs/gpt-fast/blob/main/eval.py.

    Note:
        This is for text-only decoder models.

    Args:
        model (TransformerDecoder): The model to evaluate.
        tokenizer (ModelTokenizer): Tokenizer associated with the model being evaluated.
            This should be the same tokenizer used when fine-tuning the model.
        device (torch.device): The device to use.
        max_seq_length (int): The maximum sequence length to use.
        batch_size (int): The batch size per GPU to use.
        dtype (torch.dtype): dtype for the model caches during generation.
        enable_kv_cache (bool): Whether to enable KV cache for generation.
    r   r   T)r!   r"   r#   r$   r'   rO   r)   r!   r"   r#   r$   c                   s<   t  jdt|d || _|| _|| _|| _|| _|| _d S )Ngpt2)
pretrainedr)   )	superr7   r   r+   
_tokenizerr.   r/   r0   r1   )r4   r'   rO   r)   r!   r"   r#   r$   	__class__r5   r6   r7   +  s   
z_LLMEvalWrapper.__init__c                 C   r:   r8   r+   r9   r5   r5   r6   r'   ?  r<   z_LLMEvalWrapper.modelc                 C   s   | j jS r8   )r   rP   r9   r5   r5   r6   rT   C  s   z_LLMEvalWrapper.eot_token_idc                 C   r:   r8   rU   r9   r5   r5   r6   rV   G  r<   z_LLMEvalWrapper.max_lengthc                 C   r>   )N   r5   r9   r5   r5   r6   max_gen_toksK  rX   z_LLMEvalWrapper.max_gen_toksc                 C   r:   r8   rM   r9   r5   r5   r6   r"   O  r<   z_LLMEvalWrapper.batch_sizec                 C   r:   r8   r=   r9   r5   r5   r6   r)   S  r<   z_LLMEvalWrapper.devicec                 C   r:   r8   )r1   r9   r5   r5   r6   r$   W  r<   z_LLMEvalWrapper.enable_kv_cacher   rY   c                 K   s   | j j|dddS )NF)r   rZ   r[   )r   r\   )r4   r   r^   r5   r5   r6   r_   [  s   z_LLMEvalWrapper.tok_encodeNrh   c                    sZ    fdd|D }t dd |D d jjd}|d ur&|d d | d f }|t|fS )Nc                    s   g | ]}  |qS r5   )r_   .0r?   r9   r5   r6   
<listcomp>f      z4_LLMEvalWrapper.tok_batch_encode.<locals>.<listcomp>c                 S   s   g | ]}t |qS r5   )r   r   r   r5   r5   r6   r   j  r   T)batch_firstpadding_value)r   r   pad_idr   	ones_like)r4   r   rh   r^   tokenized_textr?   r5   r9   r6   tok_batch_encodec  s   z _LLMEvalWrapper.tok_batch_encoderd   c                 K   s   t |tr|g}| j|S r8   )ra   rb   r   rc   )r4   rd   r^   r5   r5   r6   re   v  s   
z_LLMEvalWrapper.tok_decodeinpsc                 K   s
   |  |S r8   r   )r4   r   r^   r5   r5   r6   _model_call{  s   
z_LLMEvalWrapper._model_callcontextc           
   
   K   s   |j \}}|dd}|dd}|s|dkrtdtjjj|ddd| j| f| jj	d}t
| j| j| j| j| jd t| j|| j|d | jj| jjd	\}}	W d    n1 sZw   Y  |d | S )
Nr   r   r   Fr   r   )value)r"   r)   r#   r   )max_generated_tokensr   r   r   r   )r   r   r   r   nn
functionalpadr/   r   rP   r   r'   r"   r)   r0   rV   r   r   r   r   )
r4   r   r   r   r   r   r   maybe_padded_contexttoksr   r5   r5   r6   _model_generate~  s<   
z_LLMEvalWrapper._model_generater8   ) rD   rE   rF   r   r   float32r   r   r)   rb   r#   r   r7   r   r'   rT   rV   r   r"   r$   r   r   r_   r   r   r   r   re   r   r   r   __classcell__r5   r5   r   r6   r     sh    	






	
r   c                   @   s>   e Zd ZdZdeddfddZdeddfddZdd	d
ZdS )EleutherEvalRecipea|  
    This recipe runs evaluation on a trained model using EleutherAI's eval harness.
    This assumes the user has the EleutherAI eval harness installed. See
    https://github.com/EleutherAI/lm-evaluation-harness for more details.

    Features:
        - Single GPU evaluation. Multi-GPU evaluation is currently not supported.
        - Quantization (for text-only models) is supported.
        - Any task from the EleutherAI eval harness

    We recommend launching evaluation using the tune CLI::

        tune run eleuther_eval --config eleuther_evaluation             tasks=["truthfulqa_mc2","hellaswag"]             limit=50     cfgrY   Nc                 C   s   ddl m} |ddk rtdtj|jd| _tj|j| jd| _t	|
dd	| _tj|j|
d
d d |j| _t|j| _|j| _|
dd| _|
dd | _d S )Nr   )versionzlm-evalz0.4.5zoThis recipe requires EleutherAI Eval Harness v0.4.5 or higher. Please install with `pip install lm-eval>=0.4.5`)r)   )r#   r)   	log_levelinfocudnn_deterministic_mode)seed
debug_moder$   Tinclude_path)importlib.metadatar   r   r   
get_devicer)   r   	get_dtyper#   
get_loggerr   loggerset_seedr   limitlisttasksr"   r$   r   )r4   r   r   r5   r5   r6   r7     s    zEleutherEvalRecipe.__init__c              	   C   s  t |j}t|}t |j}t| j! | j t |j	}W d    n1 s,w   Y  W d    n1 s;w   Y  |d urt
|tsMtdd|v rUtd||}|j| j| jd}|jddtj }| D ]\}}|| j||< qp|j|dd n| tj }|| | jd	| j d
 |  t |j}	t
|trt}
| js| jd nt
|trt}
|
||	| j|j| j| j| jd| _d S )NzQuantization is only supported for models quantized and saved with the FullModelTorchTuneCheckpointer - please ensure you have quantized your model and are using the quantized weights!qata  You have specified a quantizer with 'QAT' - QAT quantizers should only be used during quantization aware training and when quantizing models. Please use the corresponding post-training quantizer e.g. Int8DynActInt4WeightQuantizer for Int8DynActInt4WeightQATQuantizer.)r)   r#   F)weights_onlyT)assignz$Model is initialized with precision .zReceived enable_kv_cache=False, but KV cache is required for running multimodal generation in a timely manner. Setting enable_kv_cache=True.)r)   r!   r"   r#   r$   ) r   instantiate	quantizerr   get_quantizer_modecheckpointerset_default_dtyper#   r)   r'   ra   r   r   quantizetoload_checkpoint	MODEL_KEYitemsload_state_dictr   r   evalrO   r   r   r$   debugr   r   r!   r"   eleuther_model_wrapper)r4   r   r   quantization_moder   r'   	ckpt_dictkvr;   r   r5   r5   r6   setup  s`   
 





zEleutherEvalRecipe.setupc                 C   s   t | jd}t| j|}t }| jd| j  t| j|| j	d}t | }| jd|dd | j
jdkrMt }| jd| d	 dd
 t|}| jd| d d S )N)r   z+Running evaluation on the following tasks: )r   zEval completed in z.02fz	 seconds.cpuzMax memory allocated: g    eAz GBz


)r
   r   r	   r   timer   r   r   r   r   r)   typer   get_torch_device_namespacemax_memory_allocatedr   )r4   task_manager	task_dictt0outputt1torch_deviceformatted_outputr5   r5   r6   r     s$   zEleutherEvalRecipe.evaluate)rY   N)rD   rE   rF   r   r   r7   r   r   r5   r5   r5   r6   r     s
    Fr   r   rY   c                 C   s0   t jd| d t| d}|j| d |  dS )zEntry point for the recipe.r   )recipe_namer   )r   N)r   
log_configr   r   r   )r   reciper5   r5   r6   recipe_main3  s   
r  __main__)7sysr  typingr   r   r   r   r   r   lm_eval.evaluatorr   lm_eval.models.hf_vlmsr   lm_eval.models.huggingfacer   lm_eval.tasksr	   r
   lm_eval.utilsr   	omegaconfr   	torchtuner   r   r   torchtune.datar   r   r   r   torchtune.generationr   r   torchtune.modulesr   torchtune.modules.common_utilsr   torchtune.modules.model_fusionr   torchtune.modules.transformsr   'torchtune.modules.transforms.tokenizersr   torchtune.recipe_interfacesr   torchtune.trainingr   r   r   r   parser  rD   exitr5   r5   r5   r6   <module>   s@    r  