o
    5ti/                     @   s   d dl Z d dlmZmZ d dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ d dlmZ d dlmZ d d	lmZmZmZmZ d d
lmZ er\d dl	mZ d dlmZ e eZdZedG dd deZdS )    N)TYPE_CHECKINGAny)
distribute)tqdm)LLMSamplingParams
TextPrompt)LoRARequest)Instance)register_model)Collatorreplace_placeholdersresize_imageundistribute)VLLM)RequestOutputz<image>zvllm-vlmc                       s@  e Zd ZdZ							d%dededB dedB ded	ed
edB dedB dedB f fddZ		d&dee dedB dedee	 fddZ
				d'dee	 dB dededB dee dB ded f
ddZ	d(deeeef  defddZ	d)ded d edee f fd!d"Z	d)ded d edee f fd#d$Z  ZS )*VLLM_VLMTFN  
pretrainedtrust_remote_coderevision
interleave
max_imagesimage_widthimage_heightimage_max_sidec	           
         s   || _ || _|| _| jr| j s| jrtd|dkr(d|i|	d< td|  t jd	|||d|	 || _|| _	t
jj|||d| _d| _d S )
NzlAmbiguous config for image resize: you can not specify both image_max_side and (image_width or image_height)r   imagelimit_mm_per_promptz&Setting limit_mm_per_prompt[image] to )r   r   r   )r   r   F )r   r   r   
ValueErroreval_loggerinfosuper__init__r   r   transformersAutoProcessorfrom_pretrained	processorchat_applied)
selfr   r   r   r   r   r   r   r   kwargs	__class__r   L/home/ubuntu/.local/lib/python3.10/site-packages/lm_eval/models/vllm_vlms.pyr#   $   s2   
zVLLM_VLM.__init__stringsleft_truncate_len
truncationreturnc           	         sh    fdd|D } j du r fdd|D }g }t||ddD ]\}}t|d|id}|| q |S )	Nc                    s   g | ]	}|d  j  qS N)r   .0imgr)   r   r-   
<listcomp>S   s    z8VLLM_VLM.tok_batch_multimodal_encode.<locals>.<listcomp>Fc                    s   g | ]
}t |tt jqS r   )r   DEFAULT_IMAGE_PLACEHOLDERr   )r4   stringr6   r   r-   r7   V   s    Tstrictr   )promptmulti_modal_data)r(   zipr   append)	r)   r.   imagesr/   r0   outputsxi_inputr   r6   r-   tok_batch_multimodal_encodeL   s   


z$VLLM_VLM.tok_batch_multimodal_encoderequestsgenerate
max_tokensstopr   c           
         s   |d u rg S |rt d||d|nt dddddjdkr]tjdtdtt dtd	 fd
d dd tj|D }fdd|D } fdd|D }t|}t	  t
|S jd urrjj|jdkjd}	|	S jj|jdkd}	|	S )N)rH   rI   r      F)temperatureprompt_logprobsrH   
detokenize
model_argsrF   r1   r   c                 S   s   t di | }|j||dS )N)sampling_paramsr   )r   rG   )rN   rO   rF   llmr   r   r-   run_inference_one_modelz   s   zDVLLM_VLM._multimodal_model_generate.<locals>.run_inference_one_modelc                 S   s   g | ]}t |qS r   )listr4   rB   r   r   r-   r7      s    z7VLLM_VLM._multimodal_model_generate.<locals>.<listcomp>c                 3   s    | ]	}j  |fV  qd S r2   )rN   )r4   req)rO   r)   r   r-   	<genexpr>   s    z6VLLM_VLM._multimodal_model_generate.<locals>.<genexpr>c                    s   g | ]} j | qS r   )remoterS   )rQ   r   r-   r7      s    auto)rO   use_tqdmlora_request)rO   rX   r   )r   data_parallel_sizerayrV   dictrR   r   r   getshutdownr   rY   modelrG   
batch_size)
r)   rF   rG   rH   rI   r*   inputsobject_refsresultsrA   r   )rQ   rO   r)   r-   _multimodal_model_generatef   sL   



z#VLLM_VLM._multimodal_model_generatechat_historyc                 C   s<  d| _ | js<|D ]2}g }|d }t| j|t}|td}t|D ]
}|dd d q#|d|d ||d< qnX|D ]U}g }|d }t| j|t}d}	|	t}
t
|
D ]&\}}|rh|d|d |t|
d	 k r|| jk r|d
di |	d	7 }	qZ||d< |	|krtd| d|	 q>| jj||| dS )NTcontent r   )typer   text)rh   ri   r   rJ   rh   z/Mismatch in image placeholder count. Expected: z
, Actual: )add_generation_promptcontinue_final_message)r(   r   minr   countr8   replaceranger?   split	enumeratelenr   r'   apply_chat_template)r)   re   rj   rf   cri   image_count_expected_image_countactual_image_count
text_partsrC   partr   r   r-   rs      sP   

zVLLM_VLM.apply_chat_templater
   disable_tqdmc              
      s  |rt |d jdk rt j||dS g } fdd}tt ||p% jdkdd}tdd	 |D |d
dd d}|j jdkrDt	 jndd d} j
 j}|D ]}	t|	ddi\}
}} fdd	|D }t|
tspt|
}
|d }t|tsJ dt|  j|| jd\}}} j| } j|
||d} j|f|d|d|}t||
ddD ]#\}}|jd j}||  jd||||dB f| |d qqR||}|  |S )Nr      )rF   r{   c                    s      | d }t| | d fS )Nr   )
tok_encoderr   )rB   toksr6   r   r-   _collate   s   z)VLLM_VLM.generate_until.<locals>._collatez5Running generate_until requests with text+image input)totaldisabledescc                 S   s   g | ]}|j qS r   )args)r4   regr   r   r-   r7      s    z+VLLM_VLM.generate_until.<locals>.<listcomp>
gen_kwargsc                 S   s   | d S )NrJ   r   )rB   r   r   r-   <lambda>   s    z)VLLM_VLM.generate_until.<locals>.<lambda>)group_bygroup_fnrW   )nbatch_fnr;   Tc                    s"   g | ]} fd d|d D qS )c                    s    g | ]}t | j j jqS r   )r   r   r   r   r3   r6   r   r-   r7      s    z6VLLM_VLM.generate_until.<locals>.<listcomp>.<listcomp>visualr   )r4   argr6   r   r-   r7      s    
z3Expected `gen_kwargs` to be of type `dict` but got )eosdefault_max_gen_toks)r/   )rI   rG   rH   r:   generate_until)untilmax_gen_toksrJ   )rr   r   r"   r   r   rankr   get_batchedr`   int	tokenizerdecodeeot_token_idr>   
isinstancerR   r\   rh   modify_gen_kwargsr   
max_lengthrE   rd   rA   ri   r?   
cache_hookadd_partialupdateget_originalclose)r)   rF   r{   resr   pbarre_ordschunksr   chunkcontextsall_gen_kwargsaux_argumentsvisualsr   r*   r   r   max_ctx_lenra   contoutputcontextgenerated_textr+   r6   r-   r      sz   
	






zVLLM_VLM.generate_untilc                    s.   |rt |d jdk rt j|dS tdd)Nr   r|   )rF   z}model type `vllm-vlm` does not support loglikelihood_rolling. Use 'vlm' model type for text-only loglikelihood_rolling tasks zZthis is because we do not support measuring the loglikelihood a model assigns to an image.)rr   r   r"   loglikelihood_rollingNotImplementedError)r)   rF   r{   r+   r   r-   r   5  s   zVLLM_VLM.loglikelihood_rolling)FNTr   NNN)NF)NFNN)T)F)__name__
__module____qualname__
MULTIMODALstrboolr   r#   rR   r   rE   rd   r\   r   rs   r   floatr   __classcell__r   r   r+   r-   r       s    	
,



8
:`r   ) loggingtypingr   r   r[   r$   more_itertoolsr   r   vllmr   r   r   vllm.lora.requestr	   lm_eval.api.instancer
   lm_eval.api.registryr   lm_eval.models.utilsr   r   r   r   lm_eval.models.vllm_causallmsr   r   	getLoggerr   r    r8   r   r   r   r   r-   <module>   s&    
