o
    پi>                     @  s   d Z ddlmZ ddlZddlZddlZddlmZmZm	Z	 ddl
mZmZ ddlmZ ddlmZ ddlmZmZmZmZmZmZ G d	d
 d
eZd!ddZd"ddZd#ddZd$ddZd%ddZd&dd ZdS )'zF
MMMU evaluation for VLMs using the run_eval simple-evals interface.

    )annotationsN)ListOptionalTuple)concatenate_datasetsload_dataset)Image)simple_eval_common)
HTML_JINJAEval
EvalResultSamplerBaseSingleEvalResultmap_with_progressc                   @  s   e Zd Zg dg dg dg dg dg ddZ				
	d/d0ddZed1ddZed2ddZd3d!d"Zed4d%d&Z	ed5d(d)Z
d6d-d.ZdS )7MMMUVLMEval)Art
Art_TheoryDesignMusic)
Accounting	EconomicsFinanceManage	Marketing)Biology	Chemistry	GeographyMathPhysics)Basic_Medical_ScienceClinical_Medicine#Diagnostics_and_Laboratory_MedicinePharmacyPublic_Health)History
Literature	Sociology
Psychology)AgricultureArchitecture_and_EngineeringComputer_ScienceElectronicsEnergy_and_Power	MaterialsMechanical_Engineering)zArt and DesignBusinessSciencezHealth and MedicinezHumanities and Social SciencezTech and Engineeringd       *   Nnum_examplesOptional[int]num_threadsintseedresponse_answer_regexstrc                 C  s*   || _ || _|| _| | j | _|| _dS )zACreate MMMU VLM eval (Math subset, 100 fixed samples by default).N)r4   r6   r8   _prepare_mmmu_samplessamplesr9   )selfr4   r6   r8   r9    r>   T/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/simple_eval_mmmu_vlm.py__init__8   s
   
zMMMUVLMEval.__init__imageImage.Imagereturnc                 C  sH   | j dkr
| d} t }| j|dd t| d}d| S )NRGBARGBPNG)formatzutf-8zdata:image/png;base64,)	modeconvertioBytesIOsavebase64	b64encodegetvaluedecode)rA   bufb64r>   r>   r?   _to_data_uriH   s   


zMMMUVLMEval._to_data_urioptions	List[str]Tuple[dict, List[str]]c                 C  sD   i }g }t d}| D ]}t|}|||< || |d7 }q
||fS )NA   )ordchrappend)rT   	index2ansall_choiceschoptletterr>   r>   r?   _build_mc_mappingQ   s   

zMMMUVLMEval._build_mc_mappingk
List[dict]c                   s  g }| j  D ]}|| qg }|D ]$}ztd|dd}|d|gt| }|| W q ty7   Y qw |s>tdt	|  fdd}t
tt |d}|d | }	g }
|	D ]} | }|d }|d	}|d u stt|d
suq\| |}|dd}|d}|d}d}d }d }d }|rz#t|tr|ntt|}t|trt|dkr| |\}}d}W n ty   d }Y nw d| d}|rdd tt|D }t||D ]\}}|| d| d7 }q|d7 }|
|d| d| |||||||d q\|
S )Nz	MMMU/MMMU
validationsplit__subject__zFailed to load MMMU datasetsc                   s&    |  }t |d|d  d|  S )Nidrg   :)r:   get)idxexmergedr>   r?   _keys   s   z/MMMUVLMEval._prepare_mmmu_samples.<locals>._keykeyimage_1rI   question answerrT   openr   multiple-choicez
Question: z

c                 S  s   g | ]
}t td | qS )rW   )rZ   rY   ).0ir>   r>   r?   
<listcomp>   s    z5MMMUVLMEval._prepare_mmmu_samples.<locals>.<listcomp>z) 
z	
Answer: rh   ri   )rh   final_input_prompt
image_dataru   question_typer\   r]   category)DOMAIN_CAT2SUB_CATvaluesextendr   
add_columnlenr[   	ExceptionRuntimeErrorr   sortedrangerj   hasattrrS   
isinstancelistevalra   zip)r=   rb   subjectssubsdatasetssubjdro   orderpicked_indicesr<   rk   rl   subjectrA   data_urirs   ru   raw_optionsr~   r\   r]   rT   prompt_textlettersr`   r_   r>   rm   r?   r;   ]   s   




z!MMMUVLMEval._prepare_mmmu_samplesprompttuple[str, str]c                 C  s>   d| v rd| v r|  dd }|  ddd }||fS | dfS )Split a prompt containing an inline image tag into prefix and suffix.

        If no tag is present, treat the whole prompt as prefix and empty suffix.
        <>r   rX   rt   re   )r   prefixsuffixr>   r>   r?   _split_prompt_for_image   s
   z#MMMUVLMEval._split_prompt_for_imager   c                 C  s^   t | \}}g }|r|d|d |dd|id |r'|d|d d|dg}|S )r   text)typer   	image_urlurl)r   r   user)rolecontent)r   r   r[   )r   r}   r   r   r   prompt_messagesr>   r>   r?   build_chat_messages_from_prompt   s   z+MMMUVLMEval.build_chat_messages_from_promptsamplerr   r   c                   s  d fdd}t |jj}i }i }g }g }g }|D ]A}	|	jr'|	jdnd }
|
d u r/d}
||
dd ||
< |	jrF||
dd ||
< ||	j ||	j |	jd ur]||	j qi }|	 D ]\}
}||
d}|dkrv|| nd	}t
|d
|d||
< qdi }j	 D ]R\}}d	}d}|D ]}
|
|v r|||
 d ||
 d  7 }|||
 d 7 }q|dkr|t
|| d
d|d| < |D ]}
|
|v r||
 d ||
 d d||
< qqtdd | D }|dkrtdd | D | nd	}|t
|d
d|d< t||||dS )Nsampledictc                   s<  | d }| d }t ||} |}|pd}jr3|d ur$tj|nd }|d ur1|d n|}| d }| d dkr^| d r^| d	 r^t|| d | d	 }|d urY||krYd
nd}|}	nt|}
|d urmt	||
rmd
nd}d
tt|
}	tjtj|t|dd|||	d}|t|ddg }t||d| d i|dS )Nr|   r}   rt   rX   ru   r~   rw   r]   r\   g      ?        z, 	assistant)r   r   )r   next_messagescorecorrect_answerextracted_answer__category__r   )htmlr   metricsconvo)r   r   r9   researchgroupstrip_parse_multi_choice_response_parse_open_response
_eval_openjoinmapr:   common	jinja_envfrom_stringr
   renderr   r   )r   r   r}   r   response_textmatchgoldpredr   r   parsed_listhtml_renderedr   r   r=   r>   r?   fn   sV   

z MMMUVLMEval.__call__.<locals>.fnr   Unknownr   rX   r      )accnum_exampler   r   )numr   zOverall-c                 s  s    | ]}|d  V  qdS )r   Nr>   rx   vr>   r>   r?   	<genexpr>:  s    z'MMMUVLMEval.__call__.<locals>.<genexpr>c                 s  s     | ]}|d  |d  V  qdS )r   r   Nr>   r   r>   r>   r?   r   <  s    Overall)r   r   htmlsconvos)r   r   )r   r<   r6   r   rj   r   r[   r   r   itemsroundr   sumr   r   )r=   r   r   resultsper_cat_totalper_cat_correctr   r   scoresrcatevaluation_resulttotcorrr   printable_resultsdomaincatsacc_sumnum_sum	total_numoverall_accr>   r   r?   __call__   sv   7





zMMMUVLMEval.__call__)r1   r2   r3   N)r4   r5   r6   r7   r8   r7   r9   r:   )rA   rB   rC   r:   )rT   rU   rC   rV   )rb   r7   rC   rc   )r   r:   rC   r   )r   r:   rC   r   )r   r   rC   r   )__name__
__module____qualname__r   r@   staticmethodrS   ra   r;   r   r   r   r>   r>   r>   r?   r      s.    
Pr   responser:   r]   rU   r\   r   rC   c           
        sd  dD ]}|  |} qd|  d } g }|D ]}d| d| v r#|| q|s8|D ]}d| d| v r7|| q(|sZt|  dkrZ| D ]\}}|rY| |  v rY|| qF|s`|d S t|dkrj|d S g  |D ]1}| d| d}	|	dkr| d| d}	|	dkr||r|  ||  }	 |	 qn|tt	t
t  fd	d
d S )N),.!?;ri   ' ()   r   rX   c                   s    |  S )Nr>   )ry   startsr>   r?   <lambda>m  s    z._parse_multi_choice_response.<locals>.<lambda>rp   )r   r[   r   rf   r   lowerrfindrj   r7   maxr   )
r   r]   r\   char
candidateschoicerk   anscanposr>   r   r?   r   I  s>   


$r   sboolc                 C  s,   zt | dd W dS  ty   Y dS w )Nr   rt   TF)floatreplacer   )r  r>   r>   r?   _check_is_numberp  s   r  c                 C  sv   |   } t| r(| dd} ztt| d}|gW S  ty'   |  g Y S w t| dkr3|  gS d|  | d gS )Nr   rt      rX   r   )r   r  r  r   r
  r   r   r   )r  r   r>   r>   r?   _normalize_strx  s   &r  c                 C  s8   dd l }d}d}d}||| |||  |||  S )Nr   z-?\b\d{1,3}(?:,\d{3})+\bz-?\d+(?:\.\d+)?[eE][+-]?\d+z3-?(?:\d+\.\d+|\.\d+|\d+\b)(?![eE][+-]?\d+)(?![,\d]))r   findall)r  _repattern_commaspattern_scientificpattern_simpler>   r>   r?   _extract_numbers  s   


r  c                   sh   dd l  d fdd}|| }| }|D ]	}|t| qg }|D ]	}|t| q#tt|S )	Nr   respr:   rC   rU   c           
        s   |    d }  d| }g d}g }t|D ]@\}}g |}|t|d kr.|d d }|D ]}||v rM||d   }	|rKt|	t|k rM|	}q2|rY|dvrY|| q|p^| gS )Nr   z\.\s(?=[A-Z])|\n)z	could be zso zis zthus z
therefore zfinal zanswer zresult rX   =r   )ri   r   r   r   r   r   ri   r   )r   r   rf   	enumerater   r[   )
r  r   
indicatorskeysry   r  candsshortestindpartr  r>   r?   get_key_subresponses  s&   



z2_parse_open_response.<locals>.get_key_subresponses)r  r:   rC   rU   )r   copyr   r  r  r   r   fromkeys)r   r  	key_resps	pred_listr   outxr>   r  r?   r     s   r   predsc                 C  s|   t | trg }| D ]	}|t| q	nt| }|D ]!}t |tr4|D ]}t |tr2||v r2  dS q#q||v r; dS qdS )NTF)r   r   r   r  r:   )r   r&  norm_answersr  pnar>   r>   r?   r     s"   

r   )r   r:   r]   rU   r\   r   rC   r:   )r  r:   rC   r	  )r  r:   )r  r:   rC   rU   )r   r:   rC   rU   )r&  rU   rC   r	  ) __doc__
__future__r   rM   rJ   r   typingr   r   r   r   r   r   PILr   sglang.testr	   r   sglang.test.simple_eval_commonr
   r   r   r   r   r   r   r   r  r  r  r   r   r>   r>   r>   r?   <module>   s&     
  
0
'


*