o
    پiP                     @   s   d dl Z d dlZd dlmZ d dlZd dlZd dlZd dlZd dl	m
Z
 d dlmZ d dlmZmZmZmZ dZdZdZd	Zd
ZG dd deZG dd deZG dd deZG dd deZG dd deeeZdS )    N)ThreadPoolExecutor)Image)kill_process_tree)!DEFAULT_TIMEOUT_FOR_SERVER_LAUNCHDEFAULT_URL_FOR_TESTCustomTestCasepopen_launch_serverzrhttps://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/man_ironing_on_back_of_suv.pngz`https://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/images/sgl_logo.pngzlhttps://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/videos/jobs_presenting_ipod.mp4zjhttps://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/audios/Trump_WEF_2018_10s.mp3zahttps://raw.githubusercontent.com/sgl-project/sgl-test-files/refs/heads/main/audios/bird_song.mp3c                   @   sx   e Zd ZU eed< g Zeed< ddgZeed< dZe	ed< e
dd	 Ze
d
d Zdd Zdd ZdedefddZdS )TestOpenAIMLLMServerBasemodel
extra_args--trust-remote-codez--enable-multimodal
fixed_argsTtrust_remote_codec                 C   sl   t | _d| _t| j}| jr|| j n|dd | jD  t| j	| jt
| j|d| _|  jd7  _d S )N	sk-123456c                 s   s    | ]	}|d kr|V  qdS )r   N ).0argr   r   I/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/vlm_utils.py	<genexpr>0   s    z6TestOpenAIMLLMServerBase.setUpClass.<locals>.<genexpr>)timeoutapi_key
other_argsz/v1)r   base_urlr   listr   r   extendr   r   r
   r   process)clsr   r   r   r   
setUpClass%   s    

z#TestOpenAIMLLMServerBase.setUpClassc                 C   s   t | jj d S N)r   r   pid)r   r   r   r   tearDownClass=   s   z&TestOpenAIMLLMServerBase.tearDownClassc                 C      |   S r   get_request_kwargsselfr   r   r   get_vision_request_kwargsA      z2TestOpenAIMLLMServerBase.get_vision_request_kwargsc                 C   s   i S r   r   r$   r   r   r   r#   D   s   z+TestOpenAIMLLMServerBase.get_request_kwargsurlreturnc                 C   s   t jd}|d u rt |dd }t j||}t j|dd t j|sMt	|}|
  t|d}||j W d    |S 1 sHw   Y  |S )Nz~/.cache/T)exist_okwb)ospath
expanduser
ValueErrorsplitjoinmakedirsexistsrequestsgetraise_for_statusopenwritecontent)r%   r(   	cache_dir	file_name	file_pathresponsefr   r   r   get_or_download_fileG   s   

z-TestOpenAIMLLMServerBase.get_or_download_fileN)__name__
__module____qualname__str__annotations__r   r   r   r   boolclassmethodr   r    r&   r#   rA   r   r   r   r   r	      s   
 

r	   c                   @   sB   e Zd Zdd Zdd Zdd Zdefdd	Zd
d Zdd Z	dS )AudioOpenAITestMixinc                 C   s8   g d}|D ]}||  v sJ d| d| dqd S )N)z	thank youzit's a privilege to be hereleaderscienceartu   audio_response: ｜u   ｜ should contain ｜u   ｜)lower)r%   text
check_list
check_wordr   r   r   "verify_speech_recognition_responseY   s   z7AudioOpenAITestMixin.verify_speech_recognition_responsec                 C   s&   ddd| idd|dgdg}|S )Nuser	audio_urlr(   typerS   rN   rU   rN   roler;   r   )r%   promptaudio_file_namemessagesr   r   r   prepare_audio_messagesf   s   z+AudioOpenAITestMixin.prepare_audio_messagesc                 C   r!   r   r"   r$   r   r   r   get_audio_request_kwargsy   r'   z-AudioOpenAITestMixin.get_audio_request_kwargsr(   c           	      C   s   |  |}tjd| jd}| ||}|jjjdd|dddd|  }|j	d j
j}td td	| d
|  td | }| | | t|d | S )Nr   r   r   defaultr      Fr
   r[   temperature
max_tokensstream------------------------------zaudio z response:
r   )rA   openaiClientr   r\   chatcompletionscreater]   choicesmessager;   printrM   assertIsNotNoneassertGreaterlen)	r%   r(   rY   categoryaudio_file_pathclientr[   r?   audio_responser   r   r   get_audio_response|   s(   

	
z'AudioOpenAITestMixin.get_audio_responsec                 C   s   | j tddd}| | d S )NzGListen to this audio and write down the audio transcription in English.speech)rq   )ru   AUDIO_TRUMP_SPEECH_URLrQ   r%   rt   r   r   r   test_audio_speech_completion   s   z1AudioOpenAITestMixin.test_audio_speech_completionc                 C   s   |  tdd}d|v sJ d S )NzSPlease listen to the audio snippet carefully and transcribe the content in English.ambientbird)ru   AUDIO_BIRD_SONG_URLrx   r   r   r   test_audio_ambient_completion   s   z2AudioOpenAITestMixin.test_audio_ambient_completionN)
rB   rC   rD   rQ   r\   r]   rE   ru   ry   r}   r   r   r   r   rI   X   s    	rI   c                   @   sL   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )ImageOpenAITestMixinc                 C   s   t j| j| jd}g }|dkr|ddtid n|dkr)|ddtid n	 |ddd	 |jjj	dd
d|dgdd| 
 }|jd jjdksQJ |jd jj}t|ts_J d S )Nr^   r   	image_urlr(   rU   r      rN   "Describe this image in a sentence.rV   r_   rR   rW   r
   r[   rb   	assistantr   )rf   rg   r   r   appendIMAGE_MAN_IRONING_URLIMAGE_SGL_LOGO_URLrh   ri   rj   r&   rk   rl   rX   r;   
isinstancerE   )r%   image_idrs   r;   r?   rN   r   r   r   run_decode_with_image   s>   
	z*ImageOpenAITestMixin.run_decode_with_imagec                 C   sL   g dd }t d}t|| j| W d    d S 1 sw   Y  d S )N)r   r         )r   r   mapr   )r%   	image_idsexecutorr   r   r   test_mixed_batch   s   
"z%ImageOpenAITestMixin.test_mixed_batchc                 C   s   |j d jjdksJ |j d jj}t|tsJ d|v s	 d|v s:d	|v s:d
|v s:d|v s:d|v s:J d| dd|v sRd|v sRd|v sRd|v sRJ d| d|jsWJ |js\J |jj	dksdJ |jj
dkslJ |jjdkstJ d S )Nr   r   manpersondrivertext: z&, should contain man, person or drivercabtaxiSUVvehiclecarz/, should contain cab, taxi, SUV, vehicle or carironhangclothholdingz-, should contain iron, hang, cloth or holding)rk   rl   rX   r;   r   rE   idcreatedusageprompt_tokenscompletion_tokenstotal_tokens)r%   r?   rN   r   r   r   verify_single_image_response   s&   


"


z1ImageOpenAITestMixin.verify_single_image_responsec                 C   s   t j| j| jd}|jjjdddddtidddd	gd
gdd|  }t	d t	d|j
d jj  t	d | | d S )Nr^   r_   rR   r   r(   r   rN   r   rV   rW   r   r   re   zSingle image response:
r   )rf   rg   r   r   rh   ri   rj   r   r&   rm   rk   rl   r;   r   )r%   rs   r?   r   r   r   !test_single_image_chat_completion   s*   
z6ImageOpenAITestMixin.test_single_image_chat_completionc              	   C   s  t j| j| jd}|jjjdddddtidddd	gd
dddd	gd
dddd	gd
gdd|  }|j	d j
jdksBJ |j	d j
j}t|tsPJ d|v s`d|v s`J d| d|jseJ |jsjJ |jjdksrJ |jjdkszJ |jjdksJ d S )Nr^   r_   rR   r   r(   r   rN   r   rV   rW   r   z?There is a man at the back of a yellow cab ironing his clothes.zRepeat your previous answer.r   r   r   r   r   z, should contain man or cabr   )rf   rg   r   r   rh   ri   rj   r   r&   rk   rl   rX   r;   r   rE   r   r   r   r   r   r   r%   rs   r?   rN   r   r   r   test_multi_turn_chat_completion  sL   

!$


z4ImageOpenAITestMixin.test_multi_turn_chat_completionc              	   C   s^  t j| j| jd}|jjjdddddtiddddtidddd	d
gdgdd| 	 }|j
d jjdks:J |j
d jj}t|tsHJ td td|  td d|v ssd|v ssd|v ssd|v ssd|v ssJ d| dd|v sd|v sd|v sd|v sJ d| d|jsJ |jsJ |jjdksJ |jjdksJ |jjdksJ d S )Nr^   r_   rR   r   r(   zmulti-imagesrU   r   
modalitiesrN   z7I have two very different images. Please describe them.rV   rW   r   r   r   re   zMulti images response:
r   r   r   r   r   r   z+, should contain man, cab, SUV, taxi or carlogoz"S"SGgraphicz), should contain logo, S or SG or graphicr   )rf   rg   r   r   rh   ri   rj   r   r   r&   rk   rl   rX   r;   r   rE   rm   r   r   r   r   r   r   r   r   r   r   !test_multi_images_chat_completionE  sV   


"


z6ImageOpenAITestMixin.test_multi_images_chat_completionc                 C   s  ddl m}m} d}|||dd}t|}tjd|d |td}| }||	 }	g }
|	D ]!}t
|}t }|j|dd t| d	}|
| q0d
g dg}dddidd}|
D ]}d||d d< |d d |  qbddd}|d d | |S )Nr   )VideoReadercpu
   )ctxr   )dtypeJPEG)formatzutf-8rR   rW   r   r(   zdata:image/jpeg;base64,{}imager   r;   rN   $Please describe the video in detail.rV   )decordr   r   rp   nplinspaceinttolist	get_batchasnumpyr   	fromarrayioBytesIOsavepybase64	b64encodegetvaluedecoder   r   copy)r%   
video_pathr   r   max_frames_numvrtotal_frame_numuniform_sampled_frames	frame_idxframesbase64_framesframepil_imgbuff
base64_strr[   frame_formatbase64_framerY   r   r   r   prepare_video_images_messagesy  s:   

z2ImageOpenAITestMixin.prepare_video_images_messagesc                 C   s,  t }| |}tj| j| jd}| |}|jjj	d|dddd}|j
d jj}td td|  td d	|v sKd
|v sKd|v sKJ d| dd|v sod|v sod|v sod|v sod|v sod|v sod|v soJ d| dd|v sd|v sd|v sd|v sJ d| d| | | t|d d S )Nr^   r_   r      Fra   re   zVideo images response:
iPoddevice
microphonezT
        ====================== video_images response =====================
        z
        ===========================================================
        should contain 'iPod' or 'device' or 'microphone'
        r   r   
individualspeaker	presenterStevehandz
        ===========================================================
        should contain 'man' or 'person' or 'individual' or 'speaker' or 'presenter' or 'Steve' or 'hand'
        presentexaminedisplayholdz
        ===========================================================
        should contain 'present' or 'examine' or 'display' or 'hold'
        )VIDEO_JOBS_URLrA   rf   rg   r   r   r   rh   ri   rj   rk   rl   r;   rm   rn   ro   rp   r%   r(   r>   rs   r[   r?   video_responser   r   r   !test_video_images_chat_completion  sV   





z6ImageOpenAITestMixin.test_video_images_chat_completionN)rB   rC   rD   r   r   r   r   r   r   r   r   r   r   r   r   r~      s    )34-r~   c                   @   s   e Zd Zdd Zdd ZdS )VideoOpenAITestMixinc                 C   s&   ddd| iddddgdg}|S )	NrR   	video_urlr(   )rU   r   rN   r   rV   rW   r   )r%   r   r[   r   r   r   prepare_video_messages  s   z+VideoOpenAITestMixin.prepare_video_messagesc                 C   s\  t }| |}tj| j| jd}| |}|jjj	dd|dddd| 
 }|jd jj }td td|  td d	|v sWd
|v sWd|v sWd|v sWJ d| dd|v swd|v swd|v swd|v swd|v swd|v swJ d| dd|v sd|v sd|v sd|v sJ d| dd|v sd|v sJ d| d| | | t|d d S )Nr^   r_   r   r   Fra   re   zVideo response:
ipodr   r   phonezvideo_response: z#, should contain 'iPod' or 'device'r   r   r   r   r   r   z, should either have 'man' in video_response, or 'person' in video_response, or 'individual' in video_response or 'speaker' in video_response or 'presenter' or 'hand' in video_responser   r   r   r   z;, should contain 'present', 'examine', 'display', or 'hold'blackdarkz", should contain 'black' or 'dark'r   )r   rA   rf   rg   r   r   r   rh   ri   rj   r&   rk   rl   r;   rM   rm   rn   ro   rp   r   r   r   r   test_video_chat_completion  sT   


	







z/VideoOpenAITestMixin.test_video_chat_completionN)rB   rC   rD   r   r   r   r   r   r   r     s    r   c                   @   s   e Zd Zdd ZdS )OmniOpenAITestMixinc                 C   s   t j| j| jd}dddtidddtiddd	d
gdg}|jjjd|dddd}|j	d j
j}td td|  td | j|d | j|d d S )Nr^   rR   r   r(   r   rS   rT   rN   zI have an image and audio, which are not related at all. Please:  1. Describe the image in a sentence, 2. Repeat the exact words from the audio I provided. Be exactrV   rW   r_   r   r`   Fra   re   zMixed modality response:
)r?   )rN   )rf   rg   r   r   r   rw   rh   ri   rj   rk   rl   r;   rm   r   rQ   )r%   rs   r[   r?   rN   r   r   r   #test_mixed_modality_chat_completion*  s6   z7OmniOpenAITestMixin.test_mixed_modality_chat_completionN)rB   rC   rD   r   r   r   r   r   r   '  s    r   )r   r.   concurrent.futuresr   numpyr   rf   r   r6   PILr   sglang.srt.utilsr   sglang.test.test_utilsr   r   r   r   r   r   r   rw   r|   r	   rI   r~   r   r   r   r   r   r   <module>   s0    9S  ;
C