o
    پi                     @   s`   d dl Z d dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZmZ G dd deZdS )    N)ListUnion)_num_image_tokens)PixtralForConditionalGenerationPixtralVisionModel)BaseMultimodalProcessorMultimodalSpecialTokensc                       sp   e Zd ZeegZdZdZdZde	de	de
e	e	f fddZ fd	d
Zdd Zdeeeef  fddZ  ZS )PixtralProcessorz<pad>      image_widthimage_heightreturnc          
      C   sp   | j  }}| j }}t|| || }|dkr)tt|| }tt|| }t||f||f\}}	|	|fS )N   )
image_size
patch_sizemaxintmathfloor _get_pixtral_hf_num_image_tokens)
selfr   r   	max_width
max_heightpatch_widthpatch_heightrationrowsncols r   \/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/multimodal/processors/pixtral.pyget_patch_grid_size   s   

z$PixtralProcessor.get_patch_grid_sizec                    s   t  j|||g|R i | t|dtj| _|j| _| jj| _| jj| _| j| j	_t
| jdr6| jj| j	_t|j| jd|| _|jdt|d| ji d S )Nimage_token_indexspatial_merge_size)image_tokenimage_token_id	pad_token)super__init__getattrr   DEFAULT_IMAGE_TOKEN_IDIM_TOKEN_IDvision_configr   r   
_processorhasattrr#   r   r$   build	mm_tokens	tokenizeradd_special_tokens	PAD_TOKEN)r   	hf_configserver_argsr-   argskwargs	__class__r   r    r(   /   s(   


zPixtralProcessor.__init__c                    s>   | j |jd |jd d\}}|| j || j f}||S )Nr   r   )r   r   )r!   sizer   resize)r   imagenum_w_tokensnum_h_tokensnew_sizer   r   r    _resizeH   s   

zPixtralProcessor._resize
image_datac                    sn    j | j|dd}|jr! fdd|jD }tj| I d H |_ | j\}}	}
||	  j jj	dS )NT)promptmultimodal_tokensrA   return_textc                    s   g | ]}  |qS r   )r@   ).0r<   r   r   r    
<listcomp>_   s    z:PixtralProcessor.process_mm_data_async.<locals>.<listcomp>)mm_items	input_idsim_token_idim_token)
load_mm_datar0   imagesasynciogatherprocess_and_combine_mm_datatolistr+   r-   r$   )r   rA   
input_textrequest_objr6   r7   mm_dataresize_tasksrH   rI   _r   rF   r    process_mm_data_asyncP   s$   
z&PixtralProcessor.process_mm_data_async)__name__
__module____qualname__r   r   modelsr3   IMG_BREAK_TOKEN_IDIMG_END_TOKEN_IDr   tupler!   r(   r@   r   r   strbytesrW   __classcell__r   r   r8   r    r	      s"    

r	   )rN   r   typingr   r   4transformers.models.pixtral.image_processing_pixtralr   r   sglang.srt.models.pixtralr   r   /sglang.srt.multimodal.processors.base_processorr   r   r	   r   r   r   r    <module>   s    