o
    wi                      @   s   d dl Z d dlmZmZ d dlZddlmZ ddlm	Z	m
Z
 ddlmZmZmZmZmZ ddlmZmZ ddlmZ G d	d
 d
eZG dd deddZG dd deZdgZdS )    N)OptionalUnion   )BatchFeature)
ImageInputmake_nested_list_of_images)ImagesKwargsMultiModalDataProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)	to_py_objc                   @   sJ   e Zd ZU ee ed< ee ed< ee ed< ee ed< ee ed< dS )Gemma3ImagesKwargsdo_pan_and_scanpan_and_scan_min_crop_sizepan_and_scan_max_num_crops"pan_and_scan_min_ratio_to_activatedo_convert_rgbN)__name__
__module____qualname__r   bool__annotations__intfloat r   r   i/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/transformers/models/gemma3/processing_gemma3.pyr      s   
 r   c                   @   s0   e Zd ZU eed< ddddddddd	Zd
S )Gemma3ProcessorKwargsimages_kwargsFT)paddingreturn_mm_token_type_ids      g333333?)r   r   r   r   )text_kwargsr    N)r   r   r   r   r   	_defaultsr   r   r   r   r   $   s   
 
r   F)totalc                
       s   e Zd ZddgZdZdZ		ddef fdd	Z				dd
ede	e
eee
 ee f dee defddZdddZdd Zdd Zedd Z  ZS )Gemma3Processorimage_processor	tokenizerAutoImageProcessorAutoTokenizerNr#   image_seq_lengthc                    sh   || _ |j| _|j| _|j| _d|jg| }d|j | |j d| _t jd|||d| d S )N z

)r)   r*   chat_templater   )	r-   image_token_id	boi_tokenimage_tokenjoin	eoi_tokenfull_image_sequencesuper__init__)selfr)   r*   r/   r-   kwargsimage_tokens_expanded	__class__r   r   r7   9   s   
zGemma3Processor.__init__imagestextr9   returnc                    sn  |d u r|d u rt djtfdjji|}t|tr"|g}nt|ts2t|d ts2t di }|d urt|}j	|fi |d }|sRfdd|D }t
|t
|krit dt
| d	t
| d
t|d  fdd|D }	tt|||	D ]a\}
\}} dd tj|D }t
|t
|krt dt
| dt
| dttt |D ]-\}}|rdj ddjg|  }|d | | ||t
j d   }|||
< qqfdd|D }|d dd }|d dd}jd d|i|d }j||dgd |r-t|d }t|}d||jk< | |d< ti |||dS )!Nz+Provide at least one of `text` or `images`.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsr    c                    s"   g | ]}d   jgt| qS ) )r3   r1   len.0r=   r8   r   r   
<listcomp>l   s   " z,Gemma3Processor.__call__.<locals>.<listcomp>z1Received inconsistently sized batches of images (z) and text (z).	num_cropsc                    s&   g | ]} fd dt t|D qS )c                    s   g | ]}  d qS )r   )pop)rD   _rG   r   r   rF   u   s    z7Gemma3Processor.__call__.<locals>.<listcomp>.<listcomp>)rangerB   rC   rJ   r   r   rF   u   s   & c                 S   s   g | ]}|  qS r   )start)rD   mr   r   r   rF   w   s    zPrompt contained z image tokens but received z images.zHere is the original image z0 and here are some crops to help you see better rA   c                    s   g | ]
}|  j jqS r   )replacer1   r5   )rD   promptrE   r   r   rF      s    r%   return_tensorsr"   Fr>   image)
modalities	input_ids   token_type_ids)datatensor_typer   )
ValueError_merge_kwargsr   r*   init_kwargs
isinstancestrlistr   r)   rB   r   rH   	enumerateziprefinditerr1   reversedr3   _check_special_mm_tokensnparray
zeros_liker0   tolistr   )r8   r=   r>   videosaudior9   output_kwargsimage_inputsbatched_imagesbatch_num_crops	batch_idxrO   image_indexesnumidxformatted_image_textrP   r"   text_inputs	array_idsmm_token_type_idsr   )rG   r8   r   __call__O   sh   
&

zGemma3Processor.__call__c                 K   sH   i }|dur| j gt| }dgt| }|||d tdi |S )a  
        Computes the number of placeholder tokens needed for multimodal inputs with the given sizes.

        Args:
            image_sizes (`list[list[int]]`, *optional*):
                The input sizes formatted as (height, width) per each image.

        Returns:
            `MultiModalData`: A `MultiModalData` object holding number of tokens per each of the provided
            input modalities, along with other useful data.
        NrT   )num_image_tokensnum_image_patchesr   )r-   rB   updater	   )r8   image_sizesr9   vision_datarw   rx   r   r   r   _get_num_multimodal_tokens   s   z*Gemma3Processor._get_num_multimodal_tokensc                 O      | j j|i |S )z
        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.batch_decode`]. Please
        refer to the docstring of this method for more information.
        )r*   batch_decoder8   argsr9   r   r   r   r~         zGemma3Processor.batch_decodec                 O   r}   )z
        This method forwards all its arguments to GemmaTokenizerFast's [`~PreTrainedTokenizer.decode`]. Please refer to
        the docstring of this method for more information.
        )r*   decoder   r   r   r   r      r   zGemma3Processor.decodec                 C   s(   | j jdg }| jj}tt|| S )NrU   )r*   model_input_namesr)   r]   dictfromkeys)r8   tokenizer_input_namesimage_processor_input_namesr   r   r   r      s   z!Gemma3Processor.model_input_names)Nr#   )NNNN)N)r   r   r   
attributesimage_processor_classtokenizer_classr   r7   r   r   r   r   r]   r   r   r   rv   r|   r~   r   propertyr   __classcell__r   r   r;   r   r(   4   s6    

Jr(   )r`   typingr   r   numpyrd   feature_extraction_utilsr   image_utilsr   r   processing_utilsr   r	   r
   r   r   tokenization_utils_baser   r   utilsr   r   r   r(   __all__r   r   r   r   <module>   s    
