o
    ei                     @   s   d dl ZddlmZ ddlmZmZ ddlmZm	Z	m
Z
 ddlmZmZ ddlmZ G dd	 d	ed
dZeG dd de	ZdgZdS )    N   )BatchFeature)
ImageInputmake_nested_list_of_images)ProcessingKwargsProcessorMixinUnpack)PreTokenizedInput	TextInput)auto_docstringc                   @   s   e Zd ZdddiiZdS )Gemma3nProcessorKwargstext_kwargspaddingFN)__name__
__module____qualname__	_defaults r   r   l/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/gemma3n/processing_gemma3n.pyr      s    r   F)totalc                       s   e Zd Z			ddedef fddZe			ddedB d	eeB e	e B e	e B d
e
je	e B e	e
j B e	e	e  B dB dee def
ddZedd Z  ZS )Gemma3nProcessorN      audio_seq_lengthimage_seq_lengthc           
         s   || _ |j| _|j| _|j| _d|jg| }d|j | |j d| _|| _|j| _|j	| _	|j
| _
d|j
g| }	d|j	 |	 |j d| _t jd||||d| dS )a  
        audio_seq_length (int, *optional*, defaults to 188):
            The number of audio soft tokens that will be added to the text prompt
        image_seq_length (int, *optional*, defaults to 256):
            The number of image soft tokens that should be added to
         z

)feature_extractorimage_processor	tokenizerchat_templateNr   )r   audio_token_id	boa_tokenaudio_tokenjoin	eoa_tokenfull_audio_sequencer   image_token_id	boi_tokenimage_token	eoi_tokenfull_image_sequencesuper__init__)
selfr   r   r   r   r   r   kwargsaudio_tokens_expandedimage_tokens_expanded	__class__r   r   r,   !   s(   
zGemma3nProcessor.__init__imagestextaudior.   returnc                    s  |d u r|d u r|d u rt d jtfd jji|}t|tr&|g}nt|ts6t|d ts6td|d urZ j	|fi |d }|sP fdd|D } fdd|D }ni }|d ur j
|}t|} j
|fi |d	 }|s fd
d|D }t|t|krt dt| dt| d fdd|D }ni }|d dd }	 jdd|i|d ddi}
 j||
dgd |
d }t|}d|| jk< d|| jk< dd |
 D }
| |
d< ti |
|||	dS )Nz5Provide at least one of `text`, `images`, or `audio`.tokenizer_init_kwargsr   zAInvalid input text. Please provide a string, or a list of stringsaudio_kwargsc                    s   g | ]} j qS r   )r"   ).0_r-   r   r   
<listcomp>a   s    z-Gemma3nProcessor.__call__.<locals>.<listcomp>c                       g | ]
}|  j jqS r   )replacer"   r%   r9   promptr;   r   r   r<   d       images_kwargsc                    s"   g | ]}d   jgt| qS ) )r#   r(   len)r9   r3   r;   r   r   r<   o   s   " z1Received inconsistently sized batches of images (z) and text (z).c                    r=   r   )r>   r(   r*   r?   r;   r   r   r<   w   rA   r   return_tensorsr4   npimage)
modalities	input_ids   r   c                 S   s   i | ]	\}}||  qS r   )tolist)r9   kvr   r   r   
<dictcomp>   s    z-Gemma3nProcessor.__call__.<locals>.<dictcomp>token_type_ids)datatensor_typer   )
ValueError_merge_kwargsr   r   init_kwargs
isinstancestrlist	TypeErrorr   r   fetch_imagesr   rD   pop_check_special_mm_tokensrF   
zeros_liker&   r    itemsrK   r   )r-   r3   r4   r5   r.   output_kwargsaudio_inputsbatched_imagesimage_inputsrE   text_inputs	array_idsrO   r   r;   r   __call__G   sR   
 
zGemma3nProcessor.__call__c                 C   s<   | j jdg }| jj}| jj}dd |D }t|| | S )NrO   c                 S   s   g | ]}|d kr|qS )	num_cropsr   )r9   namer   r   r   r<      s    z6Gemma3nProcessor.model_input_names.<locals>.<listcomp>)r   model_input_namesr   r   rW   )r-   tokenizer_input_namesimage_processor_input_namesaudio_processor_input_namesr   r   r   rg      s
   z"Gemma3nProcessor.model_input_names)Nr   r   )NNN)r   r   r   intr,   r   r   r
   r	   rW   rF   ndarrayfloatr   r   r   rd   propertyrg   __classcell__r   r   r1   r   r      s4    &&@r   )numpyrF   feature_extraction_utilsr   image_utilsr   r   processing_utilsr   r   r   tokenization_utils_baser	   r
   utilsr   r   r   __all__r   r   r   r   <module>   s   
q