o
    ei                     @   s   d Z ddlmZ ddlmZ ddlmZmZmZ ddl	m
Z
mZmZ ddlmZmZ eeZG dd ded	d
ZeG dd deZdgZdS )zq
Processor class for InstructBLIP. Largely copy of Blip2Processor with addition of a tokenizer for the Q-Former.
   )BatchFeature)
ImageInput)ProcessingKwargsProcessorMixinUnpack)
AddedTokenPreTokenizedInput	TextInput)auto_docstringloggingc                   @   s(   e Zd Zddddddddddd	iZdS )InstructBlipProcessorKwargstext_kwargsTF    )	add_special_tokenspaddingstridereturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_token_type_idsreturn_lengthverboseN)__name__
__module____qualname__	_defaults r   r   v/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/instructblip/processing_instructblip.pyr      s    r   F)totalc                
       sj   e Zd Zd fdd	Ze		ddedB deeB ee B ee B de	e
 defdd	Zed
d Z  ZS )InstructBlipProcessorNc                    sP   t |dstdddd| _|j| jgdd n|j| _|| _t ||| dS )a4  
        qformer_tokenizer (`AutoTokenizer`):
            An instance of ['PreTrainedTokenizer`]. The Q-Former tokenizer is a required input.
        num_query_tokens (`int`, *optional*):
            "
            Number of tokens used by the Qformer as queries, should be same as in model's config.
        image_tokenz<image>FT)
normalizedspecial)special_tokensN)hasattrr   r    
add_tokensnum_query_tokenssuper__init__)selfimage_processor	tokenizerqformer_tokenizerr&   kwargs	__class__r   r   r(   .   s   
zInstructBlipProcessor.__init__imagestextr-   returnc                    s  |d u r|d u rt d| jtfd| jji|}|d dd }i }|d urt|tr0|g}nt|ts@t|d ts@t d| j	|fi |d }|d|d< |d	|d
< |d 
dd urm|d d  | j8  < | j|fi |d }|d ur| jj| j }	d|d d< d|d d< d|d d< | j|	fi |d  |D ] fdd| D |< q|| |d ur| j|fi |d }
||
 t||d}|S )Nz,You have to specify at least images or text.tokenizer_init_kwargsr   return_tensorsr   zAInvalid input text. Please provide a string, or a list of strings	input_idsqformer_input_idsattention_maskqformer_attention_mask
max_lengthFr   r   
truncationc                    s   g | ]}  | qS r   r   ).0sampleimage_text_encodingkr   r   
<listcomp>h   s    z2InstructBlipProcessor.__call__.<locals>.<listcomp>images_kwargs)tensor_type)
ValueError_merge_kwargsr   r+   init_kwargspop
isinstancestrlistr,   getr&   r    contentupdater*   r   )r)   r0   r1   r-   output_kwargsr4   encodingqformer_text_encodingtext_encodingimage_tokensimage_encodingr   r=   r   __call__?   sH   


zInstructBlipProcessor.__call__c                 C   s$   | j j}| jj}ddg}|| | S )Nr6   r8   )r+   model_input_namesr*   )r)   tokenizer_input_namesimage_processor_input_namesqformer_input_namesr   r   r   rT   s   s   z'InstructBlipProcessor.model_input_names)N)NN)r   r   r   r(   r
   r   r	   r   rI   r   r   r   rS   propertyrT   __classcell__r   r   r.   r   r   ,   s     3r   N)__doc__image_processing_utilsr   image_utilsr   processing_utilsr   r   r   tokenization_utils_baser   r   r	   utilsr
   r   
get_loggerr   loggerr   r   __all__r   r   r   r   <module>   s   

N