o
    ei                     @   s   d Z ddlZddlmZ ddlmZmZ ddlmZ ddl	m
Z
mZmZ ddlmZmZmZ dd	lmZmZ dd
lmZmZ erFddlmZ eeZdefddZdd ZG dd deddZeG dd deZdgZ dS )z
Processor class for IDEFICS2.
    N)
accumulate)TYPE_CHECKINGUnion   )BatchFeature)
ImageInputis_valid_image
load_image)ProcessingKwargsProcessorMixinUnpack)
AddedToken	TextInput)auto_docstringlogging)PreTokenizedInputreturnc                 C   s   t | to	| dS )Nhttp)
isinstancestr
startswith)val r   n/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/idefics2/processing_idefics2.pyis_url(   s   r   c                 C   s   t | pt| S N)r   r   )elemr   r   r   is_image_or_image_url,   s   r   c                   @   s   e Zd ZdddddiZdS )Idefics2ProcessorKwargstext_kwargsTF)add_special_tokenspaddingis_split_into_wordsN)__name__
__module____qualname__	_defaultsr   r   r   r   r   0   s    r   F)totalc                       s   e Zd Z	ddededB f fddZdd Ze		dd	ee	e B e	e	e  B d
e
ede	e e	d f dee defddZ  ZS )Idefics2ProcessorN@   image_seq_lenchat_templatec                    s   t |ds,tddddj| _tddddj| _d| j| jgi}|| || j| _n|j| _|j| _|j| _tdddd| _	|d| j	gi || _
t j|||d	 d
S )ap  
        image_seq_len (`int`, *optional*, defaults to 64):
            The length of the image sequence i.e. the number of <image> tokens per image in the input.
            This parameter is used to build the string from the input prompt and image tokens and should match the
            config.perceiver_config.resampler_n_latents value for the model used.
        image_tokenz<fake_token_around_image>FT)
normalizedspecialz<image>additional_special_tokensz<end_of_utterance>)r+   N)hasattrr   contentfake_image_tokenr,   r    convert_tokens_to_idsimage_token_idimage_boundary_tokenend_of_utterance_tokenr*   super__init__)selfimage_processor	tokenizerr*   r+   kwargstokens_to_add	__class__r   r   r8   <   s   
	
zIdefics2Processor.__init__c                 C   sT   g }|D ]#}g }|D ]}t |r|| q
t|r!|t| q
|| q|S r   )r   appendr   r	   )r9   promptsprompt_imagespromptimagesr   r   r   r   _extract_images_from_promptsV   s   z.Idefics2Processor._extract_images_from_promptsrD   textr   r<   r   c              
      s  |d u rd u rt d| jtfd| jji|}|d dd }g }i }|d urt|tr2|g}nt|tsBt|d tsBt d| j	}| j
}	| |	| j  | }
| jjr[|
d }
g }tt| d}|D ])}|||	 ||	|
}|| | | }|| d	|}|| qj| j|fi |d }| j||d
gd || d urZtrggnltttfrtd r|d urt|tkrt d|	 dt| d	|	 dt d	dgtt|   fddtt|D n#gntttfs'td ttfs'td d s't ddd D }|d urC||ksCt d| d| ddd D | jfi |d }|| t||dS )Nz+You must provide either `text` or `images`.tokenizer_init_kwargsr   return_tensorsr   zAInvalid input text. Please provide a string, or a list of strings   z
(?=[^\s<]) image)
modalitieszThe total number of zP tokens in the prompts should be the same as the number of images passed. Found z tokens and z images.c                    s$   g | ]} |  |d    qS )   r   ).0icumsum_images_in_textrD   r   r   
<listcomp>   s    z.Idefics2Processor.__call__.<locals>.<listcomp>zdInvalid input images. Please provide a single image or a list of images or a list of list of images.c                 S      g | ]}t |qS r   )lenrN   sampler   r   r   rR          z!The number of images in the text z and images  z should be the same.c                 S   s   g | ]	}d d |D qS )c                 S   rS   r   )r	   )rN   imr   r   r   rR      rW   z9Idefics2Processor.__call__.<locals>.<listcomp>.<listcomp>r   rU   r   r   r   rR      s    images_kwargs)tensor_type)
ValueError_merge_kwargsr   r;   init_kwargspopr   r   listr2   r,   r*   r:   do_image_splittingrecompileescaper@   countreplacesub_check_special_mm_tokensupdater   tuplesumrT   r   ranger   )r9   rD   rF   r<   output_kwargsrH   n_images_in_textinputsr2   r,   	image_strprompt_stringsclosing_fake_patternrV   text_inputsn_images_in_imagesimage_inputsr   rP   r   __call__b   s   






zIdefics2Processor.__call__)Nr)   N)NN)r#   r$   r%   intr   r8   rE   r   r   r_   r   r   r   r   r   ru   __classcell__r   r   r>   r   r(   :   s(    r(   )!__doc__ra   	itertoolsr   typingr   r   feature_extraction_utilsr   image_utilsr   r   r	   processing_utilsr
   r   r   tokenization_utils_baser   r   utilsr   r   r   
get_loggerr#   loggerboolr   r   r   r(   __all__r   r   r   r   <module>   s&   

 
