o
    ei                     @   sZ   d Z ddlmZ ddlmZmZmZmZmZ ddl	m
Z
mZ eG dd deZdgZdS )z!
Processor class for LayoutLMv3.
   )ProcessorMixin)BatchEncodingPaddingStrategyPreTokenizedInput	TextInputTruncationStrategy)
TensorTypeauto_docstringc                (       s*  e Zd Zd  fdd	Ze																		d!deeB ee B ee B deee B dB d	eee  eeee   B dB d
ee eee  B dB de	de	e
B eB de	e
B eB dedB dededB de	dB de	dB de	de	de	de	de	de
eB dB def&ddZdd Zedd Z  ZS )"LayoutLMv3ProcessorNc                    s   t  || d S )N)super__init__)selfimage_processor	tokenizerkwargs	__class__ r/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/layoutlmv3/processing_layoutlmv3.pyr      s   zLayoutLMv3Processor.__init__TF    text	text_pairboxesword_labelsadd_special_tokenspadding
truncation
max_lengthstridepad_to_multiple_ofreturn_token_type_idsreturn_attention_maskreturn_overflowing_tokensreturn_special_tokens_maskreturn_offsets_mappingreturn_lengthverbosereturn_tensorsreturnc                 K   sD  | j jr|d urtd| j jr|d urtd| j ||d}|d ur7| j jr7|d u r7t|tr3|g}|d }| jdi d|d urB|n|d d|d urM|nd d|d urV|n|d d|d	|d
|d|d|	d|
d|d|d|d|d|d|d|d|d||}|d}|du r| ||d }||d< |S )NzdYou cannot provide bounding boxes if you initialized the image processor with apply_ocr set to True.zaYou cannot provide word labels if you initialized the image processor with apply_ocr set to True.)imagesr'   wordsr   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   pixel_valuesToverflow_to_sample_mappingr   )r   	apply_ocr
ValueError
isinstancestrr   popget_overflowing_images)r   r)   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r   featuresencoded_inputsr   r   r   __call__   sv   

	

zLayoutLMv3Processor.__call__c                 C   sL   g }|D ]	}| ||  qt|t|kr$tdt| dt| |S )Nz`Expected length of images to be the same as the length of `overflow_to_sample_mapping`, but got z and )appendlenr.   )r   r)   r,   images_with_overflow
sample_idxr   r   r   r2   e   s   z*LayoutLMv3Processor.get_overflowing_imagesc                 C   s   g dS )N)	input_idsbboxattention_maskr+   r   )r   r   r   r   model_input_namess   s   z%LayoutLMv3Processor.model_input_names)NN)NNNNTFNNr   NNNFFFFTN)__name__
__module____qualname__r   r	   r   r   listintboolr0   r   r   r   r   r5   r2   propertyr=   __classcell__r   r   r   r   r
      s~    

	

Hr
   N)__doc__processing_utilsr   tokenization_utils_baser   r   r   r   r   utilsr   r	   r
   __all__r   r   r   r   <module>   s   
`