o
    	۷i"                     @   s   d Z ddlZddlmZmZ ddlZddlmZ ddl	m
Z
mZmZ ddlmZmZ ddlmZmZmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZ G dd deZeG dd deZdgZ dS )z%Fast Image processor class for OWLv2.    N)OptionalUnion)
functional   )BaseImageProcessorFastBatchFeatureDefaultFastImageProcessorKwargs)group_images_by_shapereorder_images)OPENAI_CLIP_MEANOPENAI_CLIP_STDChannelDimension
ImageInputPILImageResamplingSizeDict)Unpack)
TensorTypeauto_docstring   )OwlViTImageProcessorFastc                   @   s   e Zd ZdS )Owlv2FastImageProcessorKwargsN)__name__
__module____qualname__ r   r   ]/home/ubuntu/vllm_env/lib/python3.10/site-packages/transformers/models/owlv2/modular_owlv2.pyr   -   s    r   c                   @   sH  e Zd ZejZeZeZ	dddZ
dZdZdZdZdZeZdZdZdee fddZed	edee fd
dZd&d	ddeddfddZ	d&d	ed dee deded fddZ		d'dddededdfddZ d	ed dededed dededed ed!ee!eee f  d"ee!eee f  dee d#ee!e"e#f  de$fd$d%Z%dS )(Owlv2ImageProcessorFasti  )heightwidthgp?TNkwargsc                 K   s   t j| fi | d S N)r   __init__)selfr   r   r   r   r!   ?   s   z Owlv2ImageProcessorFast.__init__imagesc                 K   s   t j| |fi |S r    )r   
preprocess)r"   r#   r   r   r   r   r$   B   s   z"Owlv2ImageProcessorFast.preprocess      ?ztorch.Tensorconstant_valuereturnc           
      C   sL   |j dd \}}t||}|| }|| }dd||f}tj|||d}	|	S )z<
        Pad an image with zeros to the given size.
        Nr   )fill)shapemaxFpad)
r"   r#   r&   r   r   size
pad_bottom	pad_rightpaddingpadded_imager   r   r   _pad_imagesF   s   
z#Owlv2ImageProcessorFast._pad_imagesdisable_groupingc                 K   sJ   t ||d\}}i }| D ]\}}	| j|	|d}	|	||< qt||}
|
S )z
        Unlike the Base class `self.pad` where all images are padded to the maximum image size,
        Owlv2 pads an image to square.
        r4   )r&   )r	   itemsr3   r
   )r"   r#   r4   r&   r   grouped_imagesgrouped_images_indexprocessed_images_groupedr*   stacked_imagesprocessed_imagesr   r   r   r-   S   s   

zOwlv2ImageProcessorFast.padimager.   anti_aliasingc                 K   s  |j |jf}|j}t|dd |jt||j }|r}|du r0|d d jdd}n%t|t	| }t
|dk rEtdt
|dk|dk@ rUtd t
|dkr_|}	n dtd|   d }
tj||
d |
d f| d	}	n|}	tj|	|j |jfd
d}|S )az  
        Resize an image as per the original implementation.

        Args:
            image (`Tensor`):
                Image to resize.
            size (`dict[str, int]`):
                Dictionary containing the height and width to resize the image to.
            anti_aliasing (`bool`, *optional*, defaults to `True`):
                Whether to apply anti-aliasing when downsampling the image.
            anti_aliasing_sigma (`float`, *optional*, defaults to `None`):
                Standard deviation for Gaussian kernel when downsampling the image. If `None`, it will be calculated
                automatically.
        r   N   r   )minzFAnti-aliasing standard deviation must be greater than or equal to zerozWAnti-aliasing standard deviation greater than zero but not down-sampling along all axesr   )sigmaF)r.   	antialias)r   r   r*   torchtensortodeviceclamp
atleast_1d	ones_likeany
ValueErrorwarningswarnceilintr,   gaussian_blurtolistresize)r"   r<   r.   r=   anti_aliasing_sigmar   output_shapeinput_shapefactorsfilteredkernel_sizesoutr   r   r   rQ   k   s,   ,zOwlv2ImageProcessorFast.resize	do_resizeinterpolationzF.InterpolationModedo_pad
do_rescalerescale_factordo_normalize
image_mean	image_stdreturn_tensorsc              	   K   s"  t ||d\}}i }| D ]\}}| |||d|	|
}|||< qt||}|r0| j|d|d}t ||d\}}i }| D ]\}}|rR| j|||tjd}|||< q>t||}t ||d\}}i }| D ]\}}| |d|||	|
}|||< qft||}|rtj	|ddn|}t
d|i|d	S )
Nr5   Fr%   )r&   r4   )r<   r.   rZ   input_data_formatr   )dimpixel_values)datatensor_type)r	   r6   rescale_and_normalizer
   r-   rQ   r   FIRSTrB   stackr   )r"   r#   rY   r.   rZ   r[   r\   r]   r^   r_   r`   r4   ra   r   r7   r8   r9   r*   r:   r;   resized_images_groupedresized_stackresized_imagesr   r   r   _preprocess   sF   





z#Owlv2ImageProcessorFast._preprocess)r%   )TN)&r   r   r   r   BILINEARresampler   r_   r   r`   r.   r]   rY   r\   r^   r[   r   valid_kwargs	crop_sizedo_center_cropr   r!   r   r   r$   floatr3   listr   boolr-   r   rQ   r   strr   r   rm   r   r   r   r   r   0   s    


8	
r   )!__doc__rK   typingr   r   rB   torchvision.transforms.v2r   r,   image_processing_utils_fastr   r   r   image_transformsr	   r
   image_utilsr   r   r   r   r   r   processing_utilsr   utilsr   r   #owlvit.image_processing_owlvit_fastr   r   r   __all__r   r   r   r   <module>   s      
4