o
    iwf                     @   s   d dl mZ d dlmZmZ d dlZddlmZm	Z	m
Z
 ddlmZmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZm Z  e rUd dl!Z!e "e#Z$G d	d
 d
eZ%d
gZ&dS )    )Iterable)OptionalUnionN   )BaseImageProcessorBatchFeatureget_size_dict)convert_to_rgbresizeto_channel_dimension_format)OPENAI_CLIP_MEANOPENAI_CLIP_STDChannelDimension
ImageInputPILImageResamplingget_image_sizeinfer_channel_dimension_formatis_scaled_imagemake_flat_list_of_imagesto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypefilter_out_non_signature_kwargsis_vision_availableloggingc                #       s  e Zd ZdZdgZdddejdddddddfdedee	e
ef  d	ed
ededeeef dedeeeee f  deeeee f  dee dee ddf fddZejddfdejdee	e
ef ef d
edeee
ef  deee
ef  dejfddZe ddddddddddddejdfdedee dee	e
ef  d
ee dee dee dee deeeee f  deeeee f  deee
ef  dee deeeeeeef f  dee dedeee
ef  dejjf ddZ			d&dejdeeeeeef f deee
ef  deee
ef  dejf
d d!Z							d'dedee dee dee deee  deee  dee
 dee
 fd"d#Z	d(dejdeeee f deeee f deee
ef  dejf
d$d%Z  Z S ))JanusImageProcessora  
    Constructs a JANUS image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden by the
            `do_resize` parameter in the `preprocess` method.
        size (`dict`, *optional*, defaults to `{"height": 384, "width": 384}`):
            Size of the output image after resizing. Can be overridden by the `size` parameter in the `preprocess`
            method.
        min_size (`int`, *optional*, defaults to 14):
            The minimum allowed size for the resized image. Ensures that neither the height nor width
            falls below this value after resizing.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BICUBIC`):
            Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`. Can be
            overridden by the `resample` parameter in the `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by the
            `do_rescale` parameter in the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Only has an effect if `do_rescale` is set to `True`. Can be
            overridden by the `rescale_factor` parameter in the `preprocess` method.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image. Can be overridden by the `do_normalize` parameter in the `preprocess`
            method. Can be overridden by the `do_normalize` parameter in the `preprocess` method.
        image_mean (`float` or `list[float]`, *optional*, defaults to `IMAGENET_STANDARD_MEAN`):
            Mean to use if normalizing the image. This is a float or list of floats the length of the number of
            channels in the image. Can be overridden by the `image_mean` parameter in the `preprocess` method. Can be
            overridden by the `image_mean` parameter in the `preprocess` method.
        image_std (`float` or `list[float]`, *optional*, defaults to `IMAGENET_STANDARD_STD`):
            Standard deviation to use if normalizing the image. This is a float or list of floats the length of the
            number of channels in the image. Can be overridden by the `image_std` parameter in the `preprocess` method.
            Can be overridden by the `image_std` parameter in the `preprocess` method.
        do_convert_rgb (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to RGB.
        do_pad (`bool`, *optional*, defaults to `True`):
            Whether to pad the image to square or not.
    pixel_valuesTN   gp?	do_resizesizemin_sizeresample
do_rescalerescale_factordo_normalize
image_mean	image_stddo_convert_rgbdo_padreturnc                    s   t  jdi | |d ur|nddd}t|dd}|| _|| _|| _|| _|| _|| _|d ur2|nt	| _
|	d ur;|	nt| _|
| _|| _|| _|d u rPd| _d S tdd |D | _d S )	Ni  )heightwidthTdefault_to_square)   r/   r/   c                 s   s    | ]	}t |d  V  qdS )   N)int).0x r4   d/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/janus/image_processing_janus.py	<genexpr>   s    z/JanusImageProcessor.__init__.<locals>.<genexpr>r4   )super__init__r   r   r    r"   r#   r$   r%   r   r&   r   r'   r(   r)   r!   background_colortuple)selfr   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   kwargs	__class__r4   r5   r8   _   s"   
zJanusImageProcessor.__init__imagedata_formatinput_data_formatc                 K   s   |du rt |}t||\}}t||}	t|dd}|d |d kr0td|d  d|d  |d }||	 }
tt||
 | jtt||
 | jg}t|f||||d|}|S )	an  
        Resize an image to dynamically calculated size.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`dict[str, int]` or `int`):
                The size to resize the image to. If a dictionary, it should have the keys `"height"` and `"width"`.
            resample (`PILImageResampling`, *optional*, defaults to `PILImageResampling.BICUBIC`):
                `PILImageResampling` filter to use when resizing the image e.g. `PILImageResampling.BICUBIC`.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the output image. If unset, the channel dimension format of the input
                image is used. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `None`: will be inferred from input
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.

        Returns:
            `np.ndarray`: The resized image.
        NTr-   r+   r,   z5Output height and width must be the same. Got height=z and width=)r    r"   r@   rA   )r   r   maxr   
ValueErrorr1   r!   r
   )r;   r?   r    r"   r@   rA   r<   r+   r,   max_sizedeltaoutput_size_nonpaddedr4   r4   r5   r
      s2   #
zJanusImageProcessor.resizeimagesreturn_tensorsr9   c              
      s  |dur|nj }durnj|dur|nj}dur!nj|dur*|nj}dur3njdur<nj|durE|nj}|durN|nj} durW nj	 dur`nj
tdd|}t|}t|sztdt|||d |rdd |D }dd |D }|rt|d	 rtd
 du rt|d	 |rfdd|D }|rȇ fdd|D }|rՇfdd|D }|rfdd|D }fdd|D }td|i|
d}|S )a`  
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image to preprocess. Expects a single or batch of images with pixel values ranging from 0 to 255. If
                passing in images with pixel values between 0 and 1, set `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`dict[str, int]`, *optional*, defaults to `self.size`):
                Controls the size of the image after `resize`. The shortest edge of the image is resized to
                `size["shortest_edge"]` whilst preserving the aspect ratio. If the longest edge of this resized image
                is > `int(size["shortest_edge"] * (1333 / 800))`, then the image is resized again to make the longest
                edge equal to `int(size["shortest_edge"] * (1333 / 800))`.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. Only has an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values between [0 - 1].
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_normalize (`bool`, *optional*, defaults to `self.do_normalize`):
                Whether to normalize the image.
            image_mean (`float` or `list[float]`, *optional*, defaults to `self.image_mean`):
                Image mean to normalize the image by if `do_normalize` is set to `True`.
            image_std (`float` or `list[float]`, *optional*, defaults to `self.image_std`):
                Image standard deviation to normalize the image by if `do_normalize` is set to `True`.
            do_convert_rgb (`bool`, *optional*, defaults to `self.do_convert_rgb`):
                Whether to convert the image to RGB.
            background_color (`tuple[int, int, int]`):
                The background color to use for the padding.
            do_pad (`bool`, *optional*, defaults to `self.do_pad`):
                Whether to pad the image to square or not.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - Unset: Use the channel dimension format of the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        NFr-   zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)r#   r$   r%   r&   r'   r   r    r"   c                 S      g | ]}t |qS r4   )r	   r2   r?   r4   r4   r5   
<listcomp>,      z2JanusImageProcessor.preprocess.<locals>.<listcomp>c                 S   rI   r4   )r   rJ   r4   r4   r5   rK   /  rL   r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.c                    s   g | ]}j | d qS ))r?   r    r"   rA   )r
   rJ   )rA   r"   r;   r    r4   r5   rK   <      c                    s   g | ]
}j | d qS ))r?   r9   rA   )pad_to_squarerJ   )r9   rA   r;   r4   r5   rK   C  s    c                    s   g | ]
}j | d qS ))r?   scalerA   )rescalerJ   )rA   r$   r;   r4   r5   rK   M  s    c                    s   g | ]}j | d qS )r?   meanstdrA   )	normalizerJ   )r&   r'   rA   r;   r4   r5   rK   S  rM   c                    s   g | ]	}t | d qS )input_channel_dim)r   rJ   )r@   rA   r4   r5   rK   X  s    r   datatensor_type)r   r"   r#   r$   r%   r&   r'   r(   r)   r9   r    r   fetch_imagesr   r   rC   r   r   loggerwarning_oncer   r   )r;   rG   r   r    r"   r#   r$   r%   r&   r'   rH   r(   r9   r)   r@   rA   encoded_outputsr4   )	r9   r@   r&   r'   rA   r"   r$   r;   r    r5   
preprocess   st   F
	zJanusImageProcessor.preprocessr   c                 C   s  t ||\}}|tjkr|jd n|jd }||kr*|dur&t|||}|S |}|S t||}t|tr8|g}nt||krFt	d| d|tjkrt
j|||f|jd}	t|D ]\}
}||	|
ddddf< qZ||kr|| d }||	dd||| ddf< |	S || d }||	dddd||| f< |	S t
j|||f|jd}	t|D ]\}
}||	dddd|
f< q||kr|| d }||	||| ddddf< |	S || d }||	dd||| ddf< |	S )a}  
        Pads an image to a square based on the longest edge.

        Args:
            image (`np.ndarray`):
                The image to pad.
            background_color (`int` or `tuple[int, int, int]`, *optional*, defaults to 0):
                The color to use for the padding. Can be an integer for single channel or a
                tuple of integers representing for multi-channel images. If passed as integer
                in multi-channel mode, it will default to `0` in subsequent channels.
            data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the output image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                If unset, will use same as the input image.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The channel dimension format for the input image. Can be one of:
                    - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                    - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.

        Returns:
            `np.ndarray`: The padded image.
        r   Nz(background_color must have no more than z) elements to match the number of channels)dtype   )r   r   FIRSTshaper   rB   
isinstancer1   lenrC   npzerosr`   	enumerate)r;   r?   r9   r@   rA   r+   r,   num_channelsmax_dimresulticolorstartr4   r4   r5   rN   `  sL   




z!JanusImageProcessor.pad_to_squarec	                 C   sR  |dur|n| j }|du rd| j n|}|dur|n| j}|dur#|n| j}|dur,|n| j}t|}t|d tjjrHt	|dkrD|S |d S |du rRt
|d }g }	|D ]@}
t|
}
|rg| j|
|||d}
|r{| j|
||d}
|
ddtj}
|r|r|dkrt|
tj|d	}
tj|
}
|	|
 qVd
|	i}|dkr|nd}t||dS )znApplies post-processing to the decoded image tokens by reversing transformations applied during preprocessing.Ng      ?r      )r?   r&   r'   rA   )rO   rA   r0   zPIL.Image.ImagerU   r   rW   )r#   r$   r%   r&   r'   r   rd   PILImagere   r   r   unnormalizerP   clipastyperf   uint8r   r   LAST	fromarrayappendr   )r;   rG   r#   r$   r%   r&   r'   rA   rH   r   r?   rX   r4   r4   r5   postprocess  s6   zJanusImageProcessor.postprocessc                 C   s   d}t |trt||krtd| dt| n|g| }t |tr7t||kr6td| dt| n|g| }tdd t||D }tdd |D }| j||||d}|S )	a~  
        Unnormalizes `image` using the mean and standard deviation specified by `mean` and `std`.
        image = (image * image_std) + image_mean
        Args:
            image (`torch.Tensor` of shape `(batch_size, num_channels, image_size, image_size)` or `(num_channels, image_size, image_size)`):
                Batch of pixel values to postprocess.
            image_mean (`float` or `Iterable[float]`):
                The mean to use for unnormalization.
            image_std (`float` or `Iterable[float]`):
                The standard deviation to use for unnormalization.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        r   zmean must have z$ elements if it is an iterable, got zstd must have c                 s   s    | ]
\}}| | V  qd S Nr4   )r2   rR   rS   r4   r4   r5   r6     s    z2JanusImageProcessor.unnormalize.<locals>.<genexpr>c                 s   s    | ]}d | V  qdS )ro   Nr4   )r2   rS   r4   r4   r5   r6     s    rQ   )rd   r   re   rC   r:   ziprT   )r;   r?   r&   r'   rA   ri   rev_image_meanrev_image_stdr4   r4   r5   rr     s"   



zJanusImageProcessor.unnormalize)r   NN)NNNNNNNrz   )!__name__
__module____qualname____doc__model_input_namesr   BICUBICboolr   dictstrr1   r   floatlistr8   rf   ndarrayr   r
   r   rb   r   r   r:   rp   rq   r^   rN   ry   r   rr   __classcell__r4   r4   r=   r5   r   5   s4   '
	
(
A	
 
M

	
8r   )'collections.abcr   typingr   r   numpyrf   image_processing_utilsr   r   r   image_transformsr	   r
   r   image_utilsr   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   rp   
get_loggerr~   r[   r   __all__r4   r4   r4   r5   <module>   s   8
   
Y