o
    wif                     @   s  d dl mZ d dlmZmZ d dlmZmZmZm	Z	 d dl
ZddlmZmZmZ ddlmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZmZm Z m!Z! dd	l"m#Z# dd
l$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ ddl,m-Z- e* rwddlm.Z. e' r~d dl/Z/e( rddlm0Z0 e) rd dl1m2Z3 n	d dl4m2Z3 ndZ0e+5e6Z7edddddddddddddddej8fdee9 dee: dee9 dee	e:e;e: f  dee	e:e;e: f  dee9 dee< dee9 dee dee9 dee ded dee	e=e%f  dee fd d!Z>d8d"d#d$ee< d%d#fd&d'Z?d(ee d%e;e fd)d*Z@d+e;d# d%eAe< fd,d-ZBd.e	ejCd#f d/e<d%e;e	ejCd#f  fd0d1ZDG d2d3 d3ed4d5ZEe&G d6d7 d7eZFdS )9    )Iterable)	lru_cachepartial)AnyOptional	TypedDictUnionN   )BaseImageProcessorBatchFeatureget_size_dict)convert_to_rgbget_resize_output_image_sizeget_size_with_aspect_ratiogroup_images_by_shapereorder_images)ChannelDimension
ImageInput	ImageTypeSizeDictget_image_size#get_image_size_for_max_height_widthget_image_typeinfer_channel_dimension_formatmake_flat_list_of_imagesvalidate_kwargsvalidate_preprocess_arguments)Unpack)
TensorTypeauto_docstringis_torch_availableis_torchvision_availableis_torchvision_v2_availableis_vision_availablelogging)is_rocm_platform)PILImageResampling)pil_torch_interpolation_mapping)
functional
   maxsize
do_rescalerescale_factordo_normalize
image_mean	image_stddo_padsize_divisibilitydo_center_crop	crop_size	do_resizesizeresampler&   return_tensorsdata_formatc                 C   sN   t | |||||||||	|
|d |dur|dkrtd|tjkr%tddS )z
    Checks validity of typically used arguments in an `ImageProcessorFast` `preprocess` method.
    Raises `ValueError` if arguments incompatibility is caught.
    )r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   Nptz6Only returning PyTorch tensors is currently supported.z6Only channel first data format is currently supported.)r   
ValueErrorr   FIRST)r,   r-   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9    r=   e/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/transformers/image_processing_utils_fast.py"validate_fast_preprocess_argumentsJ   s&   
r?   tensortorch.Tensoraxisreturnc                 C   s6   |du r|   S z| j |dW S  ty   |  Y S w )zF
    Squeezes a tensor, but only if the axis specified has dim 1.
    N)rB   )squeezer;   )r@   rB   r=   r=   r>   safe_squeezeu   s   rE   valuesc                 C   s   dd t |  D S )zO
    Return the maximum value across all indices of an iterable of values.
    c                 S   s   g | ]}t |qS r=   )max).0values_ir=   r=   r>   
<listcomp>       z&max_across_indices.<locals>.<listcomp>)zip)rF   r=   r=   r>   max_across_indices   s   rM   imagesc                 C   s    t dd | D \}}}||fS )zH
    Get the maximum height and width across all images in a batch.
    c                 S   s   g | ]}|j qS r=   )shaperH   imgr=   r=   r>   rJ      s    z(get_max_height_width.<locals>.<listcomp>)rM   )rN   _
max_height	max_widthr=   r=   r>   get_max_height_width   s   rU   image
patch_sizec                 C   sj   g }t | tjd\}}td||D ]!}td||D ]}| dd||| ||| f }|| qq|S )a6  
    Divides an image into patches of a specified size.

    Args:
        image (`Union[np.array, "torch.Tensor"]`):
            The input image.
        patch_size (`int`):
            The size of each patch.
    Returns:
        list: A list of Union[np.array, "torch.Tensor"] representing the patches.
    )channel_dimr   N)r   r   r<   rangeappend)rV   rW   patchesheightwidthijpatchr=   r=   r>   divide_to_patches   s   "ra   c                   @   s  e Zd ZU ee ed< eeeef  ed< ee ed< ee	d  ed< ee ed< eeeef  ed< ee ed< ee	ee
f  ed	< ee ed
< ee	e
ee
 f  ed< ee	e
ee
 f  ed< ee ed< ee	eef  ed< ee ed< ee	eef  ed< ed ed< ee ed< dS )DefaultFastImageProcessorKwargsr5   r6   default_to_squarer&   F.InterpolationModer7   r3   r4   r,   r-   r.   r/   r0   do_convert_rgbr8   r9   input_data_formattorch.devicedevicedisable_groupingN)__name__
__module____qualname__r   bool__annotations__dictstrintr   floatlistr   r   r=   r=   r=   r>   rb      s$   
 rb   F)totalc                       sX  e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZdZdZdZejZdZdZdgZeZdZdee ddf fddZ		dLd	d
dedddedd
f
ddZe		dLd	d
dee e f de!d dedd
f
ddZ"d	d
de#dd
fddZ$d	d
de%e#e&e# f de%e#e&e# f dd
fddZ'e(dd						dMde!e de!e%e#e)e# f  de!e%e#e)e# f  d e!e d!e!e# d"e!d# defd$d%Z*d&d
d ed!e#dede%e#e)e# f de%e#e)e# f dd
fd'd(Z+d	d
de,e-e f dd
fd)d*Z.d	e/de/fd+d,Z0de,fd-d.Z1d&e/de/fd/d0Z2			dNd	e/d1e!e d2e!e%e-ef  d"e!d# dd
f
d3d4Z3			dNd&e/d1e!e d2e!e%e-ef  d"e!d# de)d
 f
d5d6Z4						dMde!e d7e!e d8e!e de!e%e#e)e# f  de!e%e#e)e# f  d9e!e de,fd:d;Z5												dOd e!e d!e!e# de!e de!e%e#ee# f  de!e%e#ee# f  d<e!e de!e d=e!e d7e!e d>e!e%d?  d@e!e%e-e6f  d9e!e fdAdBZ7d&e/dee de8fdCdDZ9e:d&e/dee de8fdEdFZ;d&e)d
 d<edede!d d=ed7ed ed!e#dede!e%e#e)e# f  de!e%e#e)e# f  dGe!e d@e!e%e-e6f  de8fdHdIZ< fdJdKZ=  Z>S )PBaseImageProcessorFastNTgp?pixel_valueskwargsrC   c                    s   t  jdi | | |}|d| j}|d ur$t||d| jdnd | _|d| j}|d ur8t|ddnd | _| jj	
 D ]}||d }|d urTt| || qAt| |t| |d  qAt| jj	
 | _d S )Nr6   rc   r6   rc   r4   
param_namer=   )super__init__filter_out_unused_kwargspopr6   r   rc   r4   valid_kwargsro   keyssetattrgetattrrt   _valid_kwargs_names)selfrx   r6   r4   keykwarg	__class__r=   r>   r}      s   
zBaseImageProcessorFast.__init__rV   rA   r6   interpolationre   	antialiasc                 K   s   |dur|nt jj}|jr|jrt| dd |j|j}n8|jr-t||jdtj	d}n*|j
rB|jrBt| dd |j
|j}n|jrO|jrO|j|jf}ntd| dtj rgt rg| ||||S t j||||dS )a@  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`torch.Tensor`):
                Image to resize.
            size (`SizeDict`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image e.g. `InterpolationMode.BICUBIC`.

        Returns:
            `torch.Tensor`: The resized image.
        NF)r6   rc   rg   zjSize must contain 'height' and 'width' keys, or 'max_height' and 'max_width', or 'shortest_edge' key. Got .r   r   )FInterpolationModeBILINEARshortest_edgelongest_edger   r6   r   r   r<   rS   rT   r   r\   r]   r;   torchcompileris_compilingr%   compile_friendly_resizeresize)r   rV   r6   r   r   rx   new_sizer=   r=   r>   r      s4   zBaseImageProcessorFast.resizer   c                 C   s   | j tjkr5|  d } tj| |||d} | d } t| dkd| } t| dk d| } |  tj} | S tj| |||d} | S )z{
        A wrapper around `F.resize` so that it is compatible with torch.compile when the image is a uint8 tensor.
           r      r   )	dtyper   uint8rs   r   r   whereroundto)rV   r   r   r   r=   r=   r>   r   #  s   
z.BaseImageProcessorFast.compile_friendly_resizescalec                 K   s   || S )a?  
        Rescale an image by a scale factor. image = image * scale.

        Args:
            image (`torch.Tensor`):
                Image to rescale.
            scale (`float`):
                The scaling factor to rescale pixel values by.

        Returns:
            `torch.Tensor`: The rescaled image.
        r=   )r   rV   r   rx   r=   r=   r>   rescale8  s   zBaseImageProcessorFast.rescalemeanstdc                 K   s   t |||S )a  
        Normalize an image. image = (image - image_mean) / image_std.

        Args:
            image (`torch.Tensor`):
                Image to normalize.
            mean (`torch.Tensor`, `float` or `Iterable[float]`):
                Image mean to use for normalization.
            std (`torch.Tensor`, `float` or `Iterable[float]`):
                Image standard deviation to use for normalization.

        Returns:
            `torch.Tensor`: The normalized image.
        )r   	normalize)r   rV   r   r   rx   r=   r=   r>   r   L  s   z BaseImageProcessorFast.normalizer)   r*   r.   r/   r0   r,   r-   ri   rh   c                 C   sB   |r|rt j||dd|  }t j||dd|  }d}|||fS )N)ri   g      ?F)r   r@   )r   r.   r/   r0   r,   r-   ri   r=   r=   r>   !_fuse_mean_std_and_rescale_factorc  s
   

z8BaseImageProcessorFast._fuse_mean_std_and_rescale_factorrN   c                 C   sR   | j ||||||jd\}}}|r| |jtjd||}|S |r'| ||}|S )z/
        Rescale and normalize images.
        )r.   r/   r0   r,   r-   ri   )r   )r   ri   r   r   r   float32r   )r   rN   r,   r-   r.   r/   r0   r=   r=   r>   rescale_and_normalizet  s   	z,BaseImageProcessorFast.rescale_and_normalizec                 K   s>   |j du s
|jdu rtd|  t||d |d fS )a  
        Center crop an image to `(size["height"], size["width"])`. If the input size is smaller than `crop_size` along
        any edge, the image is padded with 0's and then center cropped.

        Args:
            image (`"torch.Tensor"`):
                Image to center crop.
            size (`dict[str, int]`):
                Size of the output image.

        Returns:
            `torch.Tensor`: The center cropped image.
        Nz=The size dictionary must have keys 'height' and 'width'. Got r\   r]   )r\   r]   r;   r   r   center_crop)r   rV   r6   rx   r=   r=   r>   r     s   z"BaseImageProcessorFast.center_cropc                 C      t |S )a'  
        Converts an image to RGB format. Only converts if the image is of type PIL.Image.Image, otherwise returns the image
        as is.
        Args:
            image (ImageInput):
                The image to convert.

        Returns:
            ImageInput: The converted image.
        )r   )r   rV   r=   r=   r>   r        z%BaseImageProcessorFast.convert_to_rgbc                 C   sB   | j du r|S | j D ]}||v rtd| d || q
|S )zJ
        Filter out the unused kwargs from the kwargs dictionary.
        Nz!This processor does not use the `z ` parameter. It will be ignored.)unused_kwargsloggerwarning_oncer   )r   rx   
kwarg_namer=   r=   r>   r~     s   


z/BaseImageProcessorFast.filter_out_unused_kwargsc                 C   r   )z
        Prepare the images structure for processing.

        Args:
            images (`ImageInput`):
                The input images to process.

        Returns:
            `ImageInput`: The images with a valid nesting.
        )r   )r   rN   r=   r=   r>   _prepare_images_structure  r   z0BaseImageProcessorFast._prepare_images_structurerf   rg   c                 C   s   t |}|tjtjtjfvrtd| |r| |}|tjkr't|}n|tjkr3t	
| }|d u r;t|}|tjkrI|ddd }|d urR||}|S )NzUnsupported input image type    r   r	   )r   r   PILTORCHNUMPYr;   r   r   pil_to_tensorr   
from_numpy
contiguousr   r   LASTpermuter   )r   rV   rf   rg   ri   
image_typer=   r=   r>   _process_image  s    




z%BaseImageProcessorFast._process_imagec                    sj   |  |}t| j|||d t|dkot|d ttf}|r* fdd|D }|S  fdd|D }|S )a4  
        Prepare the input images for processing.

        Args:
            images (`ImageInput`):
                The input images to process.
            do_convert_rgb (`bool`, *optional*):
                Whether to convert the images to RGB.
            input_data_format (`str` or `ChannelDimension`, *optional*):
                The input data format of the images.
            device (`torch.device`, *optional*):
                The device to put the processed images on.

        Returns:
            List[`torch.Tensor`]: The processed images.
        )rf   rg   ri   r   c                    s   g | ]} fd d|D qS )c                       g | ]} |qS r=   r=   rP   process_image_partialr=   r>   rJ     rK   zKBaseImageProcessorFast._prepare_input_images.<locals>.<listcomp>.<listcomp>r=   )rH   nested_listr   r=   r>   rJ     s    z@BaseImageProcessorFast._prepare_input_images.<locals>.<listcomp>c                    r   r=   r=   rP   r   r=   r>   rJ     rK   )r   r   r   len
isinstancert   tuple)r   rN   rf   rg   ri   has_nested_structureprocessed_imagesr=   r   r>   _prepare_input_images  s   

z,BaseImageProcessorFast._prepare_input_imagesr4   rc   r9   c                 K   s   |du ri }|durt d
i t||d}|dur$t d
i t|dd}t|tr-t|}t|tr6t|}|du r=tj}||d< ||d< ||d< ||d< ||d< ||d	< |S )z
        Update kwargs that need further processing before being validated
        Can be overridden by subclasses to customize the processing of kwargs.
        Nry   r4   rz   r6   rc   r/   r0   r9   r=   )r   r   r   rt   r   r   r<   )r   r6   r4   rc   r/   r0   r9   rx   r=   r=   r>   _further_process_kwargs  s&   

z.BaseImageProcessorFast._further_process_kwargsr5   r3   r7   rd   r8   c                 K   s$   t |||||||||	|
||d dS )z@
        validate the kwargs for the preprocess method.
        )r,   r-   r.   r/   r0   r5   r6   r3   r4   r7   r8   r9   N)r?   )r   r,   r-   r.   r/   r0   r5   r6   r3   r4   r7   r8   r9   rx   r=   r=   r>   _validate_preprocess_kwargsC  s   
z2BaseImageProcessorFast._validate_preprocess_kwargsc                 O   s   | j |g|R i |S N)
preprocess)r   rN   argsrx   r=   r=   r>   __call__e  s   zBaseImageProcessorFast.__call__c           	      O   s   t | | jd | jD ]}||t| |d  q|d}|d}|d}| j||||d}| jd
i |}| jd
i | |d}t	|t
tfrQt| n||d< |d |d	 | j|g|R i |S )N)captured_kwargsvalid_processor_keysrf   rg   ri   )rN   rf   rg   ri   r7   r   rc   r9   r=   )r   r   r   
setdefaultr   r   r   r   r   r   rr   r&   r'   _preprocess)	r   rN   r   rx   r   rf   rg   ri   r7   r=   r=   r>   r   h  s"   






z!BaseImageProcessorFast.preprocessrj   c              	   K   s   t ||d\}}i }| D ]\}}|r| j|||d}|||< qt||}t ||d\}}i }| D ]\}}|r@| ||}| ||||	|
|}|||< q4t||}|r]tj|ddn|}td|i|dS )N)rj   )rV   r6   r   r   )dimrw   )datatensor_type)	r   itemsr   r   r   r   r   stackr   )r   rN   r5   r6   r   r3   r4   r,   r-   r.   r/   r0   rj   r8   rx   grouped_imagesgrouped_images_indexresized_images_groupedrO   stacked_imagesresized_imagesprocessed_images_groupedr   r=   r=   r>   r     s&   



z"BaseImageProcessorFast._preprocessc                    s&   t   }|dd  |dd  |S )N_valid_processor_keysr   )r|   to_dictr   )r   encoder_dictr   r=   r>   r     s   
zBaseImageProcessorFast.to_dict)NT)NNNNNN)NNN)NNNNNNNNNNNN)?rk   rl   rm   r7   r/   r0   r6   rc   r4   r5   r3   r,   r-   r.   rf   r8   r   r<   r9   rg   ri   model_input_namesrb   r   r   r   r}   r   rn   r   staticmethodr   rr   r   r   rs   r   r   r   r   r   rt   r   r   rp   rq   r   r   r   r~   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r=   r=   r   r>   rv      s   
6








%
+	
&	


"'	

,rv   r   )Gcollections.abcr   	functoolsr   r   typingr   r   r   r   numpynpimage_processing_utilsr
   r   r   image_transformsr   r   r   r   r   image_utilsr   r   r   r   r   r   r   r   r   r   r   processing_utilsr   utilsr   r   r    r!   r"   r#   r$   utils.import_utilsr%   r&   r   r'   torchvision.transforms.v2r(   r   torchvision.transforms
get_loggerrk   r   r<   rn   rs   rt   rr   rq   r?   rE   rM   r   rU   arrayra   rb   rv   r=   r=   r=   r>   <module>   s   4$	
	
*

