o
    wi{0                  
   @   s0  d dl Z d dlmZ d dlmZmZmZ d dlmZ d dl	m
Z
mZ d dlmZ ddlmZ dd	lmZmZmZmZ dd
lmZmZmZmZmZmZ erRddlmZ e rYd dlZe rcd dlm Z! n	e rld dl"m Z! dddee#ee# f de$de#def
ddZ%G dd deZ&eG dd deZ'dgZ(dS )    N)Iterable)TYPE_CHECKINGOptionalUnion)BatchFeature)group_images_by_shapereorder_images)BeitImageProcessorFast   )DefaultFastImageProcessorKwargs)IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STDPILImageResamplingSizeDict)
TensorTypeauto_docstringis_torch_availableis_torchvision_availableis_torchvision_v2_availablerequires_backends)DepthEstimatorOutput)
functionalinput_imagetorch.Tensoroutput_sizekeep_aspect_ratiomultiplereturnc                 C   s   ddd}| j dd  \}}|\}}|| }	|| }
|r-td|
 td|	 k r+|
}	n|	}
||	| |d}||
| |d}t||dS )	Nr   c                 S   sP   t | | | }|d ur||krt| | | }||k r&t| | | }|S N)roundmathfloorceil)valr   min_valmax_valx r'   `/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/transformers/models/dpt/modular_dpt.pyconstrain_to_multiple_of>   s   z>get_resize_output_image_size.<locals>.constrain_to_multiple_of   )r   heightwidth)r   N)shapeabsr   )r   r   r   r   r)   input_heightinput_widthoutput_heightoutput_widthscale_heightscale_width
new_height	new_widthr'   r'   r(   get_resize_output_image_size8   s   
r9   c                   @   sN   e Zd ZU dZee ed< ee ed< ee ed< ee ed< ee ed< dS )DPTFastImageProcessorKwargsa  
    ensure_multiple_of (`int`, *optional*, defaults to 1):
        If `do_resize` is `True`, the image is resized to a size that is a multiple of this value. Can be overidden
        by `ensure_multiple_of` in `preprocess`.
    do_pad (`bool`, *optional*, defaults to `False`):
        Whether to apply center padding. This was introduced in the DINOv2 paper, which uses the model in
        combination with DPT.
    size_divisor (`int`, *optional*):
        If `do_pad` is `True`, pads the image dimensions to be divisible by this value. This was introduced in the
        DINOv2 paper, which uses the model in combination with DPT.
    keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
        If `True`, the image is resized to the largest possible size such that the aspect ratio is preserved. Can
        be overidden by `keep_aspect_ratio` in `preprocess`.
    do_reduce_labels (`bool`, *optional*, defaults to `self.do_reduce_labels`):
        Whether or not to reduce all label values of segmentation maps by 1. Usually used for datasets where 0
        is used for background, and background itself is not included in all classes of a dataset (e.g.
        ADE20k). The background label will be replaced by 255.
    ensure_multiple_ofsize_divisordo_padr   do_reduce_labelsN)__name__
__module____qualname____doc__r   int__annotations__boolr'   r'   r'   r(   r:   _   s   
 r:   c                '   @   s  e Zd ZejZeZeZ	dddZ
dZdZdZdZdZdZdZdZdZdZdZeZdd	 Z				d,d
ddedddedee deddfddZ	d-d
ddeddfddZded dedededed dededededed ee eee f  d!ee eee f  dedee d"edee d#ee d$ee e!e"f  de#f&d%d&Z$	d.d'd(d)ee e"ee%eef  df  dee&e!e"f  fd*d+Z'dS )/DPTImageProcessorFasti  r,   TFgp?r+   Nc                   C   s   t d)NzNo need to override this method)NotImplementedErrorr'   r'   r'   r(   	from_dict   s   zDPTImageProcessorFast.from_dictimager   sizeinterpolationzF.InterpolationMode	antialiasr;   r   r   c                 C   sJ   |j r|jstd|  t||j |jf||d}t j||||dS )a<  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`torch.Tensor`):
                Image to resize.
            size (`SizeDict`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image e.g. `InterpolationMode.BICUBIC`.
            antialias (`bool`, *optional*, defaults to `True`):
                Whether to use antialiasing when resizing the image
            ensure_multiple_of (`int`, *optional*):
                If `do_resize` is `True`, the image is resized to a size that is a multiple of this value
            keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
                If `True`, and `do_resize` is `True`, the image is resized to the largest possible size such that the aspect ratio is preserved.

        Returns:
            `torch.Tensor`: The resized image.
        zDThe size dictionary must contain the keys 'height' and 'width'. Got )r   r   r   )rK   rL   )r-   r.   
ValueErrorkeysr9   r	   resize)selfrI   rJ   rK   rL   r;   r   r   r'   r'   r(   rO      s   
zDPTImageProcessorFast.resizer<   c                 C   sN   |j dd \}}dd }|||\}}|||\}}	|||	|f}
t||
S )a  
        Center pad a batch of images to be a multiple of `size_divisor`.

        Args:
            image (`torch.Tensor`):
                Image to pad.  Can be a batch of images of dimensions (N, C, H, W) or a single image of dimensions (C, H, W).
            size_divisor (`int`):
                The width and height of the image will be padded to a multiple of this number.
        r*   Nc                 S   s2   t | | | }||  }|d }|| }||fS )N   )r    r"   )rJ   r<   new_sizepad_sizepad_size_leftpad_size_rightr'   r'   r(   _get_pad   s
   z1DPTImageProcessorFast.pad_image.<locals>._get_pad)r/   Fpad)rP   rI   r<   r-   r.   rV   pad_top
pad_bottompad_left	pad_rightpaddingr'   r'   r(   	pad_image   s   zDPTImageProcessorFast.pad_imageimagesr>   	do_resizedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stdr=   disable_groupingreturn_tensorsc              	   K   s   |r|  |}t||d\}}i }| D ]\}}|r%| j|||||d}|||< qt||}t||d\}}i }| D ]"\}}|rI| ||}|rQ| ||}| |||	|
||}|||< q=t||}|rptj	|dd}|S |}|S )N)rh   )rI   rJ   rK   r;   r   r   )dim)
reduce_labelr   itemsrO   r   center_cropr^   rescale_and_normalizetorchstack)rP   r_   r>   r`   rJ   rK   ra   rb   rc   rd   re   rf   rg   r   r;   r=   r<   rh   ri   kwargsgrouped_imagesgrouped_images_indexresized_images_groupedr/   stacked_imagesresized_imagesprocessed_images_groupedprocessed_imagesr'   r'   r(   _preprocess   s>   




z!DPTImageProcessorFast._preprocessoutputsr   target_sizesc                 C   s   t | d |j}|durt|t|krtdg }|du r%dgt| n|}t||D ]"\}}|durGtjjj|	d	d|ddd
 }|d	|i q,|S )
a  
        Converts the raw output of [`DepthEstimatorOutput`] into final depth predictions and depth PIL images.
        Only supports PyTorch.

        Args:
            outputs ([`DepthEstimatorOutput`]):
                Raw outputs of the model.
            target_sizes (`TensorType` or `List[Tuple[int, int]]`, *optional*):
                Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size
                (height, width) of each image in the batch. If left to None, predictions will not be resized.

        Returns:
            `List[Dict[str, TensorType]]`: A list of dictionaries of tensors representing the processed depth
            predictions.
        ro   Nz]Make sure that you pass in as many target sizes as the batch dimension of the predicted depthr   r+   bicubicF)rJ   modealign_cornerspredicted_depth)r   r   lenrM   zipro   nnr   interpolate	unsqueezesqueezeappend)rP   rz   r{   r   resultsdepthtarget_sizer'   r'   r(   post_process_depth_estimation  s"   
z3DPTImageProcessorFast.post_process_depth_estimation)NTr+   F)r+   r   )(r?   r@   rA   r   BICUBICresampler   rf   r   rg   rJ   r`   rc   re   r=   rd   r;   r   r>   rb   ra   r:   valid_kwargsrH   r   rE   r   rC   rO   r^   listfloatr   strr   r   ry   tupledictr   r'   r'   r'   r(   rF   z   s    

+
	

>rF   ))r    collections.abcr   typingr   r   r   "transformers.image_processing_baser   transformers.image_transformsr   r   3transformers.models.beit.image_processing_beit_fastr	   image_processing_utils_fastr   image_utilsr   r   r   r   utilsr   r   r   r   r   r   modeling_outputsr   ro   torchvision.transforms.v2r   rW   torchvision.transformsrC   rE   r9   r:   rF   __all__r'   r'   r'   r(   <module>   sB    

' 
@