o
    ei(                  
   @   s  d dl Z d dlmZ d dlmZmZ d dlZddlmZ ddl	m
Z
 ddlmZmZ ddlmZmZmZmZ dd	lmZmZmZ d
dlmZ ddlmZ erTddlmZ d dlm  m  m Z! ddde"ee" B de#de"def
ddZ$eG dd deZ%dgZ&dS )    N)Iterable)TYPE_CHECKINGOptional   )BatchFeature)BaseImageProcessorFast)group_images_by_shapereorder_images)IMAGENET_STANDARD_MEANIMAGENET_STANDARD_STDPILImageResamplingSizeDict)
TensorTypeauto_docstringrequires_backends   )BeitImageProcessorFast   )DPTImageProcessorKwargs)DepthEstimatorOutputinput_imagetorch.Tensoroutput_sizekeep_aspect_ratiomultiplereturnc                 C   s   ddd}| j dd  \}}|\}}|| }	|| }
|r-td|
 td|	 k r+|
}	n|	}
||	| |d}||
| |d}t||dS )	Nr   c                 S   sP   t | | | }|d ur||krt| | | }||k r&t| | | }|S N)roundmathfloorceil)valr   min_valmax_valx r%   a/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/models/dpt/modular_dpt.pyconstrain_to_multiple_of4   s   z>get_resize_output_image_size.<locals>.constrain_to_multiple_ofr   )r   heightwidth)r   N)shapeabsr   )r   r   r   r   r'   input_heightinput_widthoutput_heightoutput_widthscale_heightscale_width
new_height	new_widthr%   r%   r&   get_resize_output_image_size.   s   
r6   c                &   @   sj  e Zd ZejZeZeZ	dddZ
dZdZdZdZdZdZdZdZdZdZeZ				d*dd	d
eded dededB dedd	fddZ	d+dd	dedd	fddZded	 deded
eded dedededededeee B dB deee B dB dededB d ededB d!edB d"ee B dB de!f&d#d$Z"	d,d%d&d'e ee#eef  B dB dB dee$ee f  fd(d)Z%dS )-DPTImageProcessorFasti  r)   TFgp?r   Nimager   sizeinterpolationztvF.InterpolationMode	antialiasensure_multiple_ofr   r   c                 C   sJ   |j r|jstd|  t||j |jf||d}tj| ||||dS )a<  
        Resize an image to `(size["height"], size["width"])`.

        Args:
            image (`torch.Tensor`):
                Image to resize.
            size (`SizeDict`):
                Dictionary in the format `{"height": int, "width": int}` specifying the size of the output image.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image e.g. `InterpolationMode.BICUBIC`.
            antialias (`bool`, *optional*, defaults to `True`):
                Whether to use antialiasing when resizing the image
            ensure_multiple_of (`int`, *optional*):
                If `do_resize` is `True`, the image is resized to a size that is a multiple of this value
            keep_aspect_ratio (`bool`, *optional*, defaults to `False`):
                If `True`, and `do_resize` is `True`, the image is resized to the largest possible size such that the aspect ratio is preserved.

        Returns:
            `torch.Tensor`: The resized image.
        zDThe size dictionary must contain the keys 'height' and 'width'. Got )r   r   r   )r:   r;   )r*   r+   
ValueErrorkeysr6   r   resize)selfr8   r9   r:   r;   r<   r   r   r%   r%   r&   r?   h   s   

zDPTImageProcessorFast.resizesize_divisorc                 C   sN   |j dd \}}dd }|||\}}|||\}}	|||	|f}
t||
S )a  
        Center pad a batch of images to be a multiple of `size_divisor`.

        Args:
            image (`torch.Tensor`):
                Image to pad.  Can be a batch of images of dimensions (N, C, H, W) or a single image of dimensions (C, H, W).
            size_divisor (`int`):
                The width and height of the image will be padded to a multiple of this number.
        r(   Nc                 S   s2   t | | | }||  }|d }|| }||fS )Nr   )r   r    )r9   rA   new_sizepad_sizepad_size_leftpad_size_rightr%   r%   r&   _get_pad   s
   z1DPTImageProcessorFast.pad_image.<locals>._get_pad)r,   tvFpad)r@   r8   rA   r*   r+   rF   pad_top
pad_bottompad_left	pad_rightpaddingr%   r%   r&   	pad_image   s   zDPTImageProcessorFast.pad_imageimagesdo_reduce_labels	do_resizedo_center_crop	crop_size
do_rescalerescale_factordo_normalize
image_mean	image_stddo_paddisable_groupingreturn_tensorsc              	   K   s   |r|  |}t||d\}}i }| D ]\}}|r%| j|||||d}|||< qt||}t||d\}}i }| D ]"\}}|rI| ||}|rQ| ||}| |||	|
||}|||< q=t||}td|i|dS )N)rZ   )r8   r9   r:   r<   r   pixel_values)datatensor_type)	reduce_labelr   itemsr?   r	   center_croprN   rescale_and_normalizer   )r@   rO   rP   rQ   r9   r:   rR   rS   rT   rU   rV   rW   rX   r   r<   rY   rA   rZ   r[   kwargsgrouped_imagesgrouped_images_indexresized_images_groupedr,   stacked_imagesresized_imagesprocessed_images_groupedprocessed_imagesr%   r%   r&   _preprocess   s8   




z!DPTImageProcessorFast._preprocessoutputsr   target_sizesc                 C   s   t | d |j}|durt|t|krtdg }|du r%dgt| n|}t||D ]"\}}|durGtjjj|	d	d|ddd
 }|d	|i q,|S )
a  
        Converts the raw output of [`DepthEstimatorOutput`] into final depth predictions and depth PIL images.
        Only supports PyTorch.

        Args:
            outputs ([`DepthEstimatorOutput`]):
                Raw outputs of the model.
            target_sizes (`TensorType` or `List[Tuple[int, int]]`, *optional*):
                Tensor of shape `(batch_size, 2)` or list of tuples (`Tuple[int, int]`) containing the target size
                (height, width) of each image in the batch. If left to None, predictions will not be resized.

        Returns:
            `List[Dict[str, TensorType]]`: A list of dictionaries of tensors representing the processed depth
            predictions.
        torchNz]Make sure that you pass in as many target sizes as the batch dimension of the predicted depthr   r   bicubicF)r9   modealign_cornerspredicted_depth)r   rr   lenr=   ziprn   nn
functionalinterpolate	unsqueezesqueezeappend)r@   rl   rm   rr   resultsdepthtarget_sizer%   r%   r&   post_process_depth_estimation   s"   
z3DPTImageProcessorFast.post_process_depth_estimation)NTr   F)r   r   )&__name__
__module____qualname__r   BICUBICresampler
   rW   r   rX   r9   rQ   rT   rV   rY   rU   r<   r   rS   rR   rP   r   valid_kwargsr   r   boolintr?   rN   listfloatstrr   r   rk   tupledictr~   r%   r%   r%   r&   r7   U   s    

-
	


=r7   )'r   collections.abcr   typingr   r   rn   image_processing_baser   image_processing_utils_fastr   image_transformsr   r	   image_utilsr
   r   r   r   utilsr   r   r   beit.image_processing_beit_fastr   image_processing_dptr   modeling_outputsr   $torchvision.transforms.v2.functional
transformsv2rv   rG   r   r   r6   r7   __all__r%   r%   r%   r&   <module>   s:   

' 
=