o
    i a                     @   sb  d dl Z d dlmZmZ d dlZd dlZddlmZm	Z	m
Z
 ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlmZmZmZm Z  ddl!m"Z" d	d
l#m$Z$ e rgd dl%m&Z&m'Z' e rnd dl%Z%e(e)Z*	ddej+deee,ef  fddZ-	ddedeee,ef  defddZ.defddZ/e"ddG dd deZ0dgZ1dS )    N)OptionalUnion   )BaseImageProcessorBatchFeatureget_size_dict)resizeto_channel_dimension_format)ChannelDimension
ImageInput	ImageTypePILImageResamplingget_image_typeinfer_channel_dimension_formatis_pil_imageis_scaled_imageis_valid_imageis_vision_availableto_numpy_arrayvalid_imagesvalidate_preprocess_arguments)
TensorTypeis_matplotlib_availableloggingrequires_backends)requires   )LightGlueKeypointMatchingOutput)Image	ImageDrawimageinput_data_formatc                 C   s   |t jkr$| jd dkrdS t| d | d ko#t| d | d kS |t jkrH| jd dkr2dS t| d | d	 koGt| d	 | d
 kS d S )Nr   r   Tr   .r   .   ..r   .r   .r%   )r
   FIRSTshapenpallLAST)r    r!    r/   l/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/lightglue/image_processing_lightglue.pyis_grayscale9   s   
,
,r1   returnc                 C   s   t tdg t| tjrZt| |dr| S |tjkr7| d d | d d  | d d  }tj|gd	 d
d}|S |tj	krX| d d | d d  | d d  }tj|gd	 dd}|S t| t
jjsc| S | d} | S )ao  
    Converts an image to grayscale format using the NTSC formula. Only support numpy and PIL Image. TODO support torch
    and tensorflow grayscale conversion

    This function is supposed to return a 1-channel image, but it returns a 3-channel image with the same value in each
    channel, because of an issue that is discussed in :
    https://github.com/huggingface/transformers/pull/25786#issuecomment-1730176446

    Args:
        image (Image):
            The image to convert.
        input_data_format (`ChannelDimension` or `str`, *optional*):
            The channel dimension format for the input image.
    visionr!   r"   gŏ1w-!?r#   gbX9?r$   gv/?r   r   )axisr'   r(   r)   r&   L)r   convert_to_grayscale
isinstancer,   ndarrayr1   r
   r*   stackr.   PILr   convert)r    r!   
gray_imager/   r/   r0   r7   G   s    
$
$
r7   imagesc                    sh   d}dd  t | tr0t| dkrt fdd| D r| S t fdd| D r0dd	 | D S t|)
N)z-Input images must be a one of the following :z - A pair of PIL images.z - A pair of 3D arrays.z! - A list of pairs of PIL images.z  - A list of pairs of 3D arrays.c                 S   s,   t | pt| ot| tjkot| jdkS )z$images is a PIL Image or a 3D array.r   )r   r   r   r   r;   lenr+   )r    r/   r/   r0   _is_valid_imagev   s   "z8validate_and_format_image_pairs.<locals>._is_valid_imager%   c                 3       | ]} |V  qd S Nr/   .0r    r@   r/   r0   	<genexpr>}       z2validate_and_format_image_pairs.<locals>.<genexpr>c                 3   s<    | ]}t |tot|d kot fdd|D V  qdS )r%   c                 3   rA   rB   r/   rC   rE   r/   r0   rF      rG   z<validate_and_format_image_pairs.<locals>.<genexpr>.<genexpr>N)r8   listr?   r-   )rD   
image_pairrE   r/   r0   rF      s    


c                 S   s   g | ]	}|D ]}|qqS r/   r/   )rD   rI   r    r/   r/   r0   
<listcomp>   s    z3validate_and_format_image_pairs.<locals>.<listcomp>)r8   rH   r?   r-   
ValueError)r>   error_messager/   rE   r0   validate_and_format_image_pairsm   s   
"rM   )torch)backendsc                       s  e Zd ZdZdgZddejdddfdedee	e
ef  ded	ed
ededdf fddZ		d&dejde	e
ef deee
ef  deee
ef  fddZdddddddejdf	dee dee	e
ef  dee d	ee d
ee dee deee
ef  dedeee
ef  defddZ	d'dedeeee f dedee	e
ejf  fddZdedee	e
ejf  ded fd d!Zd"d# Zdedefd$d%Z   Z!S )(LightGlueImageProcessorap  
    Constructs a LightGlue image processor.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Controls whether to resize the image's (height, width) dimensions to the specified `size`. Can be overridden
            by `do_resize` in the `preprocess` method.
        size (`dict[str, int]` *optional*, defaults to `{"height": 480, "width": 640}`):
            Resolution of the output image after `resize` is applied. Only has an effect if `do_resize` is set to
            `True`. Can be overridden by `size` in the `preprocess` method.
        resample (`PILImageResampling`, *optional*, defaults to `Resampling.BILINEAR`):
            Resampling filter to use if resizing the image. Can be overridden by `resample` in the `preprocess` method.
        do_rescale (`bool`, *optional*, defaults to `True`):
            Whether to rescale the image by the specified scale `rescale_factor`. Can be overridden by `do_rescale` in
            the `preprocess` method.
        rescale_factor (`int` or `float`, *optional*, defaults to `1/255`):
            Scale factor to use if rescaling the image. Can be overridden by `rescale_factor` in the `preprocess`
            method.
        do_grayscale (`bool`, *optional*, defaults to `True`):
            Whether to convert the image to grayscale. Can be overridden by `do_grayscale` in the `preprocess` method.
    pixel_valuesTNgp?	do_resizesizeresample
do_rescalerescale_factordo_grayscaler2   c                    s\   t  jdi | |d ur|nddd}t|dd}|| _|| _|| _|| _|| _|| _d S )Ni  i  )heightwidthFdefault_to_squarer/   )	super__init__r   rR   rS   rT   rU   rV   rW   )selfrR   rS   rT   rU   rV   rW   kwargs	__class__r/   r0   r]      s   

z LightGlueImageProcessor.__init__r    data_formatr!   c                 K   s0   t |dd}t|f|d |d f||d|S )aL  
        Resize an image.

        Args:
            image (`np.ndarray`):
                Image to resize.
            size (`dict[str, int]`):
                Dictionary of the form `{"height": int, "width": int}`, specifying the size of the output image.
            data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format of the output image. If not provided, it will be inferred from the input
                image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        FrZ   rX   rY   )rS   rb   r!   )r   r   )r^   r    rS   rb   r!   r_   r/   r/   r0   r      s   zLightGlueImageProcessor.resizereturn_tensorsc                    sp  |dur|n| j }|dur|n| j}|dur|n| j}|dur!|n| j}|dur*|n| j}|dur3|n| j}t|dd}t|}t|sHt	dt
|||||d dd |D }t|d re|retd	 |
du rot|d }
g  |D ]+}|r| j||||
d
}|r| j|||
d}|rt||
d}t||	|
d} | qs fddtdt dD }d|i}t||dS )a   
        Preprocess an image or batch of images.

        Args:
            images (`ImageInput`):
                Image pairs to preprocess. Expects either a list of 2 images or a list of list of 2 images list with
                pixel values ranging from 0 to 255. If passing in images with pixel values between 0 and 1, set
                `do_rescale=False`.
            do_resize (`bool`, *optional*, defaults to `self.do_resize`):
                Whether to resize the image.
            size (`dict[str, int]`, *optional*, defaults to `self.size`):
                Size of the output image after `resize` has been applied. If `size["shortest_edge"]` >= 384, the image
                is resized to `(size["shortest_edge"], size["shortest_edge"])`. Otherwise, the smaller edge of the
                image will be matched to `int(size["shortest_edge"]/ crop_pct)`, after which the image is cropped to
                `(size["shortest_edge"], size["shortest_edge"])`. Only has an effect if `do_resize` is set to `True`.
            resample (`PILImageResampling`, *optional*, defaults to `self.resample`):
                Resampling filter to use if resizing the image. This can be one of `PILImageResampling`, filters. Only
                has an effect if `do_resize` is set to `True`.
            do_rescale (`bool`, *optional*, defaults to `self.do_rescale`):
                Whether to rescale the image values between [0 - 1].
            rescale_factor (`float`, *optional*, defaults to `self.rescale_factor`):
                Rescale factor to rescale the image by if `do_rescale` is set to `True`.
            do_grayscale (`bool`, *optional*, defaults to `self.do_grayscale`):
                Whether to convert the image to grayscale.
            return_tensors (`str` or `TensorType`, *optional*):
                The type of tensors to return. Can be one of:
                    - Unset: Return a list of `np.ndarray`.
                    - `TensorType.TENSORFLOW` or `'tf'`: Return a batch of type `tf.Tensor`.
                    - `TensorType.PYTORCH` or `'pt'`: Return a batch of type `torch.Tensor`.
                    - `TensorType.NUMPY` or `'np'`: Return a batch of type `np.ndarray`.
                    - `TensorType.JAX` or `'jax'`: Return a batch of type `jax.numpy.ndarray`.
            data_format (`ChannelDimension` or `str`, *optional*, defaults to `ChannelDimension.FIRST`):
                The channel dimension format for the output image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - Unset: Use the channel dimension format of the input image.
            input_data_format (`ChannelDimension` or `str`, *optional*):
                The channel dimension format for the input image. If unset, the channel dimension format is inferred
                from the input image. Can be one of:
                - `"channels_first"` or `ChannelDimension.FIRST`: image in (num_channels, height, width) format.
                - `"channels_last"` or `ChannelDimension.LAST`: image in (height, width, num_channels) format.
                - `"none"` or `ChannelDimension.NONE`: image in (height, width) format.
        NFrZ   zkInvalid image type. Must be of type PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray.)rR   rS   rT   rU   rV   c                 S      g | ]}t |qS r/   r   rC   r/   r/   r0   rJ   4      z6LightGlueImageProcessor.preprocess.<locals>.<listcomp>r   zIt looks like you are trying to rescale already rescaled images. If the input images have pixel values between 0 and 1, set `do_rescale=False` to avoid rescaling them again.)r    rS   rT   r!   )r    scaler!   r4   )input_channel_dimc                       g | ]
} ||d   qS r%   r/   rD   i
all_imagesr/   r0   rJ   O      r%   rQ   )datatensor_type)rR   rT   rU   rV   rW   rS   r   rM   r   rK   r   r   loggerwarning_oncer   r   rescaler7   r	   appendranger?   r   )r^   r>   rR   rS   rT   rU   rV   rW   rc   rb   r!   r_   r    image_pairsrp   r/   rm   r0   
preprocess   sN   :	z"LightGlueImageProcessor.preprocess        outputstarget_sizes	thresholdc                 C   sp  |j jd t|krtdtdd |D stdt|tr*tj||j j	d}n|jd dks8|jd dkr<td|}|j
 }||d	d	ddd }|tj}g }t|j ||jd
d
df |jd
d
df D ]G\}}}	}
|d dk}|d dk}|d | }|d | }|	| }|
| }t||k|d	k}|| }|||  }|| }||||d qn|S )a  
        Converts the raw output of [`KeypointMatchingOutput`] into lists of keypoints, scores and descriptors
        with coordinates absolute to the original image sizes.
        Args:
            outputs ([`KeypointMatchingOutput`]):
                Raw outputs of the model.
            target_sizes (`torch.Tensor` or `list[tuple[tuple[int, int]]]`, *optional*):
                Tensor of shape `(batch_size, 2, 2)` or list of tuples of tuples (`tuple[int, int]`) containing the
                target size `(height, width)` of each image in the batch. This must be the original image size (before
                any processing).
            threshold (`float`, *optional*, defaults to 0.0):
                Threshold to filter out the matches with low scores.
        Returns:
            `list[Dict]`: A list of dictionaries, each dictionary containing the keypoints in the first and second image
            of the pair, the matching scores and the matching indices.
        r   zRMake sure that you pass in as many target sizes as the batch dimension of the maskc                 s   s    | ]	}t |d kV  qdS )r%   N)r?   )rD   target_sizer/   r/   r0   rF   m  s    zILightGlueImageProcessor.post_process_keypoint_matching.<locals>.<genexpr>zTEach element of target_sizes must contain the size (h, w) of each image of the batch)devicer   r%   r&   N)
keypoints0
keypoints1matching_scores)maskr+   r?   rK   r-   r8   rH   rN   tensorr~   	keypointscloneflipreshapetoint32zipmatchesr   logical_andru   )r^   rz   r{   r|   image_pair_sizesr   results	mask_pairkeypoints_pairr   scoresmask0mask1r   r   matches0scores0valid_matchesmatched_keypoints0matched_keypoints1r   r/   r/   r0   post_process_keypoint_matchingU  sF   

&z6LightGlueImageProcessor.post_process_keypoint_matchingr>   keypoint_matching_outputzImage.Imagec                    s  t   dd  D   fddtdt dD }g }t||D ]\}}|d jdd \}}|d jdd \}	}
tjt||	||
 dftjd	}|d |d|d|f< |d |d|	|df< t	
|}t|}|d
 d\}}|d d\}}t|||||d D ]D\}}}}}| |}|j|||| |f|dd |j|d |d |d |d fdd |j|| d |d || d |d fdd q|| q!|S )a  
        Plots the image pairs side by side with the detected keypoints as well as the matching between them.

        Args:
            images (`ImageInput`):
                Image pairs to plot. Same as `LightGlueImageProcessor.preprocess`. Expects either a list of 2
                images or a list of list of 2 images list with pixel values ranging from 0 to 255.
            keypoint_matching_output (List[Dict[str, torch.Tensor]]]):
                A post processed keypoint matching output

        Returns:
            `List[PIL.Image.Image]`: A list of PIL images, each containing the image pairs side by side with the detected
            keypoints as well as the matching between them.
        c                 S   rd   r/   re   rC   r/   r/   r0   rJ     rf   zGLightGlueImageProcessor.visualize_keypoint_matching.<locals>.<listcomp>c                    ri   rj   r/   rk   r>   r/   r0   rJ     ro   r   r%   Nr   r   )dtyper   r   r   )fillrY   black)r   )rM   rv   r?   r   r+   r,   zerosmaxuint8r   	fromarrayr   Drawunbind
_get_colorlineellipseru   )r^   r>   r   rw   r   rI   pair_outputheight0width0height1width1
plot_imageplot_image_pildrawkeypoints0_xkeypoints0_ykeypoints1_xkeypoints1_ykeypoint0_xkeypoint0_ykeypoint1_xkeypoint1_ymatching_scorecolorr/   r   r0   visualize_keypoint_matching  s<    


&"z3LightGlueImageProcessor.visualize_keypoint_matchingc                 C   s*   t dd|  }t d| }d}|||fS )zMaps a score to a color.   r   r   )int)r^   scorergbr/   r/   r0   r     s   
z"LightGlueImageProcessor._get_colorc              	      s  t dt t rddlm} ntdt  dd  D   fddtdt	 dD }t
||D ]\}}|d jdd \}}|d	 jdd \}	}
tt||	||
 d
f}|d d |d|d|f< |d	 d |d|	|df< || |d |d d	\}}|d d	\}}t
|||||d D ]2\}}}}}|j||| g||g|d| ddd |j||ddd |j|| |ddd q|  q3dS )a  
        Plots the image pairs side by side with the detected keypoints as well as the matching between them. Requires
        matplotlib to be installed.

        .. deprecated::
            `plot_keypoint_matching` is deprecated and will be removed in a future version. Use `visualize_keypoint_matching` instead.

        Args:
            images (`ImageInput`):
                Image pairs to plot. Same as `LightGlueImageProcessor.preprocess`. Expects either a list of 2 images or
                a list of list of 2 images list with pixel values ranging from 0 to 255.
            keypoint_matching_output ([`LightGlueKeypointMatchingOutput`]):
                Raw outputs of the model.
        zx`plot_keypoint_matching` is deprecated and will be removed in transformers v. Use `visualize_keypoint_matching` instead.r   Nz@Please install matplotlib to use `plot_keypoint_matching` methodc                 S   rd   r/   re   rC   r/   r/   r0   rJ     rf   zBLightGlueImageProcessor.plot_keypoint_matching.<locals>.<listcomp>c                    ri   rj   r/   rk   r   r/   r0   rJ     ro   r%   r   r   g     o@offr   r   r   RdYlGng?g      ?)r   alpha	linewidthr   )cs)warningswarnFutureWarningr   matplotlib.pyplotpyplotImportErrorrM   rv   r?   r   r+   r,   r   r   imshowr5   r   plotget_cmapitemscattershow)r^   r>   r   pltrw   rI   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r/   r   r0   plot_keypoint_matching  sD   



z.LightGlueImageProcessor.plot_keypoint_matching)NN)ry   )"__name__
__module____qualname____doc__model_input_namesr   BILINEARboolr   dictstrr   floatr]   r,   r9   r   r
   r   r*   r   r   rx   r   rH   tuplerN   Tensorr   r   r   r   r   __classcell__r/   r/   r`   r0   rP      s    	

*	

z
D
6rP   rB   )2r   typingr   r   numpyr,   rN   image_processing_utilsr   r   r   image_transformsr   r	   image_utilsr
   r   r   r   r   r   r   r   r   r   r   r   r   utilsr   r   r   r   utils.import_utilsr   modeling_lightgluer   r;   r   r   
get_loggerr   rr   r9   r   r1   r7   rM   rP   __all__r/   r/   r/   r0   <module>   sH   <


&   
