o
    iF                     @   sT  d dl mZ d dlmZmZmZ d dlZd dlZd dlm	Z	 d dl
mZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z  eeddG dd deZ!G dd de	j"Z#dd Z$dIddZ%dej&de'dej&fdd Z(	!dJd"e	j"d#ej&d$ej&d%ej&d&eej& d'e)d(e)d)ee fd*d+Z*G d,d- d-e	j"Z+G d.d/ d/e	j"Z,G d0d1 d1e	j"Z-d2ej&d3ej&d4ej&dej&fd5d6Z.G d7d8 d8e	j"Z/G d9d: d:e	j"Z0eG d;d< d<eZ1d=ej&d>e)de2ej&ej&f fd?d@Z3dAej&dBe'dCe'dej&fdDdEZ4edFdG dGdH dHe1Z5d<dHgZ6dS )K    )	dataclass)CallableOptionalUnionN)nnpad_sequence   )ACT2FN)FlashAttentionKwargs)ALL_ATTENTION_FUNCTIONSPreTrainedModel)Unpack)ModelOutputTransformersKwargsauto_docstring)deprecate_kwarg)can_return_tuple   )AutoModelForKeypointDetection   )LightGlueConfiga  
    Base class for outputs of LightGlue keypoint matching models. Due to the nature of keypoint detection and matching,
    the number of keypoints is not fixed and can vary from image to image, which makes batching non-trivial. In the
    batch of images, the maximum number of matches is set as the dimension of the matches and matching scores. The mask
    tensor is used to indicate which values in the keypoints, matches, matching_scores and prune tensors are keypoint
    matching information.
    )custom_introc                   @   s   e Zd ZU dZdZeej ed< dZ	eej ed< dZ
eej ed< dZeej ed< dZeej ed< dZeej ed< dZeeej  ed	< dZeeej  ed
< dS )LightGlueKeypointMatchingOutputa  
    loss (`torch.FloatTensor` of shape `(1,)`, *optional*):
        Loss computed during training.
    matches (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Index of keypoint matched in the other image.
    matching_scores (`torch.FloatTensor` of shape `(batch_size, 2, num_matches)`):
        Scores of predicted matches.
    keypoints (`torch.FloatTensor` of shape `(batch_size, num_keypoints, 2)`):
        Absolute (x, y) coordinates of predicted keypoints in a given image.
    prune (`torch.IntTensor` of shape `(batch_size, num_keypoints)`):
        Pruning mask indicating which keypoints are removed and at which layer.
    mask (`torch.BoolTensor` of shape `(batch_size, num_keypoints)`):
        Mask indicating which values in matches, matching_scores, keypoints and prune are keypoint matching
        information.
    hidden_states (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for the output of each stage) of shape `(batch_size, 2, num_channels,
        num_keypoints)` returned when `output_hidden_states=True` is passed or when
        `config.output_hidden_states=True`
    attentions (`Tuple[torch.FloatTensor, ...]`, *optional*):
        Tuple of `torch.FloatTensor` (one for each layer) of shape `(batch_size, 2, num_heads, num_keypoints,
        num_keypoints)` returned when `output_attentions=True` is passed or when
        `config.output_attentions=True`
    Nlossmatchesmatching_scores	keypointsprunemaskhidden_states
attentions)__name__
__module____qualname____doc__r   r   torchFloatTensor__annotations__r   r   r   r   	IntTensorr   r    tupler!    r+   r+   d/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/lightglue/modeling_lightglue.pyr   '   s   
 r   c                       s\   e Zd Zdef fddZ	d
dejdee de	e
ej e
ejejf f fdd	Z  ZS )LightGluePositionalEncoderconfigc                    s,   t    tjd|j|j d dd| _d S )Nr   Fbias)super__init__r   Lineardescriptor_dimnum_attention_heads	projectorselfr.   	__class__r+   r,   r2   U   s   
"z#LightGluePositionalEncoder.__init__Fr   output_hidden_statesreturnc                 C   sN   |  |}|jddd}t|}t|}||f}|r"||f}|S |f}|S )Nr   dim)r6   repeat_interleaver&   cossin)r8   r   r;   projected_keypoints
embeddingscosinessinesoutputr+   r+   r,   forwardY   s   


z"LightGluePositionalEncoder.forwardF)r"   r#   r$   r   r2   r&   Tensorr   boolr   r*   rH   __classcell__r+   r+   r9   r,   r-   T   s    r-   c                 C   sB   | dd d df }| ddd df }t j| |gddd}|S )N.r   r   r=   r>   )r&   stackflatten)xx1x2rot_xr+   r+   r,   rotate_halfe   s   rT   c           	      C   sj   | j }|  } | }||}||}| | t| |  }|| t||  }|j|d|j|dfS )a  Applies Rotary Position Embedding to the query and key tensors.

    Args:
        q (`torch.Tensor`): The query tensor.
        k (`torch.Tensor`): The key tensor.
        cos (`torch.Tensor`): The cosine part of the rotary embedding.
        sin (`torch.Tensor`): The sine part of the rotary embedding.
        position_ids (`torch.Tensor`, *optional*):
            Deprecated and unused.
        unsqueeze_dim (`int`, *optional*, defaults to 1):
            The 'unsqueeze_dim' argument specifies the dimension along which to unsqueeze cos[position_ids] and
            sin[position_ids] so that they can be properly broadcasted to the dimensions of q and k. For example, note
            that cos[position_ids] and sin[position_ids] have the shape [batch_size, seq_len, head_dim]. Then, if q and
            k have the shape [batch_size, heads, seq_len, head_dim], then setting unsqueeze_dim=1 makes
            cos[position_ids] and sin[position_ids] broadcastable to the shapes of q and k. Similarly, if q and k have
            the shape [batch_size, seq_len, heads, head_dim], then set unsqueeze_dim=2.
    Returns:
        `tuple(torch.Tensor)` comprising of the query and key tensors rotated using the Rotary Position Embedding.
    dtype)rV   float	unsqueezerT   to)	qkrA   rB   position_idsunsqueeze_dimrV   q_embedk_embedr+   r+   r,   apply_rotary_pos_embm   s   

r`   r    n_repr<   c                 C   s^   | j \}}}}|dkr| S | dddddddddf |||||} | ||| ||S )z
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden states go from (batch,
    num_key_value_heads, seqlen, head_dim) to (batch, num_attention_heads, seqlen, head_dim)
    r   N)shapeexpandreshape)r    ra   batchnum_key_value_headsslenhead_dimr+   r+   r,   	repeat_kv   s
   0ri           modulequerykeyvalueattention_maskscalingdropoutkwargsc                 K   s   t || j}t || j}	t||dd| }
|d ur3|d d d d d d d |jd f }|
| }
tjj|
dtj	d
|j}
tjj|
|| jd}
t|
|	}|dd }||
fS )Nr   r	   rM   r=   )r?   rV   )ptrainingr   )ri   num_key_value_groupsr&   matmul	transposerb   r   
functionalsoftmaxfloat32rY   rV   rq   rt   
contiguous)rk   rl   rm   rn   ro   rp   rq   rr   
key_statesvalue_statesattn_weightscausal_maskattn_outputr+   r+   r,   eager_attention_forward   s   
&r   c                       s   e Zd ZdZdedef fddZedddd		
	
	
	
ddej	de
eej	ej	f  de
ej	 de
ej	 de
ej	 dee deej	e
ej	 f fddZ  ZS )LightGlueAttentionz=Multi-headed attention from 'Attention Is All You Need' paperr.   	layer_idxc                    s   t    || _|| _t|d|j|j | _|j|j | _	| jd | _
|j| _d| _tj|j|j| j |jd| _tj|j|j| j |jd| _tj|j|j| j |jd| _tj|j| j |j|jd| _d S )Nrh   g      Tr/   )r1   r2   r.   r   getattrhidden_sizer5   rh   rf   ru   rp   attention_dropout	is_causalr   r3   attention_biasq_projk_projv_projo_projr8   r.   r   r9   r+   r,   r2      s(   
zLightGlueAttention.__init__past_key_valuepast_key_valuesz4.58)new_nameversionNr    position_embeddingsro   encoder_hidden_statesencoder_attention_maskrr   r<   c                 K   s"  |j d d }g |d| jR }| ||dd}	|d u}
|
r%|n|}|
r+|n|}| ||dd}| ||dd}|d urV|\}}t|	|||\}	}t}| j	j
dkrdt| j	j
 }|| |	|||f| jspdn| j| jd|\}}|jg |dR   }| |}||fS )Nr=   r   r   eagerrj   )rq   rp   )rb   rh   r   viewrw   r   r   r`   r   r.   _attn_implementationr   rt   r   rp   rd   r{   r   )r8   r    r   ro   r   r   rr   input_shapehidden_shapequery_statesis_cross_attentioncurrent_statescurrent_attention_maskr|   r}   rA   rB   attention_interfacer   r~   r+   r+   r,   rH      s:   


zLightGlueAttention.forward)NNNN)r"   r#   r$   r%   r   intr2   r   r&   rJ   r   r*   r   r   rH   rL   r+   r+   r9   r,   r      s.    r   c                       8   e Zd Zdef fddZdejdejfddZ  ZS )LightGlueMLPr.   c                    sV   t    || _t|j | _t|j|j| _	t|j|j
| _tj|jdd| _d S )NT)elementwise_affine)r1   r2   r.   r
   
hidden_actactivation_fnr   r3   intermediate_sizefc1r   fc2	LayerNorm
layer_normr7   r9   r+   r,   r2      s   
zLightGlueMLP.__init__r    r<   c                 C   s,   |  |}| |}| |}| |}|S N)r   r   r   r   )r8   r    r+   r+   r,   rH     s
   



zLightGlueMLP.forward	r"   r#   r$   r   r2   r&   rJ   rH   rL   r+   r+   r9   r,   r      s    r   c                       s|   e Zd Zdedef fddZ		ddejdejdejd	ee	 d
ee	 de
ejee
ej  ee
ej  f fddZ  ZS )LightGlueTransformerLayerr.   r   c                    s:   t    t||| _t|| _t||| _t|| _d S r   )r1   r2   r   self_attentionr   self_mlpcross_attention	cross_mlpr   r9   r+   r,   r2     s
   

z"LightGlueTransformerLayer.__init__Fdescriptorsr   ro   r;   output_attentionsr<   c                 C   sX  |rdnd }|r
dnd }|r||f }|j \}}	}
| j||||d\}}tj||gdd}| |}|| }|r<||f}|dd|	|
d||	|
}|d urb|dddd|	d|dd|	nd }| j||||d\}}tj||gdd}| |}|| }|r||f}||||	|
f | |||	|
f | }|r||f |f }|||fS )Nr+   )r   ro   r   r=   r>   r   r   )r   r   r   )	rb   r   r&   catr   rd   flipr   r   )r8   r   r   ro   r;   r   all_hidden_statesall_attentions
batch_sizenum_keypointsr4   attention_outputself_attentionsintermediate_statesoutput_statesself_attention_descriptorsself_attention_hidden_statesr   r   cross_attention_outputcross_attentionscross_intermediate_statescross_output_statescross_attention_hidden_statesr+   r+   r,   rH     s`   


	
$


z!LightGlueTransformerLayer.forward)FF)r"   r#   r$   r   r   r2   r&   rJ   r   rK   r*   rH   rL   r+   r+   r9   r,   r   
  s"    "r   
similaritymatchability0matchability1c           
      C   s   | j \}}}tj|tj|dd }tj| d}tj| dd ddd}| ||d |d fd}	|| | |	ddd|d|f< tj|d |	dddddf< tj|d |	dddddf< |	S )z;create the log assignment matrix from logits and similarityr   r   r=   rM   r   N)	rb   r   rx   
logsigmoidrw   log_softmaxr{   new_fullsqueeze)
r   r   r   r   num_keypoints_0num_keypoints_1certaintiesscores0scores1scoresr+   r+   r,   sigmoid_log_double_softmax]  s    ""&&r   c                       sT   e Zd Zdef fddZdejdejdejfddZdejdejfd	d
Z  Z	S )LightGlueMatchAssignmentLayerr.   c                    s@   t    |j| _tj| j| jdd| _tj| jddd| _d S )NTr/   r   )r1   r2   r4   r   r3   final_projectionmatchabilityr7   r9   r+   r,   r2   m  s   
z&LightGlueMatchAssignmentLayer.__init__r   r   r<   c                 C   s2  |j \}}}| |}|tj| j|jdd  }||d d||}|d d df }|d d df }||dd }	|d urr||d d|}|d d df d}
|d d df ddd}|
| }|		|dkt
|	jj}	| |}||d d|d}|d d df }|d d df }t|	||}|S )Ndeviceg      ?r   r   r   r=   rM   )rb   r   r&   tensorr4   r   rd   rw   rX   masked_fillfinforV   minr   r   )r8   r   r   r   r   r4   m_descriptorsm_descriptors0m_descriptors1r   mask0mask1r   matchability_0matchability_1r   r+   r+   r,   rH   t  s&   

z%LightGlueMatchAssignmentLayer.forwardc                 C   s    |  |}tj|d}|S )z0Get matchability of descriptors as a probabilityr=   )r   r   rx   sigmoidr   )r8   r   r   r+   r+   r,   get_matchability  s   
z.LightGlueMatchAssignmentLayer.get_matchability)
r"   r#   r$   r   r2   r&   rJ   rH   r   rL   r+   r+   r9   r,   r   l  s    r   c                       r   )LightGlueTokenConfidenceLayerr.   c                    s   t    t|jd| _d S Nr   )r1   r2   r   r3   r4   tokenr7   r9   r+   r,   r2     s   
z&LightGlueTokenConfidenceLayer.__init__r   r<   c                 C   s$   |  | }tj|d}|S )Nr=   )r   detachr   rx   r   r   )r8   r   r   r+   r+   r,   rH     s   z%LightGlueTokenConfidenceLayer.forwardr   r+   r+   r9   r,   r     s    r   c                   @   s.   e Zd ZU dZeed< dZdZdZdZ	dZ
dS )LightGluePreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    r.   	lightgluepixel_valuesFTN)r"   r#   r$   r%   r   r(   base_model_prefixmain_input_namesupports_gradient_checkpointing_supports_flash_attn_supports_sdpar+   r+   r+   r,   r     s   
 r   r   	thresholdc                 C   sh  | j \}}}| ddddddf d}| ddddddf d}|j}|j}tj|j d |jdd }tj|j d |jdd }	||d|k}
|	|d|k}|j }|	d}t
|
||}t
||d||}|
||k@ }||d|@ }t
||d}t
||d}t||gdd|d d}t||gdd|d d}||fS )z1obtain matches from a score matrix [Bx M+1 x N+1]Nr=   r   r   r   r   )rb   maxindicesr&   aranger   gathervaluesexp
new_tensorwhererN   rw   rd   )r   r   r   _max0max1matches0matches1indices0indices1mutual0mutual1zeromatching_scores0matching_scores1valid0valid1r   r   r+   r+   r,   get_matches_from_scores  s(     

""r  r   heightwidthc                 C   sV   t j||g| j| jdd }|d }|djd }| |ddddf  |d  } | S )a  
    Normalize keypoints locations based on image image_shape

    Args:
        keypoints (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`):
            Keypoints locations in (x, y) format.
        height (`int`):
            Image height.
        width (`int`):
            Image width.

    Returns:
        Normalized keypoints locations of shape (`torch.Tensor` of shape `(batch_size, num_keypoints, 2)`).
    r   rV   Nr   r=   .).NN)r&   r   r   rV   r   r   )r   r  r	  sizeshiftscaler+   r+   r,   normalize_keypoints  s
   r  zV
    LightGlue model taking images as inputs and outputting the matching of them.
    c                       s  e Zd ZdZdef fddZdedefddZ		d/d
e	j
de	j
dee dee	j
ee	j
e	j
f f fddZde	j
dede	j
de	j
de	j
f
ddZd0ddZde	j
de	j
dede	j
fddZd
e	j
de	j
de	j
de	j
de	j
de	j
defddZdd  Zde	j
d!e	j
d"e	j
d#e	j
dee	j
e	j
f f
d$d%Z			d1de	j
d
e	j
d&ed'edee	j
 d(ee dee dee	j
e	j
e	j
eef fd)d*Zee			d1d+e	jd,ee	j d(ee dee deeef f
d-d.Z  ZS )2LightGlueForKeypointMatchingar  
    LightGlue is a model matching keypoints in images by leveraging detections from a keypoint detector such as
    SuperPoint. It is based on the SuperGlue architecture and is designed to be lightweight and efficient.
    It consists of :
        1. Keypoint Encoder
        2. A Graph Neural Network with self and cross attention layers
        3. Matching Assignment layers

    The correspondence ids use -1 to indicate non-matching points.

    Philipp Lindenberger, Paul-Edouard Sarlin and Marc Pollefeys. LightGlue: Local Feature Matching at Light Speed.
    In ICCV 2023. https://huggingface.co/papers/2306.13643
    r.   c                    s   t    tj j jd| _ jj| _ j	| _	 j
| _ j| _ j| _ j| _| j	| jkr;tj| j| j	dd| _nt | _t | _t fddt j
D | _t fddt j
D | _t fddt j
d D | _|   d S )	N)trust_remote_codeTr/   c                    s   g | ]}t  |d qS ))r   )r   ).0ir.   r+   r,   
<listcomp>      z9LightGlueForKeypointMatching.__init__.<locals>.<listcomp>c                       g | ]}t  qS r+   )r   r  r   r  r+   r,   r        c                    r  r+   )r   r  r  r+   r,   r    r  r   )r1   r2   r   from_configkeypoint_detector_configr  keypoint_detectordescriptor_decoder_dim keypoint_detector_descriptor_dimr4   num_hidden_layers
num_layersfilter_thresholddepth_confidencewidth_confidencer   r3   input_projectionIdentityr-   positional_encoder
ModuleListrangetransformer_layersmatch_assignment_layerstoken_confidence	post_initr7   r9   r  r,   r2     s0   


z%LightGlueForKeypointMatching.__init__layer_indexr<   c                 C   s*   ddt d| | j   }t |ddS )z-scaled confidence threshold for a given layerg?g?g      r   r   )npr   r  clip)r8   r,  r   r+   r+   r,   _get_confidence_threshold  s   z6LightGlueForKeypointMatching._get_confidence_thresholdFr   r   r;   c                 C   s,   |   }| |}| j||d}||fS )Nr;   )r   r{   r#  r%  )r8   r   r   r;   projected_descriptorskeypoint_encoding_outputr+   r+   r,   _keypoint_processing  s   
z1LightGlueForKeypointMatching._keypoint_processingkeypoint_confidencesr   
num_pointsc           
      C   s   |j \}}|| jd k r6||dkd}||d d}| |}d||k  jdd|  }|| jk}	|	S tj	|tj
d}	|	S )zRevaluate whether we should stop inference based on the confidence of the keypointsr   r   r   r=   g      ?r>   rU   )rb   r  r   rd   r/  rW   sumr!  r&   onesrK   )
r8   r4  r,  r   r5  r   r   r   ratio_confidentearly_stopped_pairsr+   r+   r,   _get_early_stopped_image_pairs'  s   


z;LightGlueForKeypointMatching._get_early_stopped_image_pairsNc                 C   s@   |d ur|| }|| }| j | ||}t|| j\}}||fS r   )r)  r  r   )r8   r   r   r,  early_stopsr   r   r   r+   r+   r,   _get_keypoint_matching:  s   z3LightGlueForKeypointMatching._get_keypoint_matchingconfidencesr   c                 C   s,   |d| j  k}|dur||| |kO }|S )z#mask points which should be removedr   N)r"  r/  )r8   r=  r   r,  keepr+   r+   r,   _get_pruning_maskB  s   z.LightGlueForKeypointMatching._get_pruning_maskr   prune_outputc                    s   |j \}}	}	| j| |}
| ||
|  |dktd  fdd||d |d  |fD \}}}}}t|D ]}|||| f  d7  < q;dd ||||fD \}}}}||f}t|ddd	}|||||fS )
z
        For a given layer, prune keypoints based on the confidence of the keypoints and the matchability of the
        descriptors.
        r   Fc                 3   s$    | ]}d d t | D V  qdS )c                 S   s   g | ]\}}|| qS r+   r+   )r  tr   r+   r+   r,   r  ^  r  zULightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>.<listcomp>N)zipr  r   pruned_keypoints_maskr+   r,   	<genexpr>]  s
    
zJLightGlueForKeypointMatching._do_layer_keypoint_pruning.<locals>.<genexpr>r   c                 s   s    | ]	}t |d dV  qdS )T)batch_firstNr   )r  pruned_tensorr+   r+   r,   rF  e  s
    

Tr=   rG  padding_value)	rb   r)  r   r?  r   r&   r   r'  r   )r8   r   r   r   r   r@  r4  r,  r   r   descriptors_matchabilitypruned_descriptorspruned_keypoints_0pruned_keypoints_1pruned_maskpruned_indicesr  pruned_keypointsr+   rD  r,   _do_layer_keypoint_pruningI  s   

z7LightGlueForKeypointMatching._do_layer_keypoint_pruningc                    s   t   t  jd } | } |  dd ||fD \}}dd ||fD \}} fdd||||fD \}}}}||||fS )Nr   c                 s       | ]
}t |d ddV  qdS )Tr=   rI  Nr   rC  r+   r+   r,   rF  {  
    
zMLightGlueForKeypointMatching._concat_early_stopped_outputs.<locals>.<genexpr>c                 s   rS  )Tr   rI  Nr   rC  r+   r+   r,   rF    rT  c                 3   s    | ]}|  V  qd S r   r+   rC  early_stops_indicesr+   r,   rF    s
    
)r&   rN   r   rb   )r8   rV  final_pruned_keypoints_indices!final_pruned_keypoints_iterationsr   r   idsorder_indicesr+   rU  r,   _concat_early_stopped_outputsn  s$   



	z:LightGlueForKeypointMatching._concat_early_stopped_outputsr   r   r   c                    s  |j \ } fdd|||fD \}}}|d d df }|d d df }|d d df }|d d df }	|d d df }
|d d df }tj d d|fd|j|jd}tj d d|f|j|jd}t d D ]T}t|| dkd|| d|| j	dd||d|| f< t|	| dkd|| d|	| j	dd||d|| f< |
| ||d|| f< || ||d|| f< qi||fS )	Nc                 3   s"    | ]}|  d  d dV  qdS )r   r=   N)rd   rC  r   r+   r,   rF    s    
zJLightGlueForKeypointMatching._do_final_keypoint_pruning.<locals>.<genexpr>r   r   r   r=   r
  )r   )
rb   r&   fullr   rV   zerosr'  r   r   clamp)r8   r   r   r   r   r   r   r   r   r   r  r  _matches_matching_scoresr  r+   r\  r,   _do_final_keypoint_pruning  s0   
	
 &&z7LightGlueForKeypointMatching._do_final_keypoint_pruningr  r	  r   c           (   
      s  |rdnd }|r
dnd }	|j d dkr.|j d d }
|j|
dtjd||
||
||	fS |j}|j \}}}}tj||ddd}||d |d}|d urX||d |nd }||d || j}tj	|d |d}t
|||}| j|||d	\}}|d }| jdk}| jdk}g }g }g }g }g }tj	d||d|d d}t|}t| jD ]}| }|d ur| ||}ntj||d
 f|jd}| j| |||||d}|\}}} |r|| }|r|	|  }	|r|| jd k r| j| |}!| j|!|||d}"ntj|tjd}"t|"r|"d |  }#| j||| d\}$}%|t|# |t|$ |t|% |rO|t|   |t|   ||"  }t fdd||d |d ||fD \}}&}'}}|&|'f}|rt fdd|||!fD \}}}!t|"r n|r|  ||||||!|\}}}}}q|r|r| !|||||\}}}}| "||||\}}n| ||| jd \}}t|| j }||d|}|||||	fS )Nr+   r   r   r=   rU   r   r>   r   r0  rM   )ro   r;   r   )r5  r;  c                 3       | ]}|   V  qd S r   r+   rC  rc  r+   r,   rF    
    
zALightGlueForKeypointMatching._match_image_pair.<locals>.<genexpr>c                 3   rd  r   r+   rC  rc  r+   r,   rF  #  re  )#rb   r   r&   r   	new_zerosr   r6  rd   r  r   r  r3  r!  r"  rc   	ones_liker'  r  r  get_extended_attention_maskr7  r(  r*  r:  rK   anyr@   r<  extendlistr*   allrR  r[  rb  )(r8   r   r   r  r	  r   r   r;   r   r   rb   r   r   r   initial_num_keypointsnum_points_per_pairimage_indicesr2  do_early_stopdo_keypoint_pruningrV  r   r   rW  rX  pruned_keypoints_indicespruned_keypoints_iterationsr,  r   extended_attention_masklayer_outputr    	attentionr4  r9  early_stopped_image_indicesearly_stopped_matchesearly_stopped_matching_scoreskeypoints_0
keypoint_1r+   rc  r,   _match_image_pair  s   










	
z.LightGlueForKeypointMatching._match_image_pairr   labelsc              
   C   s  d }|d ur
t d|d ur|n| jj}|d ur|n| jj}|jdks*|ddkr.t d|j\}}}}	}
||d ||	|
}| |}|d d \}}}}||ddd	|}||dd| j
	|}||dd}| }|d d d d d d df |
 |d d d d d d df< |d d d d d d df |	 |d d d d d d df< | j|||	|
|||d	\}}}}}t||||||||d
S )Nz9LightGlue is not trainable, no labels should be provided.   r   r   zOInput must be a 5D tensor of shape (batch_size, 2, num_channels, height, width)   r=   r   )r   r   r;   )r   r   r   r   r   r   r    r!   )
ValueErrorr.   r   r;   ndimr  rb   rd   r  rY   r  cloner|  r   )r8   r   r}  r   r;   r   r   r   channelsr  r	  keypoint_detectionsr   r   r   absolute_keypointsr   r   r   r    r!   r+   r+   r,   rH   `  sJ   	
88
z$LightGlueForKeypointMatching.forwardrI   r   )NNN)r"   r#   r$   r%   r   r2   r   rW   r/  r&   rJ   r   rK   r*   r3  r:  r<  r?  rR  r[  rb  r|  r   r   r'   
LongTensorr   r   rH   rL   r+   r+   r9   r,   r    s     


 
% 
+	
 .
r  r   )rj   )7dataclassesr   typingr   r   r   numpyr-  r&   r   torch.nn.utils.rnnr   activationsr
   modeling_flash_attention_utilsr   modeling_utilsr   r   processing_utilsr   utilsr   r   r   utils.deprecationr   utils.genericr   auto.modeling_autor   configuration_lightgluer   r   Moduler-   rT   r`   rJ   r   ri   rW   r   r   r   r   r   r   r   r   r*   r  r  r  __all__r+   r+   r+   r,   <module>   s   	#

HS
)$    0