o
    eiU                     @   s   d dl Z d dlmZ d dlm  mZ ddlmZmZm	Z	 ddl
mZmZmZmZmZ e r4d dlmZ e r=d dlmZ dd	 ZG d
d dejZG dd dejZ					dddZdS )    N   )is_scipy_availableis_vision_availablerequires_backends   )box_iou	dice_lossgeneralized_box_iounested_tensor_from_tensor_listsigmoid_focal_losslinear_sum_assignment)center_to_corners_formatc                 C   s   dd t | |D S )Nc                 S   s   g | ]	\}}||d qS ))logits
pred_boxes ).0abr   r   \/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/transformers/loss/loss_rt_detr.py
<listcomp>'   s    z!_set_aux_loss.<locals>.<listcomp>)zip)outputs_classoutputs_coordr   r   r   _set_aux_loss&   s   r   c                       s0   e Zd ZdZ fddZe dd Z  ZS )RTDetrHungarianMatchera  This class computes an assignment between the targets and the predictions of the network

    For efficiency reasons, the targets don't include the no_object. Because of this, in general, there are more
    predictions than targets. In this case, we do a 1-to-1 matching of the best predictions, while the others are
    un-matched (and thus treated as non-objects).

    Args:
        config: RTDetrConfig
    c                    sz   t    t| dg |j| _|j| _|j| _|j	| _	|j
| _|j| _| j| j  kr8| j  kr8dkr;td d S d S )Nscipyr   z#All costs of the Matcher can't be 0)super__init__r   matcher_class_cost
class_costmatcher_bbox_cost	bbox_costmatcher_giou_cost	giou_costuse_focal_lossmatcher_alphaalphamatcher_gammagamma
ValueError)selfconfig	__class__r   r   r   5   s   
"
zRTDetrHungarianMatcher.__init__c                 C   s~  |d j dd \}}|d dd}tdd |D }td	d |D }| jrft|d dd}|dd|f }d| j || j  d| d
 	   }	| jd| | j  |d
 	   }
|
|	 }n|d dd
d}|dd|f  }tj||dd}tt|t| }| j| | j|  | j|  }|||d }dd |D }dd t||dD }dd |D S )a  Performs the matching

        Params:
            outputs: This is a dict that contains at least these entries:
                 "logits": Tensor of dim [batch_size, num_queries, num_classes] with the classification logits
                 "pred_boxes": Tensor of dim [batch_size, num_queries, 4] with the predicted box coordinates

            targets: This is a list of targets (len(targets) = batch_size), where each target is a dict containing:
                 "class_labels": Tensor of dim [num_target_boxes] (where num_target_boxes is the number of ground-truth
                           objects in the target) containing the class labels
                 "boxes": Tensor of dim [num_target_boxes, 4] containing the target box coordinates

        Returns:
            A list of size batch_size, containing tuples of (index_i, index_j) where:
                - index_i is the indices of the selected predictions (in order)
                - index_j is the indices of the corresponding selected targets (in order)
            For each batch element, it holds:
                len(index_i) = len(index_j) = min(num_queries, num_target_boxes)
        r   Nr   r   r   r   c                 S      g | ]}|d  qS class_labelsr   r   vr   r   r   r   ^       z2RTDetrHungarianMatcher.forward.<locals>.<listcomp>c                 S   r/   boxesr   r2   r   r   r   r   _   r4   g:0yE>)pc                 S      g | ]}t |d  qS r5   lenr2   r   r   r   r   u       c                 S   s   g | ]
\}}t || qS r   r   )r   icr   r   r   r   v   s    c                 S   s0   g | ]\}}t j|t jd t j|t jd fqS )dtype)torch	as_tensorint64)r   r=   jr   r   r   r   x   s   0 )shapeflattenrA   catr%   Fsigmoidr'   r)   logsoftmaxcdistr	   r   r"   r    r$   viewcpu	enumeratesplit)r+   outputstargets
batch_sizenum_queriesout_bbox
target_idstarget_bboxout_probneg_cost_classpos_cost_classr    r"   r$   cost_matrixsizesindicesr   r   r   forwardD   s&   &"
zRTDetrHungarianMatcher.forward)	__name__
__module____qualname____doc__r   rA   no_gradr^   __classcell__r   r   r-   r   r   *   s
    
r   c                       s   e Zd ZdZ fddZdddZdddZe d	d
 Z	dd Z
dd ZdddZdd Zdd ZdddZdd Zedd Zdd Z  ZS )
RTDetrLossah  
    This class computes the losses for RTDetr. The process happens in two steps: 1) we compute hungarian assignment
    between ground truth boxes and the outputs of the model 2) we supervise each pair of matched ground-truth /
    prediction (supervise class and box).

    Args:
        matcher (`DetrHungarianMatcher`):
            Module able to compute a matching between targets and proposals.
        weight_dict (`Dict`):
            Dictionary relating each loss with its weights. These losses are configured in RTDetrConf as
            `weight_loss_vfl`, `weight_loss_bbox`, `weight_loss_giou`
        losses (`list[str]`):
            List of all the losses to be applied. See `get_loss` for a list of all available losses.
        alpha (`float`):
            Parameter alpha used to compute the focal loss.
        gamma (`float`):
            Parameter gamma used to compute the focal loss.
        eos_coef (`float`):
            Relative classification weight applied to the no-object category.
        num_classes (`int`):
            Number of object categories, omitting the special no-object category.
    c                    s|   t    t|| _|j| _|j|j|jd| _	ddg| _
|j| _t|jd }| j|d< | d| |j| _|j| _d S )N)loss_vfl	loss_bbox	loss_giouvflr6   r   r7   empty_weight)r   r   r   matcher
num_labelsnum_classesweight_loss_vflweight_loss_bboxweight_loss_giouweight_dictlosseseos_coefficienteos_coefrA   onesregister_bufferfocal_loss_alphar'   focal_loss_gammar)   )r+   r,   rj   r-   r   r   r      s   



zRTDetrLoss.__init__Tc                 C   st  d|vrt dd|vrt d| |}|d | }tjdd t||D dd}tt| t|\}	}
t|	}	|d }td	d t||D }tj	|j
d d
 | jtj|jd}|||< tj|| jd ddd df }tj||jd}|	|j||< |d| }t| }| j|| j d|  | }tj|||dd}|d |j
d  | }d|iS )Nr   #No predicted boxes found in outputsr   z$No predicted logits found in outputsc                 S       g | ]\}\}}|d  | qS r5   r   r   _target_r=   r   r   r   r           z.RTDetrLoss.loss_labels_vfl.<locals>.<listcomp>r   dimc                 S   rz   r0   r   r{   r   r   r   r      r~   r   r@   devicer   rm   .r7   r?   none)weight	reductionrf   )KeyError_get_source_permutation_idxrA   rG   r   r   r   detachdiagfullrE   rm   rC   r   rH   one_hot
zeros_liker@   to	unsqueezerI   r'   powr)    binary_cross_entropy_with_logitsmeansum)r+   rQ   rR   r]   	num_boxesrJ   idx	src_boxestarget_boxesiousr}   
src_logitstarget_classes_originaltarget_classestargettarget_score_originaltarget_score
pred_scorer   lossr   r   r   loss_labels_vfl   s0   

 zRTDetrLoss.loss_labels_vflc                 C   s   d|vrt d|d }| |}tdd t||D }tj|jdd | jtj|j	d}	||	|< t
|dd|	| j}
d	|
i}|S )
zClassification loss (NLL)
        targets dicts must contain the key "class_labels" containing a tensor of dim [nb_target_boxes]
        r   z#No logits were found in the outputsc                 S   rz   r0   r   r{   r   r   r   r      r~   z*RTDetrLoss.loss_labels.<locals>.<listcomp>Nr   r   r   loss_ce)r   r   rA   rG   r   r   rE   rm   rC   r   rH   cross_entropy	transposeclass_weight)r+   rQ   rR   r]   r   rJ   r   r   r   r   r   rr   r   r   r   loss_labels   s   
zRTDetrLoss.loss_labelsc                 C   sb   |d }|j }tjdd |D |d}| djdkd}tj	|
 |
 }	d|	i}
|
S )	z
        Compute the cardinality error, i.e. the absolute error in the number of predicted non-empty boxes. This is not
        really a loss, it is intended for logging purposes only. It doesn't propagate gradients.
        r   c                 S   r9   r0   r:   r2   r   r   r   r      r<   z/RTDetrLoss.loss_cardinality.<locals>.<listcomp>)r   r7   g      ?r   cardinality_error)r   rA   rB   rI   maxvaluesr   nn
functionall1_lossfloat)r+   rQ   rR   r]   r   r   r   target_lengths	card_predcard_errrr   r   r   r   loss_cardinality   s   zRTDetrLoss.loss_cardinalityc                 C   s   d|vrt d| |}|d | }tjdd t||D dd}i }tj||dd}	|	 | |d	< d
tt	t
|t
| }
|
 | |d< |S )a;  
        Compute the losses related to the bounding boxes, the L1 regression loss and the GIoU loss. Targets dicts must
        contain the key "boxes" containing a tensor of dim [nb_target_boxes, 4]. The target boxes are expected in
        format (center_x, center_y, w, h), normalized by the image size.
        r   ry   c                 S   rz   r5   r   )r   tr}   r=   r   r   r   r      r~   z)RTDetrLoss.loss_boxes.<locals>.<listcomp>r   r   r   r   rg   r   rh   )r   r   rA   rG   r   rH   r   r   r   r	   r   )r+   rQ   rR   r]   r   r   r   r   rr   rg   rh   r   r   r   
loss_boxes   s   
zRTDetrLoss.loss_boxesc                 C   s   d|vrt d| |}| |}|d }|| }dd |D }t| \}	}
|	|}	|	| }	tjj|dddf |	j	dd ddd	}|ddd
f 
d}|	
d}	|	|j	}	t||	|t||	|d}|S )z
        Compute the losses related to the masks: the focal loss and the dice loss. Targets dicts must contain the key
        "masks" containing a tensor of dim [nb_target_boxes, h, w].
        
pred_masksz#No predicted masks found in outputsc                 S   r/   )masksr   r   r   r   r   r   r   
  r4   z)RTDetrLoss.loss_masks.<locals>.<listcomp>NbilinearF)sizemodealign_cornersr   r   )	loss_mask	loss_dice)r   r   _get_target_permutation_idxr
   	decomposer   r   r   interpolaterE   rF   rM   r   r   )r+   rQ   rR   r]   r   
source_idx
target_idxsource_masksr   target_masksvalidrr   r   r   r   
loss_masks   s(   





zRTDetrLoss.loss_masksc                 C   s   |d }|  |}tdd t||D }tj|jd d | jtj|jd}	||	|< t	j
|	| jd ddd d	f }
t	j||
d
 dd}|d |jd  | }d|iS )Nr   c                 S   rz   r0   r   r{   r   r   r   r      r~   z.RTDetrLoss.loss_labels_bce.<locals>.<listcomp>r   r   r   r   .r7   g      ?r   r   loss_bce)r   rA   rG   r   r   rE   rm   rC   r   rH   r   r   r   r   r+   rQ   rR   r]   r   rJ   r   r   r   r   r   r   r   r   r   loss_labels_bce  s   
 zRTDetrLoss.loss_labels_bcec                 C   4   t dd t|D }t dd |D }||fS )Nc                 S   s    g | ]\}\}}t ||qS r   rA   	full_like)r   r=   sourcer}   r   r   r   r   -  r~   z:RTDetrLoss._get_source_permutation_idx.<locals>.<listcomp>c                 S   s   g | ]\}}|qS r   r   )r   r   r}   r   r   r   r   .  r4   rA   rG   rO   )r+   r]   	batch_idxr   r   r   r   r   +     z&RTDetrLoss._get_source_permutation_idxc                 C   r   )Nc                 S   s    g | ]\}\}}t ||qS r   r   )r   r=   r}   r   r   r   r   r   3  r~   z:RTDetrLoss._get_target_permutation_idx.<locals>.<listcomp>c                 S   s   g | ]\}}|qS r   r   )r   r}   r   r   r   r   r   4  r4   r   )r+   r]   r   r   r   r   r   r   1  r   z&RTDetrLoss._get_target_permutation_idxc                 C   s   d|vrt d|d }| |}tdd t||D }tj|jd d | jtj|j	d}	||	|< t
j|	| jd dd	d d
f }
t||
| j| j}|d |jd  | }d|iS )Nr   zNo logits found in outputsc                 S   rz   r0   r   r{   r   r   r   r   >  r~   z0RTDetrLoss.loss_labels_focal.<locals>.<listcomp>r   r   r   r   .r7   
loss_focal)r   r   rA   rG   r   r   rE   rm   rC   r   rH   r   r   r'   r)   r   r   r   r   r   r   loss_labels_focal7  s   
 zRTDetrLoss.loss_labels_focalc                 C   sL   | j | j| j| j| j| j| jd}||vrtd| d|| ||||S )N)labelscardinalityr6   r   bcefocalri   zLoss z not supported)r   r   r   r   r   r   r   r*   )r+   r   rQ   rR   r]   r   loss_mapr   r   r   get_lossI  s   	zRTDetrLoss.get_lossc           
   	   C   s   | d | d }}dd |D }|d d j }g }t|D ]A\}}|dkrItj|tj|d}	|	|}	t|| t|	ks?J ||| |	f q|tjdtj|dtjdtj|df q|S )Ndn_positive_idxdn_num_groupc                 S   r9   r0   r:   r   r   r   r   r   Z  r<   z6RTDetrLoss.get_cdn_matched_indices.<locals>.<listcomp>r   r1   r   )	r   rO   rA   arangerC   tiler;   appendzeros)
dn_metarR   r   r   num_gtsr   dn_match_indicesr=   num_gtgt_idxr   r   r   get_cdn_matched_indicesW  s    
z"RTDetrLoss.get_cdn_matched_indicesc           
   	      s  dd |  D }||}tdd |D }tj|gtjtt| j	d}tj
|dd }i }jD ]}|||||fddD | q7d	|v rt|d	 D ]8\ }||}jD ]*}|d
krpqi|||||fddD  fdd  D | qiq\d|v rd|vrtd|d |}||d d  }t|d D ]8\ }jD ]0}|d
krqi }	j|||||fi |	fddD  fdd  D | qq|S )a  
        This performs the loss computation.

        Args:
             outputs (`dict`, *optional*):
                Dictionary of tensors, see the output specification of the model for the format.
             targets (`list[dict]`, *optional*):
                List of dicts, such that `len(targets) == batch_size`. The expected keys in each dict depends on the
                losses applied, see each loss' doc.
        c                 S   s   i | ]\}}d |vr||qS )auxiliary_outputsr   r   kr3   r   r   r   
<dictcomp>y  s    z&RTDetrLoss.forward.<locals>.<dictcomp>c                 s   s    | ]	}t |d  V  qdS )r1   Nr:   r   r   r   r   	<genexpr>  s    z%RTDetrLoss.forward.<locals>.<genexpr>r   r   )minc                    *   i | ]}|j v r| | j |  qS r   rq   r   r   l_dictr+   r   r   r        * r   r   c                    r   r   r   r   r   r   r   r     r   c                        i | ]\}}|d    |qS )_aux_r   r   r=   r   r   r     r~   dn_auxiliary_outputsdenoising_meta_valuesz}The output must have the 'denoising_meta_values` key. Please, ensure that 'outputs' includes a 'denoising_meta_values' entry.r   c                    r   r   r   r   r   r   r   r     r   c                    r   )_dn_r   r   r   r   r   r     r~   )itemsrk   r   rA   rB   r   nextiterr   r   clampitemrr   r   updaterO   r*   r   )
r+   rQ   rR   outputs_without_auxr]   r   rr   r   r   kwargsr   )r=   r   r+   r   r^   n  sN   "




zRTDetrLoss.forward)T)r_   r`   ra   rb   r   r   r   rA   rc   r   r   r   r   r   r   r   r   staticmethodr   r^   rd   r   r   r-   r   re   {   s"    





re   c
                 K   s  t |}|| i }| |d< ||d< d }|jrx|	d ur4tj||	d dd\}}tj||	d dd\}}t|d d d df dd|d d d df dd}||d	< |d	 t|g|g |	d urxt|dd|dd|d
< |	|d< |||}t|	 }|||fS )Nr   r   dn_num_splitr   r   r7   r   r   r   r   r   )
re   r   auxiliary_lossrA   rP   r   r   extendr   r   )r   r   r   r   r,   r   r   enc_topk_logitsenc_topk_bboxesr   r   	criterionoutputs_lossr   dn_out_coorddn_out_class	loss_dictr   r   r   r   RTDetrForObjectDetectionLoss  s*   
:

r  )NNNNN)rA   torch.nnr   torch.nn.functionalr   rH   utilsr   r   r   loss_for_object_detectionr   r   r	   r
   r   scipy.optimizer   transformers.image_transformsr   r   Moduler   re   r  r   r   r   r   <module>   s(   	Q  <