o
    ॵi                     @   s   d dl mZmZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
mZ d dlmZ d dlmZ d dlmZ d d	lmZ e Zejejejd
G dd deZdd Zdd ZG dd deZdd ZdddZG dd dejZ dS )    )AnyDictN)nn)	Pipelines)InputPipeline)	PIPELINES)	LoadImage)Tasks)
get_logger)module_namec                       s\   e Zd Zdef fddZdefddZdeeef fddZ	deeef fd	d
Z
  ZS )VidtPipelinemodelc                    sj   t  jd
d|i| | j  ttddgt tjg dg ddg| _	t
 | _ddd| _d	S )a  
        use `model` to create a vidt pipeline for prediction
        Args:
            model: model id on modelscope hub.
        Example:
            >>> from modelscope.pipelines import pipeline
            >>> vidt_pipeline = pipeline('image-object-detection', 'damo/ViDT-logo-detection')
            >>> result = vidt_pipeline(
                'data/test/images/vidt_test1.png')
            >>> print(f'Output: {result}.')
        r   i  )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?)meanstdnegativepositive)r      N )super__init__r   eval
transformsComposeResizeToTensor	Normalize	transformPostProcesspostprocessors	label_dic)selfr   kwargs	__class__r   Y/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/cv/vidt_pipeline.pyr      s   
zVidtPipeline.__init__inputsc           
      K   s~   t |}|jd |jd g}| |}|g}|g}t|| j}t|}|| j}i }	|j	|	d< |j
|	d< ||	d< |	S )Nr   r   tensorsmaskorig_target_sizes)r	   convert_to_imgsizer   torchtensortodevicenested_tensor_from_tensor_listr'   r(   )
r!   r&   preprocess_paramsimgori_sizeimagetensor_listr)   samplesresr   r   r%   
preprocess.   s   



zVidtPipeline.preprocessc           	      K   st   |d }|d }|d }t    | ||\}}i }||d< ||d< ||d< |W  d    S 1 s3w   Y  d S )Nr'   r(   r)   out_pred_logitsout_pred_boxes)r,   no_gradr   )	r!   r&   forward_paramsr'   r(   r)   r9   r:   r7   r   r   r%   forward=   s   
$zVidtPipeline.forwardc           
      K   s   |  |d |d |d }t|d }g }g }g }|D ]}||d  || j|d   ||d  qi }	||	d< ||	d< ||	d	< |	S )
Nr9   r:   r)   r   r      scoreslabelsboxes)r   get_predictionsappendr    )
r!   r&   post_paramsresultsbatch_predictionsr?   r@   rA   sub_preoutputsr   r   r%   postprocessI   s"   
zVidtPipeline.postprocess)__name__
__module____qualname__strr   r   r8   r   r   r=   rI   __classcell__r   r   r#   r%   r      s
    r   c                 C   s   t dd | D }t| g| }|\}}}}| d j}| d j}tj|||d}	tj|||ftj|d}
t| |	|
D ].\}}}|d |j	d d |j	d d |j	d f 
| d|d |j	d d |j	d f< q:t|	|
S )Nc                 S   s   g | ]}t |jqS r   )listshape).0r2   r   r   r%   
<listcomp>^   s    z2nested_tensor_from_tensor_list.<locals>.<listcomp>r   )dtyper/   r   r>   F)_max_by_axislenrS   r/   r,   zerosonesboolziprP   copy_NestedTensor)r5   max_sizebatch_shapebchwrS   r/   r-   r(   r2   pad_imgmr   r   r%   r0   \   s   

2"
r0   c                 C   sB   | d }| dd  D ]}t |D ]\}}t|| |||< qq
|S )Nr   r   )	enumeratemax)the_listmaxessublistindexitemr   r   r%   rT   k   s   rT   c                   @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
r[   c                 C   s   || _ || _d S Nr'   r(   )r!   r'   r(   r   r   r%   r   v   s   
zNestedTensor.__init__c                 C   s@   | j |}| j}|d ur|d usJ ||}nd }t||S rk   )r'   r.   r(   r[   )r!   r/   cast_tensorr(   	cast_maskr   r   r%   r.   z   s   
zNestedTensor.toc                 C   s   | j | jfS rk   rl   r!   r   r   r%   	decompose   s   zNestedTensor.decomposec                 C   s
   t | jS rk   )rM   r'   ro   r   r   r%   __repr__   s   
zNestedTensor.__repr__N)rJ   rK   rL   r   r.   rp   rq   r   r   r   r%   r[   t   s
    r[   c                 C   sL   |  d\}}}}|d|  |d|  |d|  |d|  g}tj|ddS )Ng      ?dim)unbindr,   stack)xx_cy_cra   r`   r^   r   r   r%   box_cxcywh_to_xyxy   s   ,rz   皙?c           
      C   s   g }| D ]H}g }t t|d D ]6}t|d |  }t|d |  }g }|d |  D ]	}	|t|	 q0||krF||||g q|| q|S )Nr?   r@   rA   )rangerU   floatcpuintrC   )
post_resultsbbox_thubatch_final_resper_img_resper_img_final_resiscorelabelbboxitr   r   r%   rB      s   rB   c                       s2   e Zd ZdZd fdd	Ze dd Z  ZS )r   zQ This module converts the model's output into the format expected by the coco apiNc                    s   t    || _d S rk   )r   r   processor_dct)r!   r   r#   r   r%   r      s   

zPostProcess.__init__c              	   C   s   t |t |ks
J |jd dksJ | }tj||jd dddd\}}|}||jd  }||jd  }	t|}
t|
d|d	ddd}
|
d\}}tj||||gddtj}|
|dddddf  }
d	d
 t||	|
D }|S )a   Perform the computation

        Parameters:
            out_logits: raw logits outputs of the model
            out_bbox: raw bbox outputs of the model
            target_sizes: tensor of dimension [batch_size x 2] containing the size of each images of the batch
                          For evaluation, this must be the original image size (before any data augmentation)
                          For visualization, this should be the image size after data augment, but before padding
        r   r>   r   rr   d   rs      Nc                 S   s   g | ]\}}}|||d qS ))r?   r@   rA   r   )rQ   slr^   r   r   r%   rR      s    
z'PostProcess.forward.<locals>.<listcomp>)rU   rP   sigmoidr,   topkviewrz   gather	unsqueezerepeatru   rv   r.   float32rY   )r!   
out_logitsout_bboxtarget_sizesprobtopk_valuestopk_indexesr?   
topk_boxesr@   rA   img_himg_w	scale_fctrE   r   r   r%   r=      s0   

zPostProcess.forwardrk   )	rJ   rK   rL   __doc__r   r,   r;   r=   rN   r   r   r#   r%   r      s
    r   )r{   )!typingr   r   r,   torchvision.transformsr   r   modelscope.metainfor   modelscope.pipelines.baser   r   modelscope.pipelines.builderr   modelscope.preprocessorsr	   modelscope.utils.constantr
   modelscope.utils.loggerr   loggerregister_moduleimage_object_detectionvidtr   r0   rT   objectr[   rz   rB   Moduler   r   r   r   r%   <module>   s*   H	
