o
    ॵi%                     @   s   d dl Z d dlmZ d dlmZmZ d dlZd dlZd dl	Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ e Zej ej!ej!dG dd deZ"dS )    N)AnyDict)tqdm)	Pipelines)	KNetTrack)
OutputKeys)InputPipeline)	PIPELINES)Config)	ModelFileTasks)
get_logger)module_namec                       s   e Zd ZdZdef fddZdedeeef fddZ	deeef deeef fd	d
Z
deeef deeef fddZ  ZS )!VideoInstanceSegmentationPipelinea   Video Instance Segmentation Pipeline.

    Examples:

    >>> from modelscope.pipelines import pipeline

    >>> detector = pipeline('video-instance-segmentation', 'damo/cv_swinb_video-instance-segmentation')
    >>> detector("http://www.modelscope.cn/api/v1/models/damo/cv_swinb_video-instance-segmentation/repo?Revision=master"
    >>>             "&FilePath=resources/kitti-step_testing_image_02_0000.mp4")
    >>>   {
    >>>    "boxes": [
    >>>        [
    >>>            [
    >>>            0,
    >>>            446.9007568359375,
    >>>            36.374977111816406,
    >>>            907.0919189453125,
    >>>            337.439208984375,
    >>>            0.333
    >>>            ],
    >>>            [
    >>>            1,
    >>>            454.3310241699219,
    >>>            336.08477783203125,
    >>>            921.26904296875,
    >>>            641.7871704101562,
    >>>            0.792
    >>>            ]
    >>>        ],
    >>>        [
    >>>            [
    >>>            0,
    >>>            446.9007568359375,
    >>>            36.374977111816406,
    >>>            907.0919189453125,
    >>>            337.439208984375,
    >>>            0.333
    >>>            ],
    >>>            [
    >>>            1,
    >>>            454.3310241699219,
    >>>            336.08477783203125,
    >>>            921.26904296875,
    >>>            641.7871704101562,
    >>>            0.792
    >>>            ]
    >>>        ]
    >>>    ],
    >>>    "masks": [
    >>>        [
    >>>            [
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            ...,
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False]
    >>>            ],
    >>>            [
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            ...,
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False]
    >>>            ]
    >>>        ],
    >>>        [
    >>>            [
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            ...,
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False]
    >>>            ],
    >>>            [
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            ...,
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False],
    >>>            [False, False, False, ..., False, False, False]
    >>>            ]
    >>>        ]
    >>>    ]
    >>>   }
    >>>
    modelc                    s   t  jd|dd| td|  t|tj}t|tj}td|  t	
|| _|dd| _t|| _tj|t| jd}| j|d  | j| j | _td	 d
| _tg dtj| _tg dtj| _d| _dS )z
        use `model` to create a video panoptic segmentation pipeline for prediction
        Args:
            model: model id on modelscope hub.
        F)r   auto_collatezloading model from zloading config from max_video_framesi  )map_location
state_dictzload model done    g33333^@gR]@gRY@g(\2M@g(\L@g     L@N )super__init__loggerinfoospjoinr   TORCH_MODEL_FILECONFIGURATIONr   	from_filecfggetr   r   r   torchloaddeviceload_state_dicttoevalpad_size_divisornparrayfloat32meanstdto_rgb)selfr   kwargs
model_pathconfig_path
checkpoint	__class__r   p/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/cv/video_instance_segmentation_pipeline.pyr   |   s$   


z*VideoInstanceSegmentationPipeline.__init__inputreturnc              	   C   s  t |tstdt| g }g }g }g }t|}|tj| _|tj	| _
d}| r| \}}	|s8n|| jkr>n}t|	d}
t|
| j| j| j}tj|| jdd}ddtjg dtjdtjg d	tjdd
ddd
d}|	j|d< |
j|d< |j|d< ||d< |dkrttt|g dg| jg}|gg}|t|g d || |d7 }| s/t|gg}|gg}|||t|| j|d}|S )z^
         Read video and process into 'imgs', 'img_metas', 'ref_img', 'ref_img_metas'
        z input should be a str,  but got r   )i  ih  )pad_valFNr   )dtyper   T)r/   r0   r1   )flipflip_directionimg_norm_cfgvideo_idis_video_data	ori_shape	img_shape	pad_shapeframe_id)   r      rH   )
video_nameimgs	img_metasref_imgref_img_metas) 
isinstancestr	TypeErrortypecv2VideoCapturer$   CAP_PROP_FPSfpsCAP_PROP_FRAME_COUNTframe_countisOpenedreadr   mmcvimresizeimnormalizer/   r0   r1   impad_to_multipler+   r,   r-   r.   shaper%   
from_numpy	transposer)   r'   append)r2   r:   rJ   rK   ref_imgsrM   cap	frame_idxretframeresize_frame
norm_frame	pad_frameref_img_metaresultr   r   r9   
preprocess   s|   








,z,VideoInstanceSegmentationPipeline.preprocessc                 C   s   g }g }t  B |d }|d }|d }|d }| j||||d}tt|d D ]}	||d |	 d  ||d |	 d  q*W d   n1 sMw   Y  ||d	}
|
S )
a7  
         Segmentation Instance (bounding boxes or masks) in the video passed as inputs.

         Args:
             input (`Video`):
                 The pipeline handles two types of images:

                 - A string containing an HTTP(S) link pointing to a video
                 - A string containing a local path to a video

                 The pipeline accepts a single video as input.


         Return:
             A dictionary of result. If the input is a video, a dictionary
             is returned.

             The dictionary contain the following keys:

             - **boxes** (`List[float]) -- The bounding boxes [index, x1, y1, x2, y2, score] of instance in each frame.
             - **masks** (`List[List[bool]]`, optional) -- The instance mask [[False,...,False],...,[False,...,False]]
         rJ   rK   rL   rM   )rL   rM   r   rH   N)boxesmasks)r%   no_gradr   rangelenra   )r2   r:   bbox_resultsmask_resultsrJ   rK   rL   rM   segm_resultsiioutputr   r   r9   forward   s&   
z)VideoInstanceSegmentationPipeline.forwardinputsc                 C   s   |S )Nr   )r2   rx   r   r   r9   postprocess  s   z-VideoInstanceSegmentationPipeline.postprocess)__name__
__module____qualname____doc__rO   r   r   r   r   rl   rw   ry   __classcell__r   r   r7   r9   r      s    ^"J*.r   )#osos.pathpathr   typingr   r   rR   rZ   numpyr,   r%   r   modelscope.metainfor   ;modelscope.models.cv.video_instance_segmentation.video_knetr   modelscope.outputsr   modelscope.pipelines.baser   r	   modelscope.pipelines.builderr
   modelscope.utils.configr   modelscope.utils.constantr   r   modelscope.utils.loggerr   r   register_modulevideo_instance_segmentationr   r   r   r   r9   <module>   s,   