o
    ॵi                     @   s   d dl Z d dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ e Zejeje	jdG dd deZ dS )    N)AnyDict)	Pipelines)BaseVideoModel)
OutputKeys)InputPipeline)	PIPELINES)ReadVideoData)Config)	ModelFileTasks)
get_logger)module_namec                       s   e Zd Zdef fddZdedeeef fddZdeeef deeef fdd	Z	e
 dddZdeeef deeef fddZ  ZS )HICOSSLVideoEmbeddingPipelinemodelc                    s   t  jdd|i| t| jtj}td|  t| jtj	}td|  t
|| _t| jd| j| _| j  | jjtj|| jdd dd td	 d
S )z
        use `model` to create a hicossl video embedding pipeline for prediction
        Args:
            model: model id on modelscope hub.
        r   zloading model from zloading config from )cfg)map_locationmodel_stateF)strictzload model doneN )super__init__ospjoinr   r   TORCH_MODEL_FILEloggerinfoCONFIGURATIONr   	from_filer   r   todeviceinfer_modelevalload_state_dicttorchload)selfr   kwargs
model_pathconfig_path	__class__r   l/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/cv/hicossl_video_embedding_pipeline.pyr      s   
z&HICOSSLVideoEmbeddingPipeline.__init__inputreturnc                 C   sB   t |trt| j|dd| j}n	tdt| d|i}|S )N   )num_temporal_views_overridez input should be a str,  but got 
video_data)
isinstancestrr
   r   r    r!   	TypeErrortype)r'   r.   video_input_dataresultr   r   r-   
preprocess,   s   
z(HICOSSLVideoEmbeddingPipeline.preprocessc                 C   s"   |  |d }tj|j  iS )Nr2   )perform_inferencer   VIDEO_EMBEDDINGdatacpunumpy)r'   r.   featurer   r   r-   forward6   s   z%HICOSSLVideoEmbeddingPipeline.forward   c              	   C   s`   t |d| }g }t|D ]}|| ||| |d |  d  qtj|dd}|S )a1   Perform feature extracting for a given video
        Args:
            model (BaseVideoModel): video model with loadded state dict.
            max_bsz (int): the maximum batch size, limited by GPU memory.
        Returns:
            pred (Tensor): the extracted features for input video clips.
        r   r0   )dim)mathceilsizerangeappendr"   r%   cat)r'   r<   max_bsziter_num
preds_listipredr   r   r-   r:   :   s   	 z/HICOSSLVideoEmbeddingPipeline.perform_inferenceinputsc                 C   s   |S )Nr   )r'   rN   r   r   r-   postprocessK   s   z)HICOSSLVideoEmbeddingPipeline.postprocess)rA   )__name__
__module____qualname__r4   r   r   r   r   r9   r@   r%   no_gradr:   rO   __classcell__r   r   r+   r-   r      s    "
*r   )!rC   os.pathpathr   typingr   r   r%   modelscope.metainfor   'modelscope.models.cv.action_recognitionr   modelscope.outputsr   modelscope.pipelines.baser   r   modelscope.pipelines.builderr	   modelscope.preprocessorsr
   modelscope.utils.configr   modelscope.utils.constantr   r   modelscope.utils.loggerr   r   register_modulevideo_embeddinghicossl_video_embeddingr   r   r   r   r-   <module>   s$   