o
    ॵi                     @   s   d dl Z d dlmZ d dlmZmZ d dlZd dl	Z	d dl
mZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZ d dlm Z  e  Z!ej"ej#ej$dG dd deZ%dS )    N)AnyDict)	Pipelines)Model)LengthAdaptiveTokenizerinit_transform_dict	load_dataload_frames_from_video)
OutputKeys)InputPipeline)	PIPELINES)Config)	ModelFileTasks)
get_logger)module_namec                       s   e Zd Zdef fddZdedeeef fddZdeeef deeef fdd	Z	d
eeef deeef fddZ
  ZS )VopRetrievalSEPipelinemodelc                    s.  t  jdd|i| t|| j| _td || _	t
t|tj| _t| jjjd | _td tt|d dd}t| jj|| _td d	|v rett|d
| j| _n+d|v rutt|d| j| _nd|v rtt|d| j| _ntt|d| j| _td dS )a   Card VopRetrievalSE Pipeline.

        Examples:
        >>>
        >>>   from modelscope.pipelines import pipeline
        >>>   vop_pipeline = pipeline(Tasks.vop_retrieval,
        >>>            model='damo/cv_vit-b32_retrieval_vop_bias')
        >>>
        >>>   # IF DO TEXT-TO-VIDEO:
        >>>   input_text = 'a squid is talking'
        >>>   result = vop_pipeline(input_text)
        >>>   result:
        >>>   {'output_data': array([['video8916']], dtype='<U9'),'mode': 't2v'}
        >>>
        >>>   # IF DO VIDEO-TO-TEXT:
        >>>   input_video = 'video10.mp4'
        >>>   result = vop_pipeline(input_video)
        >>>   result:
        >>>   {'output_data': array([['assorted people are shown holding cute pets']], dtype='<U163'), 'mode': 'v2t'}
        >>>
        r   zload model done	clip_testzload transform donezbpe_simple_vocab_16e6.txt.gzzutf-8
zload tokenizer donevop_biaszBias_msrvtt9k_features.pklvop_partialzPartial_msrvtt9k_features.pklvop_projzProj_msrvtt9k_features.pklzVoP_msrvtt9k_features.pklzload database doneN )super__init__r   from_pretrainedtodevicer   loggerinfo	local_pthr   	from_fileospjoinr   CONFIGURATIONcfgr   
hyperparam	input_resimg_transformgzipopenreaddecodesplitr   	tokenizerr   database)selfr   kwargsbpe_path	__class__r   e/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/pipelines/cv/vop_retrieval_se_pipeline.pyr      sF   


zVopRetrievalSEPipeline.__init__inputreturnc           	         s   t |trfd|v r@g }|fD ]!}t j|}t| jjj jjj	\}} 
|}|| qtj|ddj jdd}d}n/ j|dddd}t |tjrX|j jdd}n fd	d
| D }d}n	tdt| ||d}|S )Nz.mp4r   )dimTnon_blockingv2tpt)return_tensorspadding
truncationc                    s"   i | ]\}}||j  jd dqS )Tr;   )r   r   ).0keyvalr2   r   r7   
<dictcomp>k   s    z5VopRetrievalSEPipeline.preprocess.<locals>.<dictcomp>t2vz input should be a str,  but got )
input_datamode)
isinstancestrr$   r%   r"   r	   r'   r(   
num_framesvideo_sample_typer*   appendtorchstackr   r   r0   Tensoritems	TypeErrortype)	r2   r8   preprocess_paramsquery
video_pathimgsidxsrI   resultr   rE   r7   
preprocessV   s@   





z!VopRetrievalSEPipeline.preprocessc                 K   s   | j \}}}}t h |d dkr8| j|d }||j }tj|| jjjddd 	 
 }	t||	 }
n+|d dkrc| j|d }||j }tj|| jjjddd 	 
 }	t||	 }
|
|d d}|W  d    S 1 svw   Y  d S )	NrI   rG   rH   )kr:      r=   )output_datarI   )r1   rO   no_gradr   get_text_featuresTtopkr'   r(   cpunumpynparrayget_video_features)r2   r8   forward_paramstext_embedsvid_embeds_pooledvid_idstextsquery_featsscoreretrieval_idxsresresultsr   r   r7   forwardv   s>   




$zVopRetrievalSEPipeline.forwardinputsc                 K   s   |S )Nr   )r2   rt   post_paramsr   r   r7   postprocess   s   z"VopRetrievalSEPipeline.postprocess)__name__
__module____qualname__rK   r   r   r   r   r[   rs   rv   __classcell__r   r   r5   r7   r      s    9 


r   )&r+   os.pathpathr$   typingr   r   re   rf   rO   modelscope.metainfor   modelscope.modelsr   "modelscope.models.cv.vop_retrievalr   r   r   r	   modelscope.outputsr
   modelscope.pipelines.baser   r   modelscope.pipelines.builderr   modelscope.utils.configr   modelscope.utils.constantr   r   modelscope.utils.loggerr   r    register_modulevop_retrievalvop_retrieval_ser   r   r   r   r7   <module>   s&   