o
    ߥi!                     @   s.  d dl Z d dlZd dlmZ d dlmZmZ d dlZd dl	Z
d dlZd dlmZ d dlm  mZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lm Z  d d
l!m"Z"m#Z# d dl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z, e% Z-ej.e#j/ej0dG dd deZ1dS )    N)AnyDict)Image)shot_detector)tqdm)Models)
TorchModel)MODELS)Config)	ModelFileTasks)
get_logger   )get_contextual_relation_networkget_shot_encoder)get_pred_boundary
pred2scenescene2video)module_namec                       s   e Zd Zdef fddZdeeef deeejf fddZ	dd	 Z
d
d Zdd ZdejdejfddZdeeef fddZdd Zdd Z  ZS )MovieSceneSegmentationModel	model_dirc              	      sN  t  j|g|R i | t|tj}tj|dd}t|tj}t	
|| _dd }t| j| _|d| j| t| j| _|d| j| | jjjj}| jjjj| d }	t|	d| _|d	| j| t | _| jjdi | jjj ttjd
tjdt dt! tj"g dg ddg| _#| jj$j%j}
| jj$j%j|
 j&| _&d| _'dS )zstr -- model file root.cpu)map_locationc                 S   s@   |  }| D ]\}}| d | }|| ||< q|| d S )N.)
state_dictitemsload_state_dict)prefixmodel
src_params	own_statenameparamsrc_name r$   g/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/movie_scene_segmentation/model.pyload_param_with_prefix-   s
   zDMovieSceneSegmentationModel.__init__.<locals>.load_param_with_prefixshot_encodercrnhidden_size   head_sbd   )sizeinterpolation   )g
ףp=
?gv/?gCl?)gZd;O?gy&1?g?)meanstdgh㈵>Nr$   )(super__init__ospjoinr   TORCH_MODEL_FILEtorchloadCONFIGURATIONr
   	from_filecfgr   r'   r   r(   r   contextual_relation_networkr!   paramsnnLinearr+   r   initpreprocessorshot_detectTFComposeResizer   BICUBIC
CenterCropToTensor	Normalizetest_transformdatasetsampling_methodneighbor_sizeeps)selfr   argskwargs
model_pathr=   config_pathr&   crn_namehdimrL   	__class__r$   r%   r3   #   sB   

z$MovieSceneSegmentationModel.__init__inputsreturnc                 C   s   | d}|d }| |}tj| | dd}|dk}|dk}d\}}	|t||	  | | | j  }
|	t||	  | | | j  }|
| }||  }tj	|dd}t
||d	}|S )
Nvideolabelnone)	reductionr   r   )r   r   dim)predloss)popshared_stepFcross_entropysqueezefloatsumrN   r7   argmaxdict)rO   rX   datalabelsoutputsra   lposlnegppr>   wpwnwprobsrer$   r$   r%   forwardQ   s    

""z#MovieSceneSegmentationModel.forwardc                    s  t d | jjj}| jjj}|d  |d }t }t	
|| }g }i }| j  tt|D ]j}	|	| }
|	d | |k rE|	d | n|}||
| }|d d }|d d } fddt||d D }| j|||}| |||}t||}| |}tj|dd	}||d d df     q1|d
t|i |dt|i t|d
 |ksJ | j  |S )NzBegin scene detect ......shot_timecode_lstshot_idx_lstr   r   c                    s   i | ]}| | qS r$   r$   ).0irw   r$   r%   
<dictcomp>}   s    z9MovieSceneSegmentationModel.inference.<locals>.<dictcomp>r^   r`   sid)loggerinfor;   pipelinebatch_size_per_gpur(   attention_maskdevicelenmathceilr   startr   rangeget_frame_imgget_batch_inputr7   stacktorc   rd   softmaxextendr   detachnumpyupdatenparangerelease)rO   batchbsr   rx   shot_numcnt
infer_predinfer_resultr{   r   endbatch_shot_idx_lstshot_start_idxshot_end_idxbatch_timecode_lstbatch_shot_keyf_lstrX   input_rm   probr$   r|   r%   	inferencef   sD   



 

$
z%MovieSceneSegmentationModel.inferencec                 C   sd   t   | |}t|jdksJ W d    n1 sw   Y  | j|d d\}}| |}|S )N   )mask)r7   no_gradextract_shot_representationr   shaper(   r+   )rO   rX   	shot_repr_pooledr`   r$   r$   r%   rc      s   


z'MovieSceneSegmentationModel.shared_stepc                 C   sr   |    }| jjjd }t| t|j	d D ]}dt
|d d}t||}t|||  qd S )Nz	/featuresr   shot_   z.npy)rg   r   r   r;   rK   img_pathosmakedirsr   r   strzfillr4   r5   r   save)rO   _reprfeatpthidxr!   r$   r$   r%   save_shot_feat   s   
z*MovieSceneSegmentationModel.save_shot_featc           
         sr   t  jdks	J  j\}}}}}}tj d|d  fddt|D }t|jdd}	tj|	d|d}	|	S )	z& inputs [b s k c h w] -> output [b d]    zb s k c h w -> (b s) k c h w)sc                    s"   g | ]}  d d |f qS )N)r'   )rz   _krX   rO   r$   r%   
<listcomp>   s   " zKMovieSceneSegmentationModel.extract_shot_representation.<locals>.<listcomp>r   r^   z(b s) d -> b s d)r   r   einops	rearranger   r7   r   r0   )
rO   rX   br   kchrs   keyframe_reprr   r$   r   r%   r      s   z7MovieSceneSegmentationModel.extract_shot_representationc                 K   sx   t d |d }|d }| jjj}t||}t||\}}}	}
| jjjr4t|d ||}t	d|  t
|||	|
fS )NzGenerate scene .......r   	shot2keyfinput_video_pthzSplit scene video saved to )r   r   r;   r   save_thresholdr   r   save_split_scener   printr   )rO   rX   rQ   	pred_dictr   thres	anno_dictscene_dict_lst
scene_listr   shot_dict_lstre_dirr$   r$   r%   postprocess   s   



z'MovieSceneSegmentationModel.postprocessc                    s   g }t |D ]\}} fdd|D }tj|dd}|| qtj|dd}g }t |D ]\}}|| }	||	 }
||
 q-|S )Nc                    s   g | ]}  |qS r$   )rJ   )rz   	one_framerO   r$   r%   r      s    
z?MovieSceneSegmentationModel.get_batch_input.<locals>.<listcomp>r   r^   )	enumerater7   r   append)rO   shot_keyf_lstr   rx   single_shot_featr   one_shot	shot_featshot_idx	shot_idx_	_one_shotr$   r   r%   r      s   
z+MovieSceneSegmentationModel.get_batch_inputc           	      C   s   t d | jj|fi | jjj\}}}t d g }t|D ]%\}}t|d t	| j
 | j
d  }t|d|d d }|| q ||||fS )NzBegin shot detect......zShot detect done!shot_idr   r   num_shot)r   r   r   rB   r;   rA   r   intr   r   rM   clipr   )	rO   rX   rw   annor   rx   r   r   r   r$   r$   r%   
preprocess   s   

z&MovieSceneSegmentationModel.preprocess)__name__
__module____qualname__r   r3   r   r   r7   Tensorrv   r   rc   r   r   r   r   r   __classcell__r$   r$   rV   r%   r      s    $.,

r   )2r   r   os.pathpathr4   typingr   r   r   r   r   r7   torch.nnr>   torch.nn.functional
functionalrd   torchvision.transforms
transformsrC   PILr   shotdetect_scenedetect_lgssr   r   modelscope.metainfor   'modelscope.models.base.base_torch_modelr   modelscope.models.builderr	   modelscope.utils.configr
   modelscope.utils.constantr   r   modelscope.utils.loggerr   	get_modelr   r   utils.save_opr   r   r   r   register_modulemovie_scene_segmentationresnet50_bertr   r$   r$   r$   r%   <module>   s4   