o
    ߥil"                     @   s   d dl mZ d dlmZmZ d dlZd dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ e Zd
d Zdd Zdd Zdd Zdd Z ej!ej"ej"dG dd deZ#dS )    N)DictUnion)Models)Tensor
TorchModel)MODELS)cpd_auto)PGL_SUM)	ModelFileTasks)
get_loggerc                 C   s  t | t j} t | | j}t|t|jd d dddd\}}|d }t dg||d gf}g }t	t
|d D ]%}|| ||d  d g}|t
|d krZ|| ||d  g}|| q:t t|}g }t	t
|D ]}	||	 d ||	 d  }|| qot t|}
||
fS )Nr      x   g?)ncpvmaxlmin      )nparrayfloat32dotTr   minshapeconcatenaterangelenappendlist)
video_featn_frameKchange_points_temp_change_pointsidxsegmenttemp_n_frame_per_segchange_points_idxn_frame_per_seg r+   g/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/video_summarization/summarizer.pyget_change_points   s0   
r-   c              	      s"   fddt |d D }t |d D ]P}t  d D ]G}|dks%|dkr,d|| |< q||d  |krVt||d  ||d  |||d     ||d  | || |< q||d  | || |< qqg } }t |ddD ] }|| | ||d  | kr|d|d  |||d  8 }qn|S )a   Maximize the value that a knapsack of capacity W can hold. You can either put the item or discard it, there is
    no concept of putting some part of item in the knapsack.

    :param int W: Maximum capacity -in frames- of the knapsack.
    :param list[int] wt: The weights (lengths -in frames-) of each video shot.
    :param list[float] val: The values (importance scores) of each video shot.
    :param int n: The number of the shots.
    :return: A list containing the indices of the selected shots.
    c                    s"   g | ]}d d t  d D qS )c                 S   s   g | ]}d qS )r   r+   .0r$   r+   r+   r,   
<listcomp>;   s    z(knap_sack.<locals>.<listcomp>.<listcomp>r   )r   r.   Wr+   r,   r0   ;   s   " zknap_sack.<locals>.<listcomp>r   r   )r   maxinsert)r2   wtvalnr"   iwselectedr+   r1   r,   	knap_sack1   s&   
(	r<   c                 C   s  g }t t|D ]}| | }|| }|| }|| }	tj|tjd}
|	jtkr-|	tj}	|	d |kr;t	|	|gg}	t t|	d D ]"}|	| |	|d  }}|t|kr]d|
||< qC|| |
||< qCg }g }|D ]"}|
|d |d  d  |
|
|d |d d     ql|d }t|d d d }t|||t|}tj|d d tjd}|D ]}d||| d || d d < q|
| q|S )a   Generate the automatic machine summary, based on the video shots; the frame importance scores; the number of
    frames in the original video and the position of the sub-sampled frames of the original video.

    :param list[np.ndarray] all_shot_bound: The video shots for all the -original- testing videos.
    :param list[np.ndarray] all_scores: The calculated frame importance scores for all the sub-sampled testing videos.
    :param list[np.ndarray] all_nframes: The number of frames for all the -original- testing videos.
    :param list[np.ndarray] all_positions: The position of the sub-sampled frames for all the -original- testing videos.
    :return: A list containing the indices of the selected frames for all the -original- testing videos.
    )dtyper3   r   r   g333333?)r   r   r   zerosr   r=   intastypeint32r   r   meanitemr<   int8)all_shot_bound
all_scoresall_nframesall_positionsall_summariesvideo_index
shot_boundframe_init_scoresn_frames	positionsframe_scoresr9   pos_left	pos_rightshot_imp_scoresshot_lengthsshot
final_shotfinal_max_lengthr;   summaryr+   r+   r,   generate_summaryR   sB   

"rX   c                 C   s.   t | d\}}t |d\}}d|||f }|S )N<   z%02d:%02d:%06.3f)divmod)secondsmshtimer+   r+   r,   transform_time   s   r`   c           
   	   C   s   g }d}d}d}t | D ]\}}|r|du r|}d}q|r*|d }|||g d}q|r@| d dkr@t| d }|||g g }|D ]}	||	t|	d t| t|	d t| gd qD|S )Nr3   FTr   r   )frame
timestamps)	enumerater   r   r`   float)
rW   fpsframes_liststart_frame	end_frameis_summary_framer9   r&   outputsegr+   r+   r,   summary_format   s6   
rl   )module_namec                       s   e Zd Zdef fddZdeeef deeef fddZdeeef deeef fdd	Zdeeef deee	e
ef f fd
dZ  ZS )PGLVideoSummarization	model_dirc                    s   t  j|g|R i | t|tj}t | _t	ddddddd| _
tj r0td| _ntd| _| j
| j| _
| | j
|| _
| jrP| j
  d	S | j
  d	S )
zinitialize the video summarization model from the `model_dir` path.

        Args:
            model_dir (str): the model path.
        i         addabsolute)
input_sizeoutput_sizenum_segmentsheadsfusionpos_enccudacpuN)super__init__ospjoinr
   TORCH_MODEL_FILEnnMSELosslossr	   modeltorchrz   is_availabledevice_deviceto_load_pretrainedtrainingtraineval)selfro   argskwargs
model_path	__class__r+   r,   r}      s&   

zPGLVideoSummarization.__init__inputreturnc                 C   s.   |d }|d }|  |\}}d| ||iS )Nframe_featuresgtscorer   )r   r   )r   r   r   r   predsattn_weightsr+   r+   r,   _train_forward   s   z$PGLVideoSummarization._train_forwardc                 C   s   |d }|  |\}}d|iS )Nr   scores)r   )r   r   r   yr   r+   r+   r,   _inference_forward   s   z(PGLVideoSummarization._inference_forwardc                 C   s@   |  D ]\}}|| | j||< q| jr| |S | |S )zreturn the result by the model

        Args:
            input (Dict[str, Tensor]): the preprocessed data

        Returns:
            Dict[str, Union[list, Tensor]]: results
        )itemsr   r   r   r   r   )r   r   keyvaluer+   r+   r,   forward   s
   


zPGLVideoSummarization.forward)__name__
__module____qualname__strr}   r   r   r   r   r   r   r   __classcell__r+   r+   r   r,   rn      s    "

rn   )$os.pathpathr~   typingr   r   numpyr   r   torch.nnr   modelscope.metainfor   modelscope.models.baser   r   modelscope.models.builderr   5modelscope.models.cv.video_summarization.kts.cpd_autor   0modelscope.models.cv.video_summarization.pgl_sumr	   modelscope.utils.constantr
   r   modelscope.utils.loggerr   loggerr-   r<   rX   r`   rl   register_modulevideo_summarizationrn   r+   r+   r+   r,   <module>   s,   !8!