o
    ߥi5                     @   sT  d Z ddlZddlZddlZddlmZ ddlm  mZ	 ddl
mZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddlmZ e ZG d	d
 d
ejZejejejdG dd deZG dd deZG dd dejZ G dd dejZ!G dd dejZ"G dd dejZ#G dd dejZ$G dd deZ%dddZ&dS ) z VideoInpaintingProcess
The implementation here is modified based on STTN,
 originally Apache 2.0 License and publicly available at https://github.com/researchmm/STTN
    N)Models)Model)
TorchModel)MODELS)	ModelFileTasks)
get_loggerc                       s.   e Zd Z fddZdd Zd	ddZ  ZS )
BaseNetworkc                    s   t t|   d S N)superr	   __init__)self	__class__ j/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/video_inpainting/inpainting_model.pyr      s   zBaseNetwork.__init__c                 C   sN   t | tr	| d } d}|  D ]}|| 7 }qtdt| j|d f  d S )Nr   zoNetwork [%s] was created. Total number of parameters: %.1f million. To see the architecture, do print(network).i@B )
isinstancelist
parametersnumelprinttype__name__)r   
num_paramsparamr   r   r   print_network   s   
zBaseNetwork.print_networknormal{Gz?c                    s@    fdd}|  | |  D ]}t|dr|  qdS )z
        initialize network's weights
        init_type: normal | xavier | kaiming | orthogonal
        https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix/blob/9451e70673400885567d08a9e97ade2524c700d0/models/networks.py#L39
        c                    s  | j j}|ddkr7t| dr| jd urtj| jjd t| dr3| j	d ur5tj| j	jd d S d S d S t| dr|ddksJ|ddkrd	krYtj
| jjd  nLd
krhtjj| jj d n=dkrwtjj| jjdd n.dkrtjj| jjddd ndkrtjj| jj d ndkr|   ntd t| dr| j	d urtj| j	jd d S d S d S d S d S )NInstanceNorm2dweightg      ?biasg        ConvLinearr   xavier)gainxavier_uniformkaimingr   fan_in)amode
orthogonalnonez-initialization method [%s] is not implemented)r   r   findhasattrr    nninit	constant_datar!   normal_xavier_normal_xavier_uniform_kaiming_normal_orthogonal_reset_parametersNotImplementedError)m	classnamer%   	init_typer   r   	init_func/   s>   
z+BaseNetwork.init_weights.<locals>.init_funcinit_weightsN)applychildrenr.   r?   )r   r=   r%   r>   r:   r   r<   r   r?   (   s   

zBaseNetwork.init_weights)r   r   )r   
__module____qualname__r   r   r?   __classcell__r   r   r   r   r	      s    r	   )module_namec                       s   e Zd Zd fdd	Z  ZS )VideoInpaintingr   c                    s   t  j|||d| t | _tj rd}nd}tjd|t	j
|d}| j|d  | j  || _| jdkrUtj rU| jd| j td	| j d S d
| _td d S )N)	model_dir	device_idcudacpuz{}/{})map_locationnetGr   zcuda:{}zUse GPU: {}r   zUse CPU for inference)r   r   InpaintGeneratormodeltorchrI   is_availableloadformatr   TORCH_MODEL_BIN_FILEload_state_dictevalrH   tologgerinfo)r   rG   rH   argskwargsdevicepretrained_paramsr   r   r   r   V   s*   

zVideoInpainting.__init__)r   )r   rB   rC   r   rD   r   r   r   r   rF   R   s    rF   c                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	rM   Tc                    sR  t t|   d}d}g d}g }t|D ]}|t||d qtj| | _ttj	dddddd	tj
d
ddtj	dddddd	tj
d
ddtj	dddddd	tj
d
ddtj	d|dddd	tj
d
dd| _tt|ddddtj
d
ddtj	dddddd	tj
d
ddtdddddtj
d
ddtj	dddddd	| _|r|   d S d S )N      ))0      )      )rb      )rc      )hidden   @   rd      kernel_sizestridepadding皙?Tinplace   rj   rl   )r   rM   r   rangeappendTransformerBlockr/   
SequentialtransformerConv2d	LeakyReLUencoderdeconvdecoderr?   )r   r?   channel	stack_num	patchsizeblocks_r   r   r   r   n   s<   	zInpaintGenerator.__init__c                 C   s   |  \}}}}}||| d||}| ||| |||}|  \}	}}}tj|dd}| ||||dd }| |}
t|
}
|
S )Nrh         ?scale_factorxr:   bcr   )	sizeviewry   Finterpolaterv   r{   rO   tanh)r   masked_framesmasksr   tr   hwenc_featr   outputr   r   r   forward   s    

zInpaintGenerator.forwardc           	      C   sZ   |  \}}}}|||||}tj|dd}|  \}}}}| ||d|dd }|S )Nr   r   rh   r   r   )r   r   r   r   rv   )	r   featr   r   r   r   r   r   r   r   r   r   infer   s   zInpaintGenerator.inferT)r   rB   rC   r   r   r   rD   r   r   r   r   rM   l   s    !rM   c                       s*   e Zd Z		d fdd	Zdd Z  ZS )rz   rf   r   c                    s$   t    tj|||d|d| _d S )Nrh   ri   )r   r   r/   rw   conv)r   input_channeloutput_channelrj   rl   r   r   r   r      s   
zdeconv.__init__c                 C   s    t j|dddd}| |}|S )Nrd   bilinearT)r   r*   align_corners)r   r   r   r   r   r   r   r   r      s
   
zdeconv.forward)rf   r   r   rB   rC   r   r   rD   r   r   r   r   rz      s
    rz   c                   @   s   e Zd ZdZdd ZdS )	Attentionz/
    Compute 'Scaled Dot Product Attention
    c                 C   sR   t ||ddt|d }||d tj|dd}t ||}||fS )Nr   g    edim)	rO   matmul	transposemathsqrtr   masked_fillr   softmax)r   querykeyvaluer:   scoresp_attnp_valr   r   r   r      s   zAttention.forwardN)r   rB   rC   __doc__r   r   r   r   r   r      s    r   c                       s(   e Zd ZdZ fddZdd Z  ZS )MultiHeadedAttentionz1
    Take in model size and number of heads.
    c                    s~   t    || _tj||ddd| _tj||ddd| _tj||ddd| _ttj||dddtj	ddd| _
t | _d S )Nrh   r   rq   rf   rm   Trn   )r   r   r~   r/   rw   query_embeddingvalue_embeddingkey_embeddingru   rx   output_linearr   	attention)r   r~   d_modelr   r   r   r      s    
zMultiHeadedAttention.__init__c              
   C   sp  |  \}}}}|| }	|t| j }
g }| |}| |}| |}t| jtj|t| jddtj|t| jddtj|t| jddD ]\\}}}}}|| || }}|	||	d||||}|
ddddddd 	||	| | || }|d	d
kdd|	| | d}|	||	|
||||}|
ddddddd 	||	| | |
| | }|	||	|
||||}|
ddddddd 	||	| | |
| | }|	||	|
||||}|
ddddddd 	||	| | |
| | }| ||||\}}|	||	|||
||}|
ddddddd 	||
||}|| qGt|d}| |}|S )Nrh   r   r   rf      rd   rc   r^   r   g      ?)r   lenr~   r   r   r   ziprO   chunkr   permute
contiguousmean	unsqueezerepeatr   rs   catr   )r   r   r:   r   r   btr   r   r   r   d_kr   _query_key_valuewidthheightr   r   r   out_wout_hmmyr   r   r   r      sp   





&
zMultiHeadedAttention.forwardr   rB   rC   r   r   r   rD   r   r   r   r   r      s    r   c                       s$   e Zd Z fddZdd Z  ZS )FeedForwardc              
      sV   t t|   ttj||ddddtjdddtj||dddtjddd| _d S )	Nrf   rd   )rj   rl   dilationrm   Trn   rh   rq   )r   r   r   r/   ru   rw   rx   r   )r   r   r   r   r   r     s   
zFeedForward.__init__c                 C   s   |  |}|S r
   )r   r   r   r   r   r     s   
zFeedForward.forwardr   r   r   r   r   r     s    r   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )rt   zS
    Transformer = MultiHead_Attention + Feed_Forward with sublayer connection
    rp   c                    s&   t    t||d| _t|| _d S )N)r   )r   r   r   r   r   feed_forward)r   r~   re   r   r   r   r   !  s   
zTransformerBlock.__init__c                 C   sT   |d |d |d |d f\}}}}||  |||| }|| | }||||dS )Nr   r:   r   r   r   )r   r   )r   r   r:   r   r   r   r   r   r   &  s   $zTransformerBlock.forward)rp   r   r   r   r   r   rt     s    rt   c                       s.   e Zd Z				d fdd	Zdd Z  ZS )	Discriminatorrf   FTc                    s<  t t|   || _d}tttj||d ddd| d|tjdddttj|d |d	 ddd| d
|tjdddttj|d	 |d ddd| d
|tjdddttj|d |d ddd| d
|tjdddttj|d |d ddd| d
|tjdddtj|d |d dddd| _	|r| 
  d S d S )Nrg   rh   )rf   r   r   )rh   rd   rd   )in_channelsout_channelsrj   rk   rl   r!   rm   Trn   rd   )rj   rk   rl   r!   rc   ri   )r   r   r   use_sigmoidr/   ru   spectral_normConv3drx   r   r?   )r   r   r   use_spectral_normr?   nfr   r   r   r   /  s   5zDiscriminator.__init__c                 C   sD   t |dd}|d}| |}| jrt |}t |dd}|S )Nr   rh   rd   )rO   r   r   r   r   sigmoid)r   xsxs_tr   outr   r   r   r   p  s   


zDiscriminator.forward)rf   FTTr   r   r   r   r   r   -  s    Ar   Tc                 C   s   |rt | S | S r
   )_spectral_norm)moduler*   r   r   r   r   z  s   r   r   )'r   r   numpynprO   torch.nnr/   torch.nn.functional
functionalr   torchvision.modelsmodelsmodelscope.metainfor   modelscope.modelsr   modelscope.models.baser   modelscope.models.builderr   modelscope.utils.constantr   r   modelscope.utils.loggerr   rW   Moduler	   register_modulevideo_inpaintingrF   rM   rz   r   r   r   rt   r   r   r   r   r   r   <module>   s6    :A<M