o
    ߥi;                     @   sr  d dl Z d dlmZ d dlm  mZ d dlmZ d dlm	Z	m
Z
 G dd dejZG dd dejZG dd	 d	ejZG d
d dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd deZG dd deZG dd deZG dd dejZG dd dejZG dd dejZG d d! d!ejZG d"d# d#ejZG d$d% d%ejZdS )&    N)models)cbam
mod_resnetc                       &   e Zd Zd fdd	Zdd Z  ZS )ResBlockNc                    sj   t t|   |d u r|}||krd | _n
tj||ddd| _tj||ddd| _tj||ddd| _d S N      kernel_sizepadding)superr   __init__
downsamplennConv2dconv1conv2selfindimoutdim	__class__ j/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/video_object_segmentation/modules.pyr      s   zResBlock.__init__c                 C   s<   |  t|}| t|}| jd ur| |}|| S N)r   Frelur   r   r   xrr   r   r   forward   s
   

zResBlock.forwardr   __name__
__module____qualname__r   r"   __classcell__r   r   r   r   r      s    r   c                       $   e Zd Z fddZdd Z  ZS )FeatureFusionBlockc                    s2   t    t||| _t|| _t||| _d S r   )r   r   r   block1r   CBAM	attentionblock2r   r   r   r   r   '   s   
zFeatureFusionBlock.__init__c                 C   s6   t ||gd}| |}| |}| || }|S Nr	   )torchcatr*   r,   r-   )r   r    f16r!   r   r   r   r"   .   s
   

zFeatureFusionBlock.forwardr#   r   r   r   r   r)   %       r)   c                       r(   )ValueEncoderSOc                    `   t    tjddd}|j| _|j| _|j| _|j| _|j| _|j	| _	|j
| _
tdd| _d S )NFr	   
pretrained
extra_chan      r   r   r   resnet18r   bn1r   maxpoollayer1layer2layer3r)   fuserr   resnetr   r   r   r   ;      
zValueEncoderSO.__init__c                 C   sf   t ||gd}| |}| |}| |}| |}| |}| |}| |}| 	||}|S r.   
r/   r0   r   r<   r   r=   r>   r?   r@   rA   )r   imagekey_f16maskfr    r   r   r   r"   J   s   






zValueEncoderSO.forwardr#   r   r   r   r   r3   9       r3   c                       r(   )ValueEncoderc                    r4   )NF   r5   r8   r9   r:   rB   r   r   r   r   _   rD   zValueEncoder.__init__c                 C   sh   t |||gd}| |}| |}| |}| |}| |}| |}| |}| 	||}|S r.   rE   )r   rF   rG   rH   other_masksrI   r    r   r   r   r"   n   s   






zValueEncoder.forwardr#   r   r   r   r   rK   ]   rJ   rK   c                       r(   )
KeyEncoderc                    sR   t    tjdd}|j| _|j| _|j| _|j| _|j| _	|j
| _
|j| _d S )NF)r6   )r   r   r   resnet50r   r<   r   r=   r>   res2r?   r@   rB   r   r   r   r      s   
zKeyEncoder.__init__c                 C   sP   |  |}| |}| |}| |}| |}| |}| |}|||fS r   )r   r<   r   r=   rP   r?   r@   )r   rI   r    f4f8r1   r   r   r   r"      s   







zKeyEncoder.forwardr#   r   r   r   r   rN      s    rN   c                       r   )UpsampleBlockrL   c                    s4   t    tj||ddd| _t||| _|| _d S r   )r   r   r   r   	skip_convr   out_convscale_factor)r   skip_cup_cout_crV   r   r   r   r      s   

zUpsampleBlock.__init__c                 C   s0   |  |}|tj|| jddd }| |}|S )NbilinearF)rV   modealign_corners)rT   r   interpolaterV   rU   )r   skip_fup_fr    r   r   r   r"      s   

zUpsampleBlock.forwardrL   r#   r   r   r   r   rS      s    rS   c                       r(   )KeyProjectionc                    sF   t    tj||ddd| _tj| jjj tj	| jj
j d S r   )r   r   r   r   key_projinitorthogonal_weightdatazeros_bias)r   r   keydimr   r   r   r      s   
zKeyProjection.__init__c                 C   s
   |  |S r   )rb   r   r    r   r   r   r"      s   
zKeyProjection.forwardr#   r   r   r   r   ra      r2   ra   c                       s.   e Zd Z				d	 fdd	Zdd Z  ZS )
_NonLocalBlockNDNr   TFc           	   	      sj  t t|   |dv sJ || _|| _|| _|| _| jd u r+|d | _| jdkr+d| _|dkr<tj}tj	dd}tj
}n|dkrMtj}tjdd}tj}ntj}tjdd}tj}|| j| jdddd	| _|r{t|| j| jdddd	|| j| _n|| j| jdddd	| _|| j| jdddd	| _|| j| jdddd	| _|rt| j|| _t| j|| _d S d S )
N)r	   rL   r   rL   r   r	   r   )r	   rL   rL   )r   )rL   rL   )in_channelsout_channelsr   strider   )r   rk   r   	dimension
sub_samplerl   inter_channelsr   Conv3d	MaxPool3dInstanceNorm3dr   	MaxPool2dBatchNorm2dConv1d	MaxPool1dBatchNorm1dg
SequentialWthetaphi)	r   rl   rq   ro   rp   bn_layerconv_ndmax_pool_layerbnr   r   r   r      s   



	z_NonLocalBlockND.__init__c                 C   s   | d}| ||| jd}|ddd}| ||| jd}|ddd}| ||| jd}t||}| d}|| }t||}	|	ddd	 }	|	j|| jg|  dd R  }	| 
|	}
|
| }|S )z<
        :param x: (b, c, t, h, w)
        :return:
        r   rL   r	   N)sizerz   viewrq   permuter}   r~   r/   matmul
contiguousr|   )r   r    
batch_sizeg_xtheta_xphi_xrI   Nf_div_CyW_yzr   r   r   r"     s   

"
z_NonLocalBlockND.forward)Nr   TFr#   r   r   r   r   rk      s    Ork   c                       $   e Zd Z			d fdd	Z  ZS )NONLocalBlock1DNTc                       t t| j||d||d d S )Nr	   rq   ro   rp   r   )r   r   r   r   rl   rq   rp   r   r   r   r   r   ,     

zNONLocalBlock1D.__init__NTTr$   r%   r&   r   r'   r   r   r   r   r   *  
    r   c                       s$   e Zd Z			d fdd	Z  ZS )NONLocalBlock2DNTFc                    r   )NrL   r   )r   r   r   r   r   r   r   r   ;  r   zNONLocalBlock2D.__init__)NTFr   r   r   r   r   r   9  r   r   c                       r   )NONLocalBlock3DNTc                    r   )Nr   r   )r   r   r   r   r   r   r   r   J  r   zNONLocalBlock3D.__init__r   r   r   r   r   r   r   H  r   r   c                       r(   )_ASPPModule3Dc              	      s,   t t|   tj|||d||dd| _d S )Nr	   F)r   rn   r   dilationrh   )r   r   r   r   rr   atrous_conv)r   inplanesplanesr   r   r   r   r   r   r   Y  s   z_ASPPModule3D.__init__c                 C   s   |  |}tj|ddS )NTinplace)r   r   r   rj   r   r   r   r"   d  s   
z_ASPPModule3D.forwardr#   r   r   r   r   r   W  s    r   c                       r   )ASPP3D   c              	      s   t    g d}|| }t||dd|d d| _t||dd|d |d fd|d |d fd| _t||dd|d |d fd|d |d fd| _t||dd|d |d fd|d |d fd| _tj|d |dd	d
d| _	d S )N)r	   rL   r      r	   r   )r   r   r	   r   r   rL   r   r   r   r	   r	   F)r   r   rh   )
r   r   r   aspp1aspp2aspp3aspp4r   rr   r   )r   in_plane	out_plane	reduction	dilations	mid_planer   r   r   r   k  s>   
zASPP3D.__init__c                 C   sV   |  |}| |}| |}| |}tj||||fdd}tj| |dd}|S )Nr	   )dimTr   )	r   r   r   r   r/   r0   r   r   r   )r   r    x1x2x3x4r   r   r   r"     s   



zASPP3D.forward)r   r#   r   r   r   r   r   i  s    r   c                       r   )SELayerS   c              	      sd   t t|   |d }td| _ttj||| ddtjddtj|| |ddt	 | _
d S )NrL   )rL   r	   r	   F)rh   Tr   )r   r   r   r   AdaptiveAvgPool3davg_poolr{   LinearReLUSigmoidfc)r   channelr   r   r   r   r     s   

zSELayerS.__init__c                 C   sN   |  \}}}}}| ||d| }| |||ddd}||| S )NrL   r	   )r   r   r   r   	expand_as)r   r    bc_r   r   r   r   r"     s   zSELayerS.forward)r   r#   r   r   r   r   r     s    	r   c                       s0   e Zd ZdZ			d fdd	Zdd Z  ZS )	SEBasicBlockr	   N   c                    s   t t|   tj||ddd| _t|| _tjdd| _	tj||ddd| _
t|| _t||| _t|| _|| _|| _d S )Nr   r   r
   Tr   )r   r   r   r   rr   r   rt   in1r   r   r   in2r   sesin3r   rn   )r   r   r   rn   r   r   r   r   r   r     s   
zSEBasicBlock.__init__c                 C   st   |}|  |}| |}| |}| |}| |}| |}| |}| jd ur/| |}||7 }| |}|S r   )r   r   r   r   r   r   r   r   )r   r    residualoutr   r   r   r"     s   









zSEBasicBlock.forward)r	   Nr   )r$   r%   r&   	expansionr   r"   r'   r   r   r   r   r     s    r   c                       s4   e Zd ZdZd fdd	Zddd	Zd
d Z  ZS )SAMz2
    Spatio-temporal aggregation module (SAM)
    Nr   Fc                    s\   t t|   || _|| _|d u r|}|dkr| || _t||dd| _t	|dd| _
d S )Nr   r   )r   F)r   )r   r   r   r   repeatseRepeatse_blockr   r   r   	non_local)r   r   r   r   normr   r   r   r     s   zSAM.__init__rL   c                    s"   t jt  fddt|D  S )Nc                    s   g | ]	}t  j jqS r   )r   r   ).0r   r   r   r   
<listcomp>  s    z SAM.seRepeat.<locals>.<listcomp>)r   r{   
ModuleListrange)r   r   r   r   r   r     s   zSAM.seRepeatc                 C   s4   |  |}| jdkr| |}|}| || }|S )Nr   )r   r   r   r   r   r   r   r   r"     s   


zSAM.forward)Nr   Fr`   )r$   r%   r&   __doc__r   r   r"   r'   r   r   r   r   r     s
    
r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )MemCrompressr   Tc                    sZ   t    tdd||d| _tdd||d| _tjddddd| _tjddddd| _d S )N@   )r   r   r9   )rL   r   r   r   r
   )	r   r   r   key_encodervalue_encoderr   rr   compress_keycompress_value)r   r   r   r   r   r   r     s   
zMemCrompress.__init__c                 C   s\   |j \}}}}}}|jddd}| | |}	| | |}
|
|||d||}
|	|
fS )Nr   r	   )	start_dimend_dim)shapeflattenr   r   r   r   r   )r   keyvaluer   OCTHr|   kvr   r   r   r"     s   zMemCrompress.forward)r   Tr#   r   r   r   r   r     s    r   )r/   torch.nnr   torch.nn.functional
functionalr   torchvisionr   .modelscope.models.cv.video_object_segmentationr   r   Moduler   r)   r3   rK   rN   rS   ra   rk   r   r   r   r   r   r   r   r   r   r   r   r   r   <module>   s,   $%l(- 