o
    ߥimC                     @   s\  d dl Zd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
 d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d d	lmZ d
dlmZ dZG dd dejZ dddZ!G dd dejZ"G dd dejZ#G dd dejZ$G dd dejZ%G dd dejZ&G dd dejZ'e( G dd deZ)dS )     N)partial)DictSequence)	rearrange)BaseBackbone)	BACKBONES)DropPathbuild_activation_layerbuild_norm_layer)
BaseModule)
_BatchNorm   )trunc_normal_gh㈵>c                       s(   e Zd Z	d fdd	Zdd Z  ZS )
ConvBNReLU   c              	      sJ   t t|   tj||||d|dd| _tj|td| _tj	dd| _
d S )Nr   Fkernel_sizestridepaddinggroupsbiasepsTinplace)superr   __init__nnConv2dconvBatchNorm2dNORM_EPSnormReLUact)selfin_channelsout_channelsr   r   r   	__class__ o/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/cv/image_classification/backbones/nextvit.pyr      s   zConvBNReLU.__init__c                 C   s"   |  |}| |}| |}|S N)r   r"   r$   r%   xr*   r*   r+   forward-   s   


zConvBNReLU.forwardr   __name__
__module____qualname__r   r/   __classcell__r*   r*   r(   r+   r      s    r   c                 C   sB   |d u r|}t |t| |d  | | }|d|  k r||7 }|S )Nr   g?)maxint)vdivisor	min_valuenew_vr*   r*   r+   _make_divisible4   s   r<   c                       s&   e Zd Zd fdd	Zdd Z  ZS )
PatchEmbedr   c                    s   t t|   ttjtd}|dkr.tjddddd| _tj	||dddd| _
||| _d S ||krIt | _tj	||dddd| _
||| _d S t | _t | _
t | _d S )	Nr   r   )r   r   TF)r   	ceil_modecount_include_padr   )r   r   r   )r   r=   r   r   r   r    r!   	AvgPool2davgpoolr   r   r"   Identity)r%   r&   r'   r   
norm_layerr(   r*   r+   r   @   s*   




zPatchEmbed.__init__c                 C   s   |  | | |S r,   )r"   r   rA   r-   r*   r*   r+   r/   U   s   zPatchEmbed.forwardr0   r1   r*   r*   r(   r+   r=   >   s    r=   c                       s(   e Zd ZdZ fddZdd Z  ZS )MHCAz,
    Multi-Head Convolutional Attention
    c              	      sj   t t|   ttjtd}tj||ddd|| dd| _||| _	tj
dd| _tj||ddd| _d S )	Nr      r   Fr   Tr   r   r   )r   rD   r   r   r   r    r!   r   group_conv3x3r"   r#   r$   
projection)r%   r'   head_dimrC   r(   r*   r+   r   ^   s    
zMHCA.__init__c                 C   s,   |  |}| |}| |}| |}|S r,   )rG   r"   r$   rH   r%   r.   outr*   r*   r+   r/   n   s
   



zMHCA.forwardr2   r3   r4   __doc__r   r/   r5   r*   r*   r(   r+   rD   Y   s    rD   c                       s.   e Zd Z				d fdd	Zdd Z  ZS )	MlpN        Tc                    sf   t    |p|}t|| d}tj||d|d| _tjdd| _tj||d|d| _t	|| _
d S )N    r   rF   Tr   )r   r   r<   r   r   conv1r#   r$   conv2Dropoutdrop)r%   in_featuresout_features	mlp_ratiorT   r   
hidden_dimr(   r*   r+   r   x   s   
zMlp.__init__c                 C   s6   |  |}| |}| |}| |}| |}|S r,   )rQ   r$   rT   rR   r-   r*   r*   r+   r/      s   




zMlp.forward)NNrO   Tr1   r*   r*   r(   r+   rN   v   s    rN   c                       s4   e Zd ZdZ					d
 fdd	Zdd	 Z  ZS )NCBz 
    Next Convolution Block
    r   r   rP   rE   c           	         s   t t|   || _|| _ttjtd}|| dksJ t	|||| _
t||| _t|| _||| _t|||dd| _t|| _d| _d S )Nr   r   T)rW   rT   r   F)r   rY   r   r&   r'   r   r   r    r!   r=   patch_embedrD   mhcar   attention_path_dropoutr"   rN   mlpmlp_path_dropoutis_bn_merged)	r%   r&   r'   r   path_dropoutrT   rI   rW   rC   r(   r*   r+   r      s   



zNCB.__init__c                 C   sV   |  |}|| | | }tj s| js| |}n|}|| | 	| }|S r,   )
rZ   r\   r[   torchonnxis_in_onnx_exportr_   r"   r^   r]   rJ   r*   r*   r+   r/      s   
zNCB.forward)r   r   r   rP   rE   rL   r*   r*   r(   r+   rY      s    rY   c                       s8   e Zd ZdZ							d fdd		Zd
d Z  ZS )E_MHSAz-
    Efficient Multi-Head Self Attention
    NrP   Tr   rO   r   c	           	         s   t    || _|d ur|n|| _| j| | _|p|d | _tj|| j|d| _tj|| j|d| _	tj|| j|d| _
t| j| j| _t|| _t|| _|| _|d | _|dkrotj| j| jd| _tj|td| _d| _d S )Ng      )r   r   r   r   r   r   F)r   r   dimout_dim	num_headsscaler   Linearqkr8   projrS   	attn_drop	proj_dropsr_ratioN_ratio	AvgPool1dsrBatchNorm1dr!   r"   is_bn_merge)	r%   rf   rg   rI   qkv_biasqk_scalern   ro   rp   r(   r*   r+   r      s&   
	

zE_MHSA.__init__c           
      C   s  |j \}}}| |}|||| jt|| j dddd}| jdkru|dd}| |}t	j
 s<| js<| |}|dd}| |}||d| jt|| j dddd}| |}||d| jt|| j dddd}n2| |}||d| jt|| j dddd}| |}||d| jt|| j dddd}|| | j }	|	jdd}	| |	}	|	| dd|||}| |}| |}|S )Nr   r   r   rE   rf   )shaperk   reshaperh   r7   permuterp   	transposers   ra   rb   rc   ru   r"   rl   r8   ri   softmaxrn   rm   ro   )
r%   r.   BNCrk   x_rl   r8   attnr*   r*   r+   r/      sX   










zE_MHSA.forward)NrP   TNr   rO   r   rL   r*   r*   r(   r+   rd      s    rd   c                       s8   e Zd ZdZ							d fdd	Zd	d
 Z  ZS )NTBz 
    Next Transformer Block
    r   r   rP         ?r   c                    s   t t|   || _|| _|| _ttjt	d}t
t|| d| _|| j | _t|| j|| _|| j| _t| j|||	|
d| _t|| | _t| j| jdd| _t| j|d| _t|d|  | _||| _t|||
d| _t|| _d| _d S )	Nr   rP   )rI   rp   rn   ro   r   )r   )rI   )rW   rT   F)r   r   r   r&   r'   mix_block_ratior   r   r    r!   r<   r7   mhsa_out_channelsmhca_out_channelsr=   rZ   norm1rd   e_mhsar   mhsa_path_dropoutrH   rD   r[   mhca_path_dropoutnorm2rN   r]   r^   r_   )r%   r&   r'   r`   r   rp   rW   rI   r   rn   rT   	norm_funcr(   r*   r+   r     s<   



zNTB.__init__c                 C   s   |  |}|j\}}}}tj s| js| |}n|}t|d}| | 	|}|t|d|d }| 
|}|| | | }tj||gdd}tj sX| jsX| |}n|}|| | | }|S )Nzb c h w -> b (h w) czb (h w) c -> b c h w)hr   ry   )rZ   rz   ra   rb   rc   r_   r   r   r   r   rH   r   r[   catr   r^   r]   )r%   r.   r   r   HWrK   r*   r*   r+   r/   0  s    


zNTB.forward)r   r   r   rP   r   r   r   rL   r*   r*   r(   r+   r      s    	-r   c                       s   e Zd Zg dg dg dg ddZg dg dg dg ddZddd	d	g d
g ddddddddddf fdd	Z fddZdd Zdd Zdd Z	d fdd	Z
  ZS ) NextViT)@   rP   r   )x_smallsmallbaselarge)r   r      r   )rE      
   rE   )rE   r      rE   )rE   r      rE   r   g?r   )r   r   r   r   )   r   r   r   rP   r    TFNrx   c           !         sD  t  j|d | j| }| j| }|| _|
| _|| _dg|d  | _dg|d d  dg | _g d|d d	  | _	d
g|d d  dg | _
| j| j| j	| j
g| _tg|d  | _tg|d d  tg | _tttttg|d d	  | _tg|d d  tg | _| j| j| j| jg| _ttd|d dddt|d |d dddt|d |d dddt|d |d ddd| _|d }g }d}dd td|t|D }tt|D ]j}|| }| j| }| j| }t|D ]Q}|| dkr|dkrd}nd}|| }|| }|tu rt||||||  ||d}|| n|tu r;t|||||  ||| ||||d	}|| |}q||7 }qtj| | _tj|td| _ t!|t"rZ|g}t!|t#sjJ dt$| dt%|D ] \}} | dk rt||  ||< || dksJ d|  qn|| _&|d urtjj'(| } d S d S )N)init_cfg`   r      r      )  r   r   r   i   r   r   i   rE   i   re   rx   c                 S   s   g | ]}|  qS r*   )item).0r.   r*   r*   r+   
<listcomp>  s    z$NextViT.__init__.<locals>.<listcomp>)r   r`   rT   rI   )r`   r   rp   rI   r   rn   rT   r   z-"out_indices" must by a sequence or int, get z	 instead.zInvalid out_indices ))r   r   stem_chsdepthsfrozen_stageswith_extra_norm	norm_evalstage1_out_channelsstage2_out_channelsstage3_out_channelsstage4_out_channelsstage_out_channelsrY   stage1_block_typesr   stage2_block_typesstage3_block_typesstage4_block_typesstage_block_typesr   
Sequentialr   stemra   linspacesumrangelenappendfeaturesr    r!   r"   
isinstancer7   r   type	enumeratestage_out_idxSyncBatchNormconvert_sync_batchnorm)!r%   archr`   rn   rT   strides	sr_ratiosrI   r   resumer   r   norm_cfgout_indicesr   r   r   r   input_channelr   idxdprstage_id	numrepeatoutput_channelsblock_typesblock_idr   output_channel
block_typelayeriindexr(   r*   r+   r   V  s   












zNextViT.__init__c                    s8   t t|   t| jtr| jd dkrd S |   d S )Nr   
Pretrained)r   r   init_weightsr   r   dict_initialize_weights)r%   r(   r*   r+   r     s
   zNextViT.init_weightsc                 C   s   |   D ]]\}}t|tjtjfr"tj|jd tj|jd qt|tj	rBt
|jdd t|drA|jd urAtj|jd qt|tjrat
|jdd t|dra|jd uratj|jd qd S )Ng      ?r   g{Gz?)stdr   )named_modulesr   r   r    rt   init	constant_weightr   rj   r   hasattrr   )r%   nmr*   r*   r+   r     s"   zNextViT._initialize_weightsc                 C   sh   t  }| |}d}t| jD ] \}}||}|| j| kr/| jr&| |}|| |d7 }qt|S )Nr   r   )	listr   r   r   r   r   r"   r   tuple)r%   r.   outputsr   r   r   r*   r*   r+   r/     s   


zNextViT.forwardc                 C   sv   | j dkr7| j  | j D ]}d|_qt| jD ]\}}|| j| j d  kr6|  | D ]}d|_q0qd S d S )Nr   Fr   )r   r   eval
parametersrequires_gradr   r   r   )r%   paramr   r   r*   r*   r+   _freeze_stages  s   

zNextViT._freeze_stagesc                    sN   t t| | |   |r!| jr#|  D ]}t|tr |  qd S d S d S r,   )	r   r   trainr   r   modulesr   r   r   )r%   moder   r(   r*   r+   r     s   

zNextViT.train)T)r2   r3   r4   r   r   r   r   r   r/   r   r   r5   r*   r*   r(   r+   r   G  s@    i	r   r,   )*collections.abccollections	itertoolsmathoswarnings	functoolsr   typingr   r   ra   torch.nnr   einopsr   $mmcls.models.backbones.base_backboner   mmcls.models.builderr   mmcv.cnn.bricksr   r	   r
   mmcv.runnerr   torch.nn.modules.batchnormr   utilsr   r!   Moduler   r<   r=   rD   rN   rY   rd   r   register_moduler   r*   r*   r*   r+   <module>   s6   

'FI