o
    پiac                     @   sD  d Z ddlmZ ddlmZmZmZmZ ddlZddl	m
  mZ ddlm
Z
 ddlmZmZ ddlmZmZmZmZmZmZ ddlmZ d	d
lmZ d	dlmZ d	dlmZmZ d	dlm Z m!Z! dgZ"d7ddZ#G dd de
j$Z%d7ddZ&G dd de
j$Z'G dd de
j$Z(G dd de
j$Z)G dd de
j$Z*G dd de
j$Z+G dd  d e
j$Z,G d!d de
j$Z-d"d# Z.d8d%d&Z/d9d(d)Z0e e0d*d+e0d*d+e0d*d+e0d*d,d-d.d/e0d*d,d-d.d/e0d*d,d-d.d/e0d*d+e0d*d+e0d*d+e0d*d,d-d.d/e0d*d,d-d.d/e0d*d,d-d.d/d0Z1e!d8d1d2Z2e!d8d3d4Z3e!d8d5d6Z4dS ):z Next-ViT

As described in https://arxiv.org/abs/2207.05501

Next-ViT model defs and weights adapted from https://github.com/bytedance/Next-ViT, original copyright below
    )partial)ListOptionalTupleUnionN)nnIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPathtrunc_normal_ConvMlpget_norm_layerget_act_layeruse_fused_attn)ClassifierHead   )build_model_with_cfg)feature_take_indices)
checkpointcheckpoint_seq)generate_default_cfgsregister_modelNextViTc                 C   s.  | j j}| jdu rtj| j|jd| }t	|| _| jj}|du rR|j
du s.J d|jdu s7J d|j|jd}||j  }|j|j |j |  }nX|j
du s[J d|jdu sdJ d|j
du smJ d|jdu svJ d|j|jd}	|j|jd}
|	|j  |
 |j  }|
|j  |j|j |j |	  |j  |j }t| tjr|| }||d|d| nDt| tjr|jd dkr|jd	 dksJ ||jd
 |jd }|| }||d|d| ||jd
 |jd dd}|| || j _|| j_dS )z/ Merge pre BN to reduce inference runtime.
    N)deviceTz2Unsupported bn_module.track_running_stats is Falsez%Unsupported bn_module.affine is False      r         r   )weightdatabiastorchzerosout_chsr   typer   	Parametertrack_running_statsaffinerunning_varaddepspowrunning_mean
isinstanceLinearmul_viewsize	expand_asConv2dshapereshapeadd_)modulepre_bn_1pre_bn_2r   r"   r    scale_invstdextra_weight
extra_biasscale_invstd_1scale_invstd_2 r?   G/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/nextvit.pymerge_pre_bn   sH   

  
rA   c                       s4   e Zd Zdddejejf fdd	Zdd Z  ZS )ConvNormActr   r   c              	      s>   t t|   tj||||d|dd| _||| _| | _d S )Nr   Fkernel_sizestridepaddinggroupsr    )superrB   __init__r   r3   convnormact)selfin_chsr#   rD   rE   rG   
norm_layer	act_layer	__class__r?   r@   rI   J   s   

zConvNormAct.__init__c                 C   s"   |  |}| |}| |}|S N)rJ   rK   rL   rM   xr?   r?   r@   forward[   s   


zConvNormAct.forward)	__name__
__module____qualname__r   BatchNorm2dReLUrI   rV   __classcell__r?   r?   rQ   r@   rB   I   s    rB   c                 C   sB   |d u r|}t |t| |d  | | }|d|  k r||7 }|S )Nr   g?)maxint)vdivisor	min_valuenew_vr?   r?   r@   _make_divisibleb   s   rc   c                       s,   e Zd Zdejf fdd	Zdd Z  ZS )
PatchEmbedr   c                    s   t t|   |dkr'tjddddd| _tj||dddd| _||| _d S ||krBt	 | _tj||dddd| _||| _d S t	 | _t	 | _t	 | _d S )Nr   )r   r   TF)rE   	ceil_modecount_include_padr   )rD   rE   r    )
rH   rd   rI   r   	AvgPool2dpoolr3   rJ   rK   Identity)rM   rN   r#   rE   rO   rQ   r?   r@   rI   m   s   


zPatchEmbed.__init__c                 C   s   |  | | |S rS   )rK   rJ   rh   rT   r?   r?   r@   rV      s   zPatchEmbed.forward)rW   rX   rY   r   rZ   rI   rV   r\   r?   r?   rQ   r@   rd   l   s
    rd   c                       s2   e Zd ZdZejejf fdd	Zdd Z  Z	S )ConvAttentionz,
    Multi-Head Convolutional Attention
    c              	      sV   t t|   tj||ddd|| dd| _||| _| | _tj||ddd| _d S )Nr   r   FrC   )rD   r    )	rH   rj   rI   r   r3   group_conv3x3rK   rL   
projection)rM   r#   head_dimrO   rP   rQ   r?   r@   rI      s   
zConvAttention.__init__c                 C   s,   |  |}| |}| |}| |}|S rS   )rk   rK   rL   rl   rM   rU   outr?   r?   r@   rV      s
   



zConvAttention.forward)
rW   rX   rY   __doc__r   rZ   r[   rI   rV   r\   r?   r?   rQ   r@   rj      s    
rj   c                       sL   e Zd ZdZdddddejejf fdd	Ze	 dd	 Z
d
d Z  ZS )NextConvBlockz 
    Next Convolution Block
    r               g      @c
           
         s   t t|   || _|| _|| dksJ t||||d| _t||||	d| _t	|| _
||| _t|t|| |d|	d| _t	|| _d| _d S )Nr   )rO   )rO   rP   T)hidden_featuresdropr    rP   F)rH   rq   rI   rN   r#   rd   patch_embedrj   mhcar   attn_drop_pathrK   r   r^   mlpmlp_drop_pathis_fused)
rM   rN   r#   rE   	drop_pathru   rm   	mlp_ratiorO   rP   rQ   r?   r@   rI      s,   




zNextConvBlock.__init__c                 C   s.   | j st| jj| j t | _d| _ d S d S NT)r{   rA   ry   fc1rK   r   ri   rM   r?   r?   r@   reparameterize   s
   

zNextConvBlock.reparameterizec                 C   s@   |  |}|| | | }| |}|| | | }|S rS   )rv   rx   rw   rK   rz   ry   rn   r?   r?   r@   rV      s
   

zNextConvBlock.forwardrW   rX   rY   rp   r   rZ   r[   rI   r!   no_gradr   rV   r\   r?   r?   rQ   r@   rq      s    %
rq   c                       sL   e Zd ZU dZejje ed< dddddde	j
f fdd		Zd
d Z  ZS )EfficientAttentionz-
    Efficient Multi-Head Self Attention
    
fused_attnNrs   Trr   r   c	           	         s   t    || _|d ur|n|| _| j| | _|| _|d | _t | _t	j
|| j|d| _t	j
|| j|d| _t	j
|| j|d| _t	
| j| j| _t	|| _t	|| _|| _|d | _|dkrst	j| j| jd| _||| _d S d | _d | _d S )Nr   )r    r   r   )rD   rE   )rH   rI   dimout_dim	num_headsrm   scaler   r   r   r.   qkr_   projDropout	attn_drop	proj_dropsr_ratioN_ratio	AvgPool1dsrrK   )	rM   r   r   rm   qkv_biasr   r   r   rO   rQ   r?   r@   rI      s(   



zEfficientAttention.__init__c           	      C   s2  |j \}}}| |||| j| jdddd}| jd ur0| |dd}| |dd}| 	||d| j| jdd}| 
||d| j| jdd}| jrftj|||| jra| jjndd}n|| j }||dd }|jdd	}| |}|| }|dd|||}| |}| |}|S )
Nr   r   r   r   rr   )	dropout_pr   )r4   r   r5   r   rm   permuter   	transposerK   r   r_   r   Fscaled_dot_product_attentiontrainingr   pr   softmaxr   r   )	rM   rU   BNCr   r   r_   attnr?   r?   r@   rV      s*   &
""



zEfficientAttention.forward)rW   rX   rY   rp   r!   jitFinalbool__annotations__r   BatchNorm1drI   rV   r\   r?   r?   rQ   r@   r      s   
 #r   c                	       sP   e Zd ZdZdddddddejejf	 fdd	Ze	 d	d
 Z
dd Z  ZS )NextTransformerBlockz 
    Next Transformer Block
    r   r   rs         ?rr   c                    s   t t|   || _|| _|| _tt|| d| _|| j | _	t
|| j|| _|| j| _t| j|||	|
d| _t|| | _t
| j| j	d|d| _t| j	|||d| _t|d|  | _||| _t|t|| ||
d| _t|| _d| _d S )Nrs   )rm   r   r   r   r   )rE   rO   )rm   rO   rP   )rt   rP   ru   F)rH   r   rI   rN   r#   mix_block_ratiorc   r^   mhsa_out_chsmhca_out_chsrd   rv   norm1r   e_mhsar   mhsa_drop_pathrl   rj   rw   mhca_drop_pathnorm2r   ry   rz   r{   )rM   rN   r#   r|   rE   r   r}   rm   r   r   ru   rO   rP   rQ   r?   r@   rI   !  sB   



zNextTransformerBlock.__init__c                 C   s   | j sUt| jj| j | jjd ur.t| jj| j| jj t| jj| j| jj t	 | j_nt| jj| j t| jj| j t	 | _t| j
j| j t	 | _d| _ d S d S r~   )r{   rA   r   r   r   rK   r   r_   r   ri   ry   r   r   r   r?   r?   r@   r   V  s   


z#NextTransformerBlock.reparameterizec                 C   s   |  |}|j\}}}}| |}|||ddd}| | |}||dd|||| }| |}|| | 	| }t
j||gdd}| |}|| | | }|S )Nr   r   r   r   )rv   r4   r   r5   r   r   r   rl   r   rw   r!   catr   rz   ry   )rM   rU   r   r   HWro   r?   r?   r@   rV   g  s   



zNextTransformerBlock.forwardr   r?   r?   rQ   r@   r     s    	5
r   c                	       sN   e Zd Zdddddddejejf	 fdd	Zejj	dd	d
Z
dd Z  ZS )	NextStager   r         ?rr   rs   c                    s   t    d| _g }t|D ]Q\}}|dkr|nd}|| }|| }t|	ttfr-|	| n|	}|tu rEt||||||
||d}|| n|t	u r]t	||||||
|||||d}|| |}qt
j| | _d S )NFr   r   )rE   r|   ru   rm   rO   rP   )	r|   rE   r   rm   r   r   ru   rO   rP   )rH   rI   grad_checkpointing	enumerater-   listtuplerq   appendr   r   
Sequentialblocks)rM   rN   	block_chsblock_typesrE   r   r   ru   r   r|   rm   rO   rP   r   	block_idx
block_typer#   dprlayerrQ   r?   r@   rI   {  sJ   


zNextStage.__init__Tc                 C   s
   || _ d S rS   )r   )rM   enabler?   r?   r@   set_grad_checkpointing  s   
z NextStage.set_grad_checkpointingc                 C   s.   | j rtj st| j|}|S | |}|S rS   )r   r!   r   is_scriptingr   r   rT   r?   r?   r@   rV     s
   
zNextStage.forwardT)rW   rX   rY   r   rZ   r[   rI   r!   r   ignorer   rV   r\   r?   r?   rQ   r@   r   y  s    7r   c                       sB  e Zd Zdddddddddd	d
ejdf fdd	Zdd Zejj	d2ddZ
ejj	d3ddZejj	dejfddZd4dedee fddZ					d5dejdeeeee f  d ed!ed"ed#edeeej eejeej f f fd$d%Z	&		d6deeee f d'ed(efd)d*Zd+d, Zd2d-efd.d/Zd0d1 Z  ZS )7r     avg)@   rs   r   r      
   r   )r   r   r   r   )   r   r   r   皙?rr   rs   r   Nc                    sn  t t|   d| _|| _t|}|d u rttjdd}nt	|}dg d  dg d d  dg g d	 d
 d  dg d d  dg g| _
dd t| j
D | _tg d  tg d d  tg tttttg d
 d  tg d d  tg g| _tt||d dd
||dt|d |d dd||dt|d |d
 dd||dt|d
 |d
 dd
||d| _|d  }}g }d}dd td|t  D }tt D ]2}t|| j
| | j| || || |||
|	|| ||d}| j
| d  }}||g7 }| | 7 }q| | _| _tj| | _||| _t|||d| _ fddtt D | _ | !  d S )NFT)inplace`   r      r      )  r   r   r   i   r      i   r   i   c                 S   s0   g | ]\}}t |d  d|d  d| dqS )r   r   zstages.)num_chs	reductionr7   dict).0iscr?   r?   r@   
<listcomp>  s    

z$NextViT.__init__.<locals>.<listcomp>)rD   rE   rO   rP   r   c                 S   s   g | ]}|  qS r?   )tolist)r   rU   r?   r?   r@   r     s    )rN   r   r   rE   r   r   rm   ru   r   r|   rO   rP   )	pool_typein_featuresnum_classesc                    s$   g | ]}t  d |d  d qS )Nr   )sum)r   idxdepthsr?   r@   r     s   $ )"rH   r   rI   r   r   r   r   r   r[   r   stage_out_chsr   feature_inforq   r   stage_block_typesr   rB   stemr!   linspacer   splitrangelenr   num_featureshead_hidden_sizestagesrK   r   headstage_out_idx_initialize_weights)rM   in_chansr   global_poolstem_chsr   strides	sr_ratiosdrop_path_rateattn_drop_rate	drop_raterm   r   rO   rP   rN   r#   r   r   r   	stage_idxstagerQ   r   r@   rI     sl   "

zNextViT.__init__c                 C   s   |   D ]C\}}t|tjr(t|jdd t|dr'|jd ur'tj	|jd qt|tj
rGt|jdd t|drG|jd urGtj	|jd qd S )Ng{Gz?)stdr    r   )named_modulesr-   r   r.   r   r   hasattrr    init	constant_r3   )rM   nmr?   r?   r@   r     s   zNextViT._initialize_weightsFc                 C   s   t d|rddS ddgdS )Nz^stemz^stages\.(\d+))z^stages\.(\d+)\.blocks\.(\d+)N)z^norm)i )r   r   r   )rM   coarser?   r?   r@   group_matcher  s   zNextViT.group_matcherTc                 C   s"   || _ | jD ]}|j|d qd S )N)r   )r   r   r   )rM   r   r   r?   r?   r@   r   %  s   
zNextViT.set_grad_checkpointingreturnc                 C   s   | j jS rS   )r   fcr   r?   r?   r@   get_classifier+  s   zNextViT.get_classifierr   r   c                 C   s   || _ | jj||d d S )N)r   )r   r   reset)rM   r   r   r?   r?   r@   reset_classifier/  s   zNextViT.reset_classifierNCHWrU   indicesrK   
stop_early
output_fmtintermediates_onlyc                 C   s   |dv sJ dg }t t| j|\}}	| |}t| jd }
tj s'|s+| j}n	| jd|	d  }t|D ]2\}}| jrJtj sJt	||}n||}||v rj||
kre|r]| 
|n|}|| q8|| q8|ro|S ||
krx| 
|}||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r  zOutput shape must be NCHW.r   N)r   r   r   r   r!   r   r   r   r   r   rK   r   )rM   rU   r  rK   r  r  r  intermediatestake_indices	max_indexlast_idxr   feat_idxr   x_interr?   r?   r@   forward_intermediates3  s.   


zNextViT.forward_intermediatesr   
prune_norm
prune_headc                 C   sJ   t t| j|\}}| jd|d  | _|rt | _|r#| dd |S )z@ Prune layers not required for specified intermediates.
        Nr   r    )r   r   r   r   ri   rK   r  )rM   r  r  r  r  r  r?   r?   r@   prune_intermediate_layersh  s   
z!NextViT.prune_intermediate_layersc                 C   s@   |  |}| jrtj st| j|}n| |}| |}|S rS   )r   r   r!   r   r   r   r   rK   rT   r?   r?   r@   forward_featuresx  s   


zNextViT.forward_features
pre_logitsc                 C   s   |r	| j ||dS |  |S )N)r  )r   )rM   rU   r  r?   r?   r@   forward_head  s   zNextViT.forward_headc                 C   s   |  |}| |}|S rS   )r  r   rT   r?   r?   r@   rV     s   

zNextViT.forwardFr   rS   )NFFr  F)r   FT)rW   rX   rY   r   rZ   rI   r   r!   r   r   r  r   Moduler  r^   r   strr  Tensorr   r   r   r   r  r  r  r   rV   r\   r?   r?   rQ   r@   r     sp    Q	 
7
	c                 C   sP   d| v r| S |  }i }t| |  | |  D ]
\}}}}|||< q|S )z$ Remap original checkpoints -> timm zhead.fc.weight)
state_dictzipkeysvalues)r%  modelDout_dictkakbvavbr?   r?   r@   checkpoint_filter_fn  s   *
r0  Fc                 K   sP   t dd t|ddD }|d|}tt| |fttd|dd|}|S )	Nc                 s   s    | ]\}}|V  qd S rS   r?   )r   r   _r?   r?   r@   	<genexpr>  s    z"_create_nextvit.<locals>.<genexpr>r   )r   r   r   r   out_indicesT)flatten_sequentialr3  )pretrained_filter_fnfeature_cfg)r   r   getpopr   r   r0  r   )variant
pretrainedkwargsdefault_out_indicesr3  r)  r?   r?   r@   _create_nextvit  s   
r=  r  c                 K   s   | dddddt tddd
|S )	Nr   )r      r>  )   r?  gffffff?bicubiczstem.0.convzhead.fc)
urlr   
input_size	pool_sizecrop_pctinterpolationmeanr   
first_conv
classifierr   )rA  r;  r?   r?   r@   _cfg  s   rI  ztimm/)	hf_hub_id)r   r   r   )   rK  r   )rJ  rB  rC  rD  )znextvit_small.bd_in1kznextvit_base.bd_in1kznextvit_large.bd_in1kznextvit_small.bd_in1k_384znextvit_base.bd_in1k_384znextvit_large.bd_in1k_384znextvit_small.bd_ssld_6m_in1kznextvit_base.bd_ssld_6m_in1kznextvit_large.bd_ssld_6m_in1kz!nextvit_small.bd_ssld_6m_in1k_384z nextvit_base.bd_ssld_6m_in1k_384z!nextvit_large.bd_ssld_6m_in1k_384c                 K   0   t ddd}t	dd| it |fi |}|S )Nr   r   r   r   nextvit_smallr:  )rN  r   r=  r:  r;  
model_argsr)  r?   r?   r@   rN       rN  c                 K   rL  )N)r   r      r   皙?rM  nextvit_baser:  )rU  rO  rP  r?   r?   r@   rU    rR  rU  c                 K   rL  )N)r   r      r   rT  rM  nextvit_larger:  )rW  rO  rP  r?   r?   r@   rW    rR  rW  rS   r!  )r  )5rp   	functoolsr   typingr   r   r   r   r!   torch.nn.functionalr   
functionalr   	timm.datar	   r
   timm.layersr   r   r   r   r   r   r   _builderr   	_featuresr   _manipulater   r   	_registryr   r   __all__rA   r"  rB   rc   rd   rj   rq   r   r   r   r   r0  r=  rI  default_cfgsrN  rU  rW  r?   r?   r?   r@   <module>   s     
/

:F]E M

/