o
    پi{\                     @   sb  d Z ddlZddlmZ ddlmZmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZmZ ddlmZmZmZmZmZmZmZmZmZmZmZ ddlmZ dd	lmZ dd
l m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z' dgZ(G dd dej)Z*G dd dej)Z+G dd dej)Z,G dd dej)Z-G dd dej)Z.dee/e/f fddZ0e!dee/e/f dee/e/f fddZ1G dd dej)Z2G d d! d!ej)Z3G d"d# d#ej)Z4G d$d dej)Z5d=d&d'Z6d>d)d*Z7e'e7d+d,e7d-d,e7d.d,e7d/d,e7d0d,d1Z8e&d=d2e5fd3d4Z9e&d=d2e5fd5d6Z:e&d=d2e5fd7d8Z;e&d=d2e5fd9d:Z<e&d=d2e5fd;d<Z=dS )?a)   Global Context ViT

From scratch implementation of GCViT in the style of timm swin_transformer_v2_cr.py

Global Context Vision Transformers -https://arxiv.org/abs/2206.09959

@article{hatamizadeh2022global,
  title={Global Context Vision Transformers},
  author={Hatamizadeh, Ali and Yin, Hongxu and Kautz, Jan and Molchanov, Pavlo},
  journal={arXiv preprint arXiv:2206.09959},
  year={2022}
}

Free of any code related to NVIDIA GCVit impl at https://github.com/NVlabs/GCVit.
The license for this code release is Apache 2.0 with no commercial restrictions.

However, weight files adapted from NVIDIA GCVit impl ARE under a non-commercial share-alike license
(https://creativecommons.org/licenses/by-nc-sa/4.0/) until I have a chance to train new ones...

Hacked together by / Copyright 2022, Ross Wightman
    N)partial)CallableListOptionalTupleUnionIMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)DropPath	to_2tuple	to_ntupleMlpClassifierHeadLayerNorm2dget_attnget_act_layerget_norm_layer
RelPosBias_assert   )build_model_with_cfg)feature_take_indices)register_notrace_function)named_apply
checkpoint)register_modelgenerate_default_cfgsGlobalContextVitc                       s6   e Zd ZdZddddejf fdd	Zdd	 Z  ZS )
MbConvBlockzR A depthwise separable / fused mbconv style residual block with SE, `no norm.
    Ng      ?seFc           	   	      s   t    t|d}t|tr|dks|dkrd|d< d|d< t|}|p&|}t|| }tj||dd	d	||d
| _	| | _
||fi || _tj||d	d	d|d| _d S )N	act_layerr    ecag      ?rd_ratioFbias   r   )groupsr%   r   r%   )super__init__dict
isinstancestrr   intnnConv2dconv_dwactr    conv_pw)	selfin_chsout_chsexpand_ratio
attn_layerr%   r"   attn_kwargsmid_chs	__class__ E/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/gcvit.pyr*   ,   s   
	
zMbConvBlock.__init__c                 C   s8   |}|  |}| |}| |}| |}|| }|S N)r1   r2   r    r3   )r4   xshortcutr=   r=   r>   forwardC   s   



zMbConvBlock.forward)	__name__
__module____qualname____doc__r/   GELUr*   rB   __classcell__r=   r=   r;   r>   r   )   s    r   c                       s0   e Zd Zddejef fdd	Zdd Z  ZS )Downsample2dNconvc                    s   t    |p|}|d ur||nt | _t||d| _|dv s#J |dkr4tj||ddddd| _n!|d	krH||ks>J tj	dddd
| _n||ksNJ tj
dd| _|d ur`||| _d S t | _d S )Nr!   )rJ   maxavgrJ   r&      r   Fr(   rK   kernel_sizestridepaddingrO   )r)   r*   r/   Identitynorm1r   
conv_blockr0   	reduction	MaxPool2d	AvgPool2dnorm2)r4   dimdim_outrV   r"   
norm_layerr;   r=   r>   r*   N   s   
$zDownsample2d.__init__c                 C   s,   |  |}| |}| |}| |}|S r?   )rT   rU   rV   rY   r4   r@   r=   r=   r>   rB   f   s
   



zDownsample2d.forward)	rC   rD   rE   r/   rG   r   r*   rB   rH   r=   r=   r;   r>   rI   M   s    rI   c                       s.   e Zd Zddejf fdd	Zdd Z  ZS )FeatureBlockr   rK   c                    s   t    |}td|}|dkrttjdd}n	ttjdddd}t | _t	|D ]%}| j
d|d  t||d |rO| j
d	|d  |  |d8 }q*d S )
Nr   rL   rM   rR   r&   rN   rJ   r!   pool)r)   r*   rK   r   r/   rX   rW   
Sequentialblocksrange
add_moduler   )r4   rZ   levelsrV   r"   
reductionspool_fnir;   r=   r>   r*   o   s   


 zFeatureBlock.__init__c                 C   s
   |  |S r?   )ra   r]   r=   r=   r>   rB      s   
zFeatureBlock.forward)rC   rD   rE   r/   rG   r*   rB   rH   r=   r=   r;   r>   r^   n   s    r^   c                	       sB   e Zd Zddejefdedededef fddZd	d
 Z	  Z
S )Stemr&   `   r5   r6   r"   r\   c                    s4   t    tj||dddd| _t|||d| _d S )Nr&   rM   r   rN   )r"   r\   )r)   r*   r/   r0   conv1rI   down)r4   r5   r6   r"   r\   r;   r=   r>   r*      s   
zStem.__init__c                 C      |  |}| |}|S r?   )rj   rk   r]   r=   r=   r>   rB         

zStem.forward)rC   rD   rE   r/   rG   r   r.   r   r*   rB   rH   r=   r=   r;   r>   rh      s    rh   c                       sb   e Zd Z				ddededeeef dededed	ef fd
dZddee	j
 fddZ  ZS )WindowAttentionGlobalT        rZ   	num_headswindow_size
use_globalqkv_bias	attn_drop	proj_dropc                    s   t    t|}|| _|| _|| | _| jd | _|| _t||d| _	| jr3t
j||d |d| _nt
j||d |d| _t
|| _t
||| _t
|| _d S )Ng      )rq   rp   rM   r(   r&   )r)   r*   r   rq   rp   head_dimscalerr   r   rel_posr/   LinearqkvDropoutrt   projru   )r4   rZ   rp   rq   rr   rs   rt   ru   r;   r=   r>   r*      s   


zWindowAttentionGlobal.__init__Nq_globalc                 C   sV  |j \}}}| jrV|d urVt|j d |j d kd | |}|||d| j| jddddd}|d\}}|	||j d  ddd}	|	||| j| jdddd}	n| |||d| j| jddddd}
|
d\}	}}|	| j
 }	|	|dd  }| |}|jdd	}| |}|| dd|||}| |}| |}|S )
Nz*x and q_global seq lengths should be equalrM   r   r&   r      )rZ   )shaperr   r   rz   reshaperp   rv   permuteunbindrepeatrw   	transpose
contiguousrx   softmaxrt   r|   ru   )r4   r@   r}   BNCkvkvqrz   attnr=   r=   r>   rB      s&   
$"*




zWindowAttentionGlobal.forward)TTro   ro   r?   )rC   rD   rE   r.   r   boolfloatr*   r   torchTensorrB   rH   r=   r=   r;   r>   rn      s*    
rn   rq   c                 C   sj   | j \}}}}| |||d  |d ||d  |d |} | dddddd d|d |d |}|S )Nr   r   r&   rM   r      r~   r   viewr   r   )r@   rq   r   HWr   windowsr=   r=   r>   window_partition   s   ,,r   img_sizec                 C   sf   |\}}| j d }| d||d  ||d  |d |d |}|dddddd d|||}|S )Nr~   r   r   r&   rM   r   r   r   )r   rq   r   r   r   r   r@   r=   r=   r>   window_reverse   s
   
,$r   c                       s&   e Zd Zd fdd	Zdd Z  ZS )
LayerScaleh㈵>Fc                    s*   t    || _t|t| | _d S r?   )r)   r*   inplacer/   	Parameterr   onesgamma)r4   rZ   init_valuesr   r;   r=   r>   r*      s   
zLayerScale.__init__c                 C   s   | j r	|| jS || j S r?   )r   mul_r   r]   r=   r=   r>   rB         zLayerScale.forward)r   F)rC   rD   rE   r*   rB   rH   r=   r=   r;   r>   r      s    r   c                       s   e Zd Zddddddddeejejfdedeeef ded	ed
e	de
de
dee	 de	de	de	dededef fddZddeej fddZddeej fddZ  ZS )GlobalContextVitBlock         @TNro   rZ   	feat_sizerp   rq   	mlp_ratiorr   rs   layer_scaleru   rt   	drop_pathr8   r"   r\   c              	      s  t    t|}t|}|| _t|d |d  |d |d   | _||| _|||||||
|	d| _|d ur=t||nt	
 | _|dkrJt|nt	
 | _||| _t|t|| ||	d| _|d urjt||nt	
 | _|dkrzt|| _d S t	
 | _d S )Nr   r   )rp   rq   rr   rs   rt   ru   ro   )in_featureshidden_featuresr"   drop)r)   r*   r   rq   r.   num_windowsrT   r   r   r/   rS   ls1r   
drop_path1rY   r   mlpls2
drop_path2)r4   rZ   r   rp   rq   r   rr   rs   r   ru   rt   r   r8   r"   r\   r;   r=   r>   r*      s*   
&
	
$zGlobalContextVitBlock.__init__r}   c           	      C   sZ   |j \}}}}t|| j}|d| jd | jd  |}| ||}t|| j||f}|S )Nr~   r   r   )r   r   rq   r   r   r   )	r4   r@   r}   r   r   r   r   x_winattn_winr=   r=   r>   _window_attn  s   z"GlobalContextVitBlock._window_attnc              
   C   sF   ||  | | | || }|| | | | | }|S r?   )r   r   r   rT   r   r   r   rY   )r4   r@   r}   r=   r=   r>   rB     s   " zGlobalContextVitBlock.forwardr?   )rC   rD   rE   rn   r/   rG   	LayerNormr.   r   r   r   r   r   r*   r   r   r   rB   rH   r=   r=   r;   r>   r      sV    
	
)r   c                !       s   e Zd Zdddddddddejejefdededeeef d	eeef d
e	de	de	de
de	dee
 de
de
deee
 e
f dededef  fddZdd Z  ZS )GlobalContextVitStageTFr   Nro   depthrp   r   rq   
downsampleglobal_norm
stage_normr   rs   r   ru   rt   r   r"   r\   norm_layer_clc                    s   t    |r"td |d| _d d d d d fnt | _| _ttt	
tt }t|| _|rGnt | _t 	
fddt|D | _|rlnt | _| _| _d| _d S )NrM   )rZ   r[   r\   r   r   c                    sH   g | ] }t 
|d  dk	ttr| n dqS )rM   r   )rZ   rp   r   rq   r   rs   rr   r   ru   rt   r   r"   r\   )r   r,   list).0rg   r"   rt   rZ   r   r   r   r   r   rp   ru   rs   rq   r=   r>   
<listcomp>K  s$    
z2GlobalContextVitStage.__init__.<locals>.<listcomp>F)r)   r*   rI   r   r/   rS   r   r   r.   mathlog2minr^   global_blockr   
ModuleListrb   ra   normrZ   grad_checkpointing)r4   rZ   r   rp   r   rq   r   r   r   r   rs   r   ru   rt   r   r"   r\   r   feat_levelsr;   r   r>   r*   %  s,   

$

zGlobalContextVitStage.__init__c                 C   s   |  |}| |}|dddd}| |dddd}| jD ]}| jr1tj s1t	|||}q |||}q | 
|}|dddd }|S )Nr   rM   r&   r   )r   r   r   r   ra   r   r   jitis_scriptingr   r   r   )r4   r@   global_queryblkr=   r=   r>   rB   b  s   



zGlobalContextVitStage.forward)rC   rD   rE   r/   rG   r   r   r.   r   r   r   r   r   r   r   r*   rB   rH   r=   r=   r;   r>   r   $  s^    

	
=r   c                )       s  e Zd Z											
											dMdedededeeef deedf deedf dedeedf deedf dededee ded ed!ed"ed#ed$ed%ed&ef( fd'd(Z	dNd*d+Z
ejjd,d- ZejjdOd/d0ZejjdPd1d2Zejjd3ejfd4d5ZdQdedee fd6d7Z		.	.	8	.dRd9ejd:eeeee f  d;ed<ed=ed>ed3eeej eejeej f f fd?d@Z	A	.	dSd:eeee f dBedCefdDdEZd9ejd3ejfdFdGZdOdHefdIdJZd9ejd3ejfdKdLZ  ZS )Tr   r&     rL          r      r   N@   r&   r      r   rM   r      r         @Tro    gelulayernorm2d	layernormr   in_chansnum_classesglobal_poolr   window_ratio.rq   	embed_dimdepthsrp   r   rs   r   	drop_rateproj_drop_rateattn_drop_ratedrop_path_rater"   r\   r   norm_epsc              
      s8  t    t|}tt||d}tt||d}g | _t  tdd  D }|| _|| _	|| _
t|}t|d|d    | _| _|d urOt||}n|d usUJ t fddt||D }t||||d| _d	d td
|t||D }g }t|D ]v}||d k}dt|d d
 }|td i d|| d|| d|	| d|d
 | |d | fd|| d|d
kd|d|
d|d|d|d|d|| d|d|d| |  jt|d jd|d  d| dg7  _qtj| | _t| j|||d| _ |rt!t| j"|d|  d S d S )!N)epsc                 s   s    | ]}|d  V  qdS )r   Nr=   )r   dr=   r=   r>   	<genexpr>      z,GlobalContextVit.__init__.<locals>.<genexpr>rM   r   c                    s$   g | ]} d  |  d | fqS )r   r   r=   )r   rr   r=   r>   r     s   $ z-GlobalContextVit.__init__.<locals>.<listcomp>)r5   r6   r"   r\   c                 S   s   g | ]}|  qS r=   )tolist)r   r@   r=   r=   r>   r     s    r   rZ   r   rp   r   rq   r   r   r   rs   r   ru   rt   r   r"   r\   r   r~   zstages.)num_chsrV   module	pool_typer   )schemer=   )#r)   r*   r   r   r   feature_infor   tupler   r   r   lenr.   num_featureshead_hidden_sizer   rh   stemr   linspacesumsplitrb   rK   appendr   r+   rZ   r/   r`   stagesr   headr   _init_weights)r4   r   r   r   r   r   rq   r   r   rp   r   rs   r   r   r   r   r   weight_initr"   r\   r   r   r   
num_stagesdprr  rg   
last_stagestage_scaler;   r   r>   r*   u  s   
"	
0zGlobalContextVit.__init__vitc                 C   s   |dkr2t |tjr.tj|j |jd ur0d|v r%tjj|jdd d S tj|j d S d S d S t |tjrOtjj|jdd |jd urQtj|j d S d S d S )Nr	  r   gư>)stdg{Gz?)	r,   r/   ry   initxavier_uniform_weightr%   normal_zeros_)r4   r   namer   r=   r=   r>   r    s   

zGlobalContextVit._init_weightsc                 C   s   dd |   D S )Nc                    s*   h | ]\ }t  fd ddD r qS )c                 3   s    | ]}| v V  qd S r?   r=   )r   nr   r=   r>   r     r   z=GlobalContextVit.no_weight_decay.<locals>.<setcomp>.<genexpr>)relative_position_bias_tablezrel_pos.mlp)any)r   _r=   r  r>   	<setcomp>  s    z3GlobalContextVit.no_weight_decay.<locals>.<setcomp>)named_parametersr4   r=   r=   r>   no_weight_decay  s   z GlobalContextVit.no_weight_decayFc                 C   s   t ddd}|S )Nz^stemz^stages\.(\d+))r   ra   )r+   )r4   coarsematcherr=   r=   r>   group_matcher  s
   zGlobalContextVit.group_matcherc                 C   s   | j D ]}||_qd S r?   )r  r   )r4   enablesr=   r=   r>   set_grad_checkpointing  s   
z'GlobalContextVit.set_grad_checkpointingreturnc                 C   s   | j jS r?   )r  fcr  r=   r=   r>   get_classifier  s   zGlobalContextVit.get_classifierc                 C   s2   || _ |d u r| jjj}t| j||| jd| _d S )Nr   )r   r  r   r   r   r   r   )r4   r   r   r=   r=   r>   reset_classifier  s   
z!GlobalContextVit.reset_classifierNCHWr@   indicesr   
stop_early
output_fmtintermediates_onlyc                 C   s   |dv sJ dg }t t| j|\}}	| |}tj s |s$| j}
n	| jd|	d  }
t|
D ]\}}||}||v rB|| q1|rG|S ||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r$  zOutput shape must be NCHW.Nr   )	r   r   r  r   r   r   r   	enumerater   )r4   r@   r%  r   r&  r'  r(  intermediatestake_indices	max_indexr  feat_idxstager=   r=   r>   forward_intermediates  s   

z&GlobalContextVit.forward_intermediatesr   
prune_norm
prune_headc                 C   s<   t t| j|\}}| jd|d  | _|r| dd |S )z@ Prune layers not required for specified intermediates.
        Nr   r   r   )r   r   r  r#  )r4   r%  r0  r1  r+  r,  r=   r=   r>   prune_intermediate_layers  s
   z*GlobalContextVit.prune_intermediate_layersc                 C   rl   r?   )r   r  r]   r=   r=   r>   forward_features+  rm   z!GlobalContextVit.forward_features
pre_logitsc                 C   s   |r	| j ||dS |  |S )N)r4  )r  )r4   r@   r4  r=   r=   r>   forward_head0  r   zGlobalContextVit.forward_headc                 C   rl   r?   )r3  r5  r]   r=   r=   r>   rB   3  rm   zGlobalContextVit.forward)r&   r   rL   r   r   Nr   r   r   r   TNro   ro   ro   ro   r   r   r   r   r   )r	  F)Tr?   )NFFr$  F)r   FT)rC   rD   rE   r.   r-   r   r   r   r   r*   r  r   r   ignorer  r  r  r/   Moduler"  r#  r   r   r   r/  r2  r3  r5  rB   rH   r=   r=   r;   r>   r   t  s    



	


Q
	 
,
Fc                 K   s$   t t| |fdtdddi|}|S )Nfeature_cfg)r   r   rM   r&   T)out_indicesflatten_sequential)r   r   r+   )variant
pretrainedkwargsmodelr=   r=   r>   _create_gcvit9  s   
r@  r   c                 K   s    | dddddt tdddd	|S )
Nr   )r&   r   r   )r   r   g      ?bicubicz
stem.conv1zhead.fcT)urlr   
input_size	pool_sizecrop_pctinterpolationmeanr
  
first_conv
classifierfixed_input_sizer   )rB  r>  r=   r=   r>   _cfgB  s   rK  z}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-morevit/gcvit_xxtiny_224_nvidia-d1d86009.pth)rB  z|https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-morevit/gcvit_xtiny_224_nvidia-274b92b7.pthz{https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-morevit/gcvit_tiny_224_nvidia-ac783954.pthz|https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-morevit/gcvit_small_224_nvidia-4e98afa2.pthz{https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-morevit/gcvit_base_224_nvidia-f009139b.pth)zgcvit_xxtiny.in1kzgcvit_xtiny.in1kzgcvit_tiny.in1kzgcvit_small.in1kzgcvit_base.in1kr   c                 K   &   t dddd|}tdd| i|S )N)rM   rM      rM   r   r   rp   gcvit_xxtinyr=  r=   )rO  r+   r@  r=  r>  model_kwargsr=   r=   r>   rO  [     rO  c                 K   rL  )N)r&   r   rM  r   r   rN  gcvit_xtinyr=  r=   )rT  rP  rQ  r=   r=   r>   rT  d  rS  rT  c                 K   rL  )Nr   r   rN  
gcvit_tinyr=  r=   )rU  rP  rQ  r=   r=   r>   rU  m  rS  rU  c                 K   ,   t d	dddddd|}td
d| i|S )Nr   )r&   rM        ri   rM   r   r   rp   r   r   r   gcvit_smallr=  r=   )rZ  rP  rQ  r=   r=   r>   rZ  v     rZ  c                 K   rV  )Nr   )r   r   r   r      rM   r   rY  
gcvit_baser=  r=   )r]  rP  rQ  r=   r=   r>   r]    r[  r]  r6  )r   )>rF   r   	functoolsr   typingr   r   r   r   r   r   torch.nnr/   	timm.datar	   r
   timm.layersr   r   r   r   r   r   r   r   r   r   r   _builderr   	_featuresr   _features_fxr   _manipulater   r   	_registryr   r   __all__r8  r   rI   r^   rh   rn   r.   r   r   r   r   r   r   r@  rK  default_cfgsrO  rT  rU  rZ  r]  r=   r=   r=   r>   <module>   sp    4$!8$
8P 
F
	