o
    پi
                    @   s  d Z ddlZddlmZ ddlmZmZmZ ddlm	Z	 ddl
mZmZmZmZmZmZmZmZ ddlZddlmZ ddlmZ dd	lmZmZ dd
lmZmZmZmZmZm Z  ddlm!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddlm'Z'm(Z(m)Z)m*Z*m+Z+ ddlm,Z,m-Z-m.Z.m/Z/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8m9Z9 ddl:m;Z;m<Z< g dZ=eG dd dZ>eG dd dZ?eG dd dZ@G dd dejAZBG dd dejAZCG dd  d ejAZDG d!d" d"ejAZEG d#d$ d$ejAZFdd&ejAd'eGd(eGd)dfd*d+ZHG d,d- d-ejAZIdd&ejAd'eGd(eGd)dfd.d/ZJd0eeK d1eKd)eKfd2d3ZLG d4d5 d5ejAZMG d6d7 d7ejAZNd8ejOd9eeK d)ejOfd:d;ZPe6d<ejOd9eeK d=eeK d)ejOfd>d?ZQd8ejOd@eeK d)ejOfdAdBZRe6d<ejOd@eeK d=eeK d)ejOfdCdDZSdEe>d9eeKeKf d)ee fdFdGZTG dHdI dIejAZUG dJdK dKejAZVd8ejOd9eeK d)ejOfdLdMZWe6d<ejOd9eeK d=eeK d)ejOfdNdOZXd8ejOd@eeK d)ejOfdPdQZYe6d<ejOd@eeK d=eeK d)ejOfdRdSZZG dTdU dUejAZ[G dVdW dWejAZ\G dXdY dYejAZ]G dZd[ d[ejAZ^G d\d] d]ejAZ_dEe>d=eeKeKf d)e>fd^d_Z`dEe@d`ed)e@fdadbZaG dcdd ddejAZb	e	f	g	g	h	%	i	j	k		l	mddneGdoeGdpecdqecdreGdseGdtecdueGdveGdweed dxeGdyeKd)eeGef fdzd{Ze	|	f	g	}	%	j	k		~		l	mddneGdoeGdpecdeddseGdueGdveGd9eeeKeKf  deKdweed dxeGdyeKd)eeGef fddZf	|	f	j	k	j	k		g			mddneGdoeGdseGdeGdueGdveGd9eeeKeKf  decdweedeededf f dxeGdyeKd)eeGef fddZgd)eeGef fddZheidi de@dddddefdiddde@dddddefdediddde@dddddeedidgdde@dddddeed|didgdde@dddddeed|ddde@dddddeed|dddde@dddddeed|didgddde@dddddefdiddddde@dddddeed|ddde@dddddeeddidgdddde@dddddeed|ddmdde@dddddeed|ddddde@dddddeed|ddddde@ddddddee de@ddddddegdlddde@dddddde@dddddde@dddddde@dddddde@dddddde@dddddde@ddddddҜef de@ddddddҜef de@ddddddҜef de@ddddddҜef de@ddddddҜefddٍde@ddddddҜefddٍde@ddddddҜefddٍde@ddddddҜefdddde@dddddddޜefddٍde@dddddddeg de@ddddddҜeg de@ddddddҜeg de@dddddddegdidldde@ddddddҜegdidde@dddddddޜegdidde@ddddddidmdeh de@ddddddiddeh de@ddddddiddeh de@ddddddiddeh de@ddddddiddeh ZjdeeGejOf dejAd)eeGejOf fddZkddeGdeeG decd`ed)ebf
ddZlddeGd`ed)eeGef fddZme;i d emd%ddemdddddemddd	d
emddd	demdddemdddemdddemdddddddemddeedddemdddddemd%ddemddd	demdd d	d!emd%dd"emd%dd#emdd$ddd%emdd&d'i d(emdd&d'd)emdd&d'd*emdd&d'd+emd%dd,emd%dd-emd%dd.emd%dd/emd%dd0emd%dd1emd%d2d3d4d5emdd6d2d3d7d8emdd9d	d:emd%d2d3d4d;emd%d2d3d4d<emdd=d2d3d7d>emdd?d2d3d7d@emddAd2d3d7i dBemddCdddDemd%d2d3d4dEemdddFemdddddddGemdd&d'dHemddId2d3d7dJemd%d2d3d4dKemddLd2d3d7dMemdd2d3dNdOemdddPemdddddddQemd%ddRemdd&d'dSemdeedTdUemdddddddVemddWdXddddYemdeedTi dZemddddddd[emddWdXdddd\emdeedTd]emddddddd^emddWdXdddd_emdeedTd`emdddddddaemddWdXddddbemddcd'ddemdddddddeemddWdXddddfemddcd'dgemdddddddhemddWdddidjemddcd'dkemdddddddlemddWdXdddZne<ddecd`ed)ebfdmdnZoe<ddecd`ed)ebfdodpZpe<ddecd`ed)ebfdqdrZqe<ddecd`ed)ebfdsdtZre<ddecd`ed)ebfdudvZse<ddecd`ed)ebfdwdxZte<ddecd`ed)ebfdydzZue<ddecd`ed)ebfd{d|Zve<ddecd`ed)ebfd}d~Zwe<ddecd`ed)ebfddZxe<ddecd`ed)ebfddZye<ddecd`ed)ebfddZze<ddecd`ed)ebfddZ{e<ddecd`ed)ebfddZ|e<ddecd`ed)ebfddZ}e<ddecd`ed)ebfddZ~e<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfddZe<ddecd`ed)ebfdÐdĄZe<ddecd`ed)ebfdŐdƄZe<ddecd`ed)ebfdǐdȄZe<ddecd`ed)ebfdɐdʄZe<ddecd`ed)ebfdːd̄Ze<ddecd`ed)ebfd͐d΄Ze<ddecd`ed)ebfdϐdЄZe<ddecd`ed)ebfdѐd҄Ze<ddecd`ed)ebfdӐdԄZe<ddecd`ed)ebfdՐdքZe<ddecd`ed)ebfdאd؄Ze<ddecd`ed)ebfdِdڄZe<ddecd`ed)ebfdېd܄ZdS (  a   MaxVit and CoAtNet Vision Transformer - CNN Hybrids in PyTorch

This is a from-scratch implementation of both CoAtNet and MaxVit in PyTorch.

99% of the implementation was done from papers, however last minute some adjustments were made
based on the (as yet unfinished?) public code release https://github.com/google-research/maxvit

There are multiple sets of models defined for both architectures. Typically, names with a
 `_rw` suffix are my own original configs prior to referencing https://github.com/google-research/maxvit.
These configs work well and appear to be a bit faster / lower resource than the paper.

The models without extra prefix / suffix' (coatnet_0_224, maxvit_tiny_224, etc), are intended to
match paper, BUT, without any official pretrained weights it's difficult to confirm a 100% match.

Papers:

MaxViT: Multi-Axis Vision Transformer - https://arxiv.org/abs/2204.01697
@article{tu2022maxvit,
  title={MaxViT: Multi-Axis Vision Transformer},
  author={Tu, Zhengzhong and Talebi, Hossein and Zhang, Han and Yang, Feng and Milanfar, Peyman and Bovik, Alan and Li, Yinxiao},
  journal={ECCV},
  year={2022},
}

CoAtNet: Marrying Convolution and Attention for All Data Sizes - https://arxiv.org/abs/2106.04803
@article{DBLP:journals/corr/abs-2106-04803,
  author    = {Zihang Dai and Hanxiao Liu and Quoc V. Le and Mingxing Tan},
  title     = {CoAtNet: Marrying Convolution and Attention for All Data Sizes},
  journal   = {CoRR},
  volume    = {abs/2106.04803},
  year      = {2021}
}

Hacked together by / Copyright 2022, Ross Wightman
    N)OrderedDict)	dataclassreplacefield)partial)AnyCallableDictListOptionalSetTupleUnion)nn)Final)IMAGENET_DEFAULT_MEANIMAGENET_DEFAULT_STD)MlpConvMlpDropPath	LayerNormClassifierHeadNormMlpClassifierHead)create_attnget_act_layerget_norm_layerget_norm_act_layercreate_conv2dcreate_pool2d)trunc_normal_tf_	to_2tupleextend_tuplemake_divisible_assert)	RelPosMlp
RelPosBiasRelPosBiasTfuse_fused_attnresize_rel_pos_bias_table   )build_model_with_cfg)feature_take_indices)register_notrace_function)named_applycheckpoint_seq)generate_default_cfgsregister_model)
MaxxVitCfgMaxxVitConvCfgMaxxVitTransformerCfgMaxxVitc                   @   s2  e Zd ZU dZdZeed< dZeed< dZ	e
ed< dZeed< dZeed	< dZeed
< dZe
ed< dZe
ed< dZeed< dZeed< dZeed< dZeed< dZeeeef  ed< dZeeeef  ed< dZeed< dZeed< dZee
 ed< dZeed< dZeed< d Zeed!< d"Ze
ed#< d$d% Z dS )&r3   z-Configuration for MaxxVit transformer blocks.    dim_headT
head_first      @expand_ratioexpand_firstshortcut_bias	attn_bias        	attn_drop	proj_dropavg2	pool_typebiasrel_pos_type   rel_pos_dimpartition_ratioNwindow_size	grid_sizeFno_block_attnuse_nchw_attninit_valuesgelu	act_layerlayernorm2d
norm_layer	layernormnorm_layer_clư>norm_epsc                 C   sJ   | j d urt| j | _ | jd ur!t| j| _| j d u r#| j| _ d S d S d S N)rH   r    rG   self rW   G/home/ubuntu/.local/lib/python3.10/site-packages/timm/models/maxxvit.py__post_init__V   s   


z#MaxxVitTransformerCfg.__post_init__)!__name__
__module____qualname____doc__r6   int__annotations__r7   boolr9   floatr:   r;   r<   r>   r?   rA   strrC   rE   rF   rG   r   r   rH   rI   rJ   rK   rM   rO   rQ   rS   rY   rW   rW   rW   rX   r3   =   s0   
 r3   c                   @   s  e Zd ZU dZdZeed< dZeed< dZ	e
ed< dZeed	< d
Zeed< dZe
ed< dZe
ed< dZeed< dZeed< dZeed< dZeed< dZe
ed< dZeed< dZeed< dZeed< dZee ed< dZeed < dZeed!< dZeed"< d#Zee ed$< d%d& Zd#S )'r2   z-Configuration for MaxxVit convolution blocks.mbconv
block_typer8   r9   Texpand_output   kernel_sizer)   
group_sizeFpre_norm_actoutput_biasdwstride_moder@   rA   downsample_pool_type padding
attn_earlyse
attn_layersiluattn_act_layer      ?
attn_ratiorR   rK   rL   rM   rO   rQ   NrS   c                 C   sf   | j dv sJ | j dk}| js|rdnd| _| js|sd| _| jd u r*|r'dnd| _| jp/| j| _d S )N)rc   convnextrc   batchnorm2drN   rP   h㈵>rR   )rd   rO   rQ   rS   rm   rA   )rV   
use_mbconvrW   rW   rX   rY   w   s   


zMaxxVitConvCfg.__post_init__)rZ   r[   r\   r]   rd   rb   r_   r9   ra   re   r`   rg   r^   rh   ri   rj   rl   rA   rm   ro   rp   rr   rt   rv   rK   r   rM   rO   rQ   rS   rY   rW   rW   rW   rX   r2   _   s.   
 r2   c                   @   s   e Zd ZU dZdZeedf ed< dZeedf ed< dZ	ee
eeedf f df ed< d	Ze
eeeef f ed
< dZeed< eedZeed< eedZeed< dZee ed< dZeed< dS )r1   z!Configuration for MaxxVit models.`           .	embed_dim   rf      r   depths)Cr   Tr   rd   @   
stem_widthF	stem_bias)default_factoryconv_cfgtransformer_cfgNhead_hidden_sizevit_effweight_init)rZ   r[   r\   r]   r   r   r^   r_   r   rd   r   rb   r   r   r`   r   r2   r   r3   r   r   r   r   rW   rW   rW   rX   r1      s   
 $r1   c                          e Zd ZU dZee ed< 								ddedee d	ed
edededee	 de
de
f fddZddejdeej dejfddZ  ZS )Attention2dz)Multi-head attention for 2D NCHW tensors.
fused_attnNr5   Tr=   dimdim_outr6   rB   r:   r7   rel_pos_clsr>   r?   c
                    s   t    |p|}|r|n|}
|
| | _|| _|| _|d | _t | _tj	||
d d|d| _
|r7|| jdnd| _t|| _tj	|
|d|d| _t|	| _dS )  
        Args:
            dim: Input dimension.
            dim_out: Output dimension (defaults to input dimension).
            dim_head: Dimension per attention head.
            bias: Whether to use bias in qkv and projection.
            expand_first: Whether to expand channels before or after qkv.
            head_first: Whether heads are first in tensor layout.
            rel_pos_cls: Relative position class to use.
            attn_drop: Attention dropout rate.
            proj_drop: Projection dropout rate.
              rf   r)   rB   	num_headsN)super__init__r   r6   r7   scaler'   r   r   Conv2dqkvrel_posDropoutr>   projr?   rV   r   r   r6   rB   r:   r7   r   r>   r?   dim_attn	__class__rW   rX   r      s   


zAttention2d.__init__xshared_rel_posreturnc                 C   s  |j \}}}}| jr"| ||| j| jd djddd\}}}	n| ||d| j| jdd\}}}	| j	r|d }
| j
d urF| j
 }
n|d urL|}
tjjj|dd |dd |	dd |
| jrm| jjndddd|d||}n9|| j }|dd| }| j
d ur| 
|}n|d ur|| }|jdd}| |}|	|dd |d||}| |}| |}|S )	Nrf   r   r   r)   r=   	attn_mask	dropout_p)shaper7   r   viewr   r6   chunkreshapeunbindr   r   get_biastorchr   
functionalscaled_dot_product_attention	transpose
contiguoustrainingr>   pr   softmaxr   r?   )rV   r   r   Br   HWqkvr<   attnrW   rW   rX   forward   s>   0(





zAttention2d.forwardNr5   TTTNr=   r=   rT   rZ   r[   r\   r]   r   r`   r_   r^   r   r   ra   r   r   Tensorr   __classcell__rW   rW   r   rX   r      s>   
 	
*'r   c                       r   )AttentionClz/Channels-last multi-head attention (B, ..., C).r   Nr5   Tr=   r   r   r6   rB   r:   r7   r   r>   r?   c
                    s   t    |p|}|r||kr|n|}
|
| dksJ d|
| | _|| _|| _|d | _t | _tj	||
d |d| _
|rD|| jdnd| _t|| _tj	|
||d| _t|	| _dS )r   r   z(attn dim should be divisible by head_dimr   rf   r   r   N)r   r   r   r6   r7   r   r'   r   r   Linearr   r   r   r>   r   r?   r   r   rW   rX   r      s   


zAttentionCl.__init__r   r   r   c           
      C   sj  |j d }|j d d }| jr+| ||d| j| jd ddjddd\}}}n| ||dd| j| jdd	d\}}}| j
rmd }| jd urS| j }n|d urY|}tjjj||||| jrh| jjndd}n1|| j }||d	d }	| jd ur| j|	|d
}	n|d ur|	| }	|	jdd}	| |	}	|	| }|dd|d }| |}| |}|S )Nr   r   rf   r)   r   r   r=   r   r   r   )r   )r   r7   r   r   r   r6   r   r   r   r   r   r   r   r   r   r   r   r   r>   r   r   r   r   r?   )
rV   r   r   r   restore_shaper   r   r   r<   r   rW   rW   rX   r     s:   
80





zAttentionCl.forwardr   rT   r   rW   rW   r   rX   r      s>   
 	
*(r   c                       F   e Zd ZdZddededef fddZd	ej	d
ej	fddZ
  ZS )
LayerScalezPer-channel scaling layer.ry   Fr   rK   inplacec                    *   t    || _t|t| | _dS z
        Args:
            dim: Number of channels.
            init_values: Initial scaling value.
            inplace: Whether to perform inplace operations.
        Nr   r   r   r   	Parameterr   onesgammarV   r   rK   r   r   rW   rX   r   8     
zLayerScale.__init__r   r   c                 C   s   | j }| jr||S || S rT   )r   r   mul_rV   r   r   rW   rW   rX   r   C  s   zLayerScale.forwardry   FrZ   r[   r\   r]   r^   ra   r`   r   r   r   r   r   rW   rW   r   rX   r   5      r   c                       r   )LayerScale2dz)Per-channel scaling layer for 2D tensors.ry   Fr   rK   r   c                    r   r   r   r   r   rW   rX   r   K  r   zLayerScale2d.__init__r   r   c                 C   s*   | j dddd}| jr||S || S )Nr)   r   )r   r   r   r   r   rW   rW   rX   r   V  s   zLayerScale2d.forwardr   r   rW   rW   r   rX   r   H  r   r   c                       sT   e Zd ZdZ			ddedededed	ef
 fd
dZdej	dej	fddZ
  ZS )Downsample2da5  A downsample pooling module supporting several maxpool and avgpool modes.

    * 'max' - MaxPool2d w/ kernel_size 3, stride 2, padding 1
    * 'max2' - MaxPool2d w/ kernel_size = stride = 2
    * 'avg' - AvgPool2d w/ kernel_size 3, stride 2, padding 1
    * 'avg2' - AvgPool2d w/ kernel_size = stride = 2
    r@   rn   Tr   r   rA   ro   rB   c                    s   t    |dv sJ |dkrtddd|pdd| _n*|dkr*tdd|p%dd	| _n|d
kr;td
ddd|p6dd| _n
td
d|pAdd	| _||krUtj||d|d| _dS t | _dS )z
        Args:
            dim: Input dimension.
            dim_out: Output dimension.
            pool_type: Type of pooling operation.
            padding: Padding mode.
            bias: Whether to use bias in expansion conv.
        )maxmax2avgr@   r   rf   r   r)   )rg   stridero   r   r   )ro   r   F)rg   r   count_include_padro   r   N)r   r   r   poolr   r   expandIdentity)rV   r   r   rA   ro   rB   r   rW   rX   r   d  s   

zDownsample2d.__init__r   r   c                 C      |  |}| |}|S rT   )r   r   rV   r   rW   rW   rX   r        

zDownsample2d.forward)r@   rn   T)rZ   r[   r\   r]   r^   rb   r`   r   r   r   r   r   rW   rW   r   rX   r   [  s"    !r   rn   modulenameschemer   c                 C   s  t | tjtjfr|dkr&tjj| jdd | jdur$tj| j dS dS |dkrAt	| jdd | jdur?tj| j dS dS |dkr\tj
| j | jdurZtj| j dS dS tj| j | jdurd|v rwtjj| jdd dS tj| j dS dS dS )	z&Initialize transformer module weights.normal{Gz?stdNtrunc_normalxavier_normalmlprR   )
isinstancer   r   r   initnormal_weightrB   zeros_r   xavier_normal_xavier_uniform_)r   r   r   rW   rW   rX   _init_transformer  s.   



r   c                       s   e Zd ZdZdde dfdedededee d	ed
ef fddZ	dde
ddfddZddejdeej dejfddZ  ZS )TransformerBlock2daY  Transformer block with 2D downsampling.

    '2D' NCHW tensor layout

    Some gains can be seen on GPU using a 1D / CL block, BUT w/ the need to switch back/forth to NCHW
    for spatial pooling, the benefit is minimal so ended up using just this variant for CoAt configs.

    This impl was faster on TPU w/ PT XLA than the 1D experiment.
    r)   Nr=   r   r   r   r   cfg	drop_pathc           	   
      sZ  t    tt|j|jd}t|j}|dkr:t|||j	|j
d| _ttd||fdt|||j	dfg| _n||ks@J t | _||| _t|||j|j|j||j|jd| _|jrft||jdnt | _|d	krst|nt | _||| _t|t||j  ||jd
| _!|jrt||jdnt | _"|d	krt|| _#dS t | _#dS )a  
        Args:
            dim: Input dimension.
            dim_out: Output dimension.
            stride: Stride for downsampling.
            rel_pos_cls: Relative position class.
            cfg: Transformer block configuration.
            drop_path: Drop path rate.
        epsr   )rA   rB   normdownrA   )r6   r:   rB   r   r>   r?   rK   r=   in_featureshidden_featuresrM   dropN)$r   r   r   r   rO   rS   r   rM   r   rA   r;   shortcutr   
Sequentialr   norm1r   r   r6   r:   r<   r>   r?   r   rK   r   ls1r   
drop_path1norm2r   r^   r9   r   ls2
drop_path2)	rV   r   r   r   r   r   r   rO   rM   r   rW   rX   r     sB   






$zTransformerBlock2d.__init__rn   r   r   c                 C      t tt|d|  d S Nr   )r-   r   r   rV   r   rW   rW   rX   init_weights     zTransformerBlock2d.init_weightsr   r   c              
   C   sN   |  || | | j| ||d }|| | | | | }|S )Nr   )	r
  r  r  r   r  r  r  r   r  )rV   r   r   rW   rW   rX   r     s   * zTransformerBlock2d.forwardrn   rT   )rZ   r[   r\   r]   r3   r^   r   r   ra   r   rb   r  r   r   r   r   rW   rW   r   rX   r     s*    7*r   c                 C   s  t | tjr|dkr#tjj| jdd | jdur!tj| j dS dS |dkr>t| jdd | jdur<tj| j dS dS |dkrYtj	| j | jdurWtj| j dS dS | j
d | j
d  | j }|| j }tj| jdtd	|  | jdurtj| j dS dS dS )
z&Initialize convolution module weights.r   r   r   Nr   r   r   r)   g       @)r   r   r   r   r   r   rB   r   r   r   rg   out_channelsgroupsmathsqrt)r   r   r   fan_outrW   rW   rX   
_init_conv  s.   




r  rh   channelsc                 C   s    | sdS ||  dksJ ||  S )z3Calculate number of groups for grouped convolution.r)   r   rW   )rh   r  rW   rW   rX   
num_groups  s   r   c                       sx   e Zd ZdZdde dfdedededeeef d	ed
ef fddZdde	ddfddZ
dejdejfddZ  ZS )MbConvBlockzGPre-Norm Conv Block - 1x1 - kxk - 1x1, w/ inverted bottleneck (expand).r)   r)   r)   r=   in_chsout_chsr   dilationr   r   c              	      s  t t|   tt|j|j|jd}t|j	r|n||j
 }t|j|}	|dkr7t|||j|j|jd| _nt | _|jdv sCJ d\}
}}|jdkrU||d }
}n|jdkrb||d }}n||d	 }}|||jd
| _|
dkrt|||j|jd| _nt | _t||d|d| _||| _t|||j|||	|jd| _i }t|jt r|jdks|jdkr|j!|d< t"|j#|j	r|n| |d< |j$rt%|j|fi || _&||| _'d| _(nd| _&||| _'t%|j|fi || _(t||d|jd| _)|dkrt*|| _+dS t | _+dS )a  
        Args:
            in_chs: Input channels.
            out_chs: Output channels.
            stride: Stride for conv.
            dilation: Dilation for conv.
            cfg: Convolution block configuration.
            drop_path: Drop path rate.
        r   r   )rA   rB   ro   )r   1x1rk   )r)   r)   r)   r   r)   r&  r   )	apply_act)rA   ro   )r   )r   r%  r  ro   rq   ecarM   rd_channelsNr   r=   ),r   r!  r   r   r   rO   rM   rS   r"   re   r9   r   rh   r   rA   rj   ro   r
  r   r   rl   ri   pre_normrm   r  r   	conv1_1x1r  rg   	conv2_kxkr   rr   rb   rt   r^   rv   rp   r   se_earlyr  rq   	conv3_1x1r   r   )rV   r#  r$  r   r%  r   r   norm_act_layermid_chsr  stride_poolstride_1stride_2
dilation_2attn_kwargsr   rW   rX   r     sP   










&zMbConvBlock.__init__rn   r   r   Nc                 C   r  r  r-   r   r  r  rW   rW   rX   r  \  r  zMbConvBlock.init_weightsr   c                 C   s   |  |}| |}| |}| |}| |}| |}| jd ur(| |}| |}| jd ur7| |}| 	|}| 
|| }|S rT   )r
  r*  r  r+  r  r,  r-  r  rq   r.  r   rV   r   r
  rW   rW   rX   r   _  s   











zMbConvBlock.forwardr  )rZ   r[   r\   r]   r2   r^   r   ra   r   rb   r  r   r   r   r   rW   rW   r   rX   r!    s*    
Hr!  c                       sv   e Zd ZdZdddde ddfded	ee d
ededeeef dedede	f fddZ
dejdejfddZ  ZS )ConvNeXtBlockzConvNeXt Block.N   r)   r"  Tr=   r#  r$  rg   r   r%  r   conv_mlpr   c	              	      s  t    |p|}t|j}	|rtt|j|jd}
t}nd|jv s$J t	}
t
}|| _|dkr6t||| _n||krFtj||d|jd| _nt | _|jdv sRJ d\}}|jdkr^|}n|}|dkrnt|||jd	| _nt | _t|||||d d
|jd| _|
|| _||t|j| |j|	d| _|r|jrt||jnt | _n|jrt||jnt | _|dkrt|| _ dS t | _ dS )ay  
        Args:
            in_chs: Input channels.
            out_chs: Output channels.
            kernel_size: Kernel size for depthwise conv.
            stride: Stride for conv.
            dilation: Dilation for conv.
            cfg: Convolution block configuration.
            conv_mlp: Whether to use convolutional MLP.
            drop_path: Drop path rate.
        r   rP   r   r)   )rg   rB   )r   rk   r"  r   r  T)rg   r   r%  	depthwiserB   )rB   rM   r=   N)!r   r   r   rM   r   r   rO   rS   r   r   r   use_conv_mlpr   r
  r   r   rj   r   rl   rm   r  r   conv_dwr  r^   r9   r   rK   r   lsr   r   r   )rV   r#  r$  rg   r   r%  r   r:  r   rM   rO   	mlp_layerr1  	stride_dwr   rW   rX   r   y  sB   





$zConvNeXtBlock.__init__r   r   c                 C   s   |  |}| |}| |}| jr"| |}| |}| |}n|dddd}| |}| |}| |}|dddd}| || }|S Nr   r   rf   r)   )	r
  r  r=  r<  r  r   r>  permuter   r7  rW   rW   rX   r     s   







zConvNeXtBlock.forward)rZ   r[   r\   r]   r2   r^   r   r   r`   ra   r   r   r   r   r   rW   rW   r   rX   r8  v  s6    
	Ar8  r   rG   c                 C   s   | j \}}}}t||d  dkd| d|d  d t||d  dkd| d|d  d | |||d  |d ||d  |d |} | ddddd	d
 d|d |d |}|S )z'Partition into non-overlapping windows.r   height () must be divisible by window ()r)   width (rf   r      r   r   r   r#   r   rB  r   )r   rG   r   r   r   r   windowsrW   rW   rX   window_partition     ((,,rJ  rI  img_sizec                 C   sf   |\}}| j d }| d||d  ||d  |d |d |}|dddddd d|||}|S )zReverse window partition.r   r   r)   rf   r   rG  r   r   r   rB  r   rI  rG   rL  r   r   r   r   rW   rW   rX   window_reverse  
   
,$rO  rH   c              	   C   s   | j \}}}}t||d  dkd| d|d   t||d  dkd| d|d   | ||d ||d  |d ||d  |} | dddddd	 d
|d |d |}|S )z6Partition into overlapping windows with grid striding.r   height  must be divisible by grid r)   width r   rG  rf   r   r   rH  )r   rH   r   r   r   r   rI  rW   rW   rX   grid_partition     &&,,rT  c                 C   sf   |\}}| j d }| d||d  ||d  |d |d |}|dddddd d|||}|S )zReverse grid partition.r   r   r)   rf   rG  r   r   rM  rI  rH   rL  r   r   r   r   rW   rW   rX   grid_reverse  rP  rW  r   c                 C   sV   d}| j dkrtt|| jd}|S | j dkrtt|d}|S | j dkr)tt|d}|S )z,Get relative position class based on config.Nr   )rG   
hidden_dimrB   )rG   bias_tf)rC   r   r$   rE   r%   r&   )r   rG   r   rW   rW   rX   get_rel_pos_cls  s   


rZ  c                	       sL   e Zd ZdZde dfdedededef fdd	Zd
d Z	dd Z
  ZS )PartitionAttentionClzRGrid or Block partition + Attn + FFN.

    NxC 'channels last' tensor layout.
    blockr=   r   partition_typer   r   c              
      s&  t    tt|j|jd}t|j}|dk| _t	| jr |j
n|j| _t|| j}||| _t|||j|j|j||j|jd| _|jrLt||jdnt | _|dkrYt|nt | _||| _t|t||j  ||jd| _!|jr|t||jdnt | _"|dkrt|| _#d S t | _#d S )Nr   r\  r6   rB   r7   r   r>   r?   r  r=   r  )$r   r   r   r   rQ   rS   r   rM   partition_blockr    rG   rH   partition_sizerZ  r  r   r6   r<   r7   r>   r?   r   rK   r   r   r   r  r   r  r  r   r^   r9   r   r  r  rV   r   r]  r   r   rO   rM   r   r   rW   rX   r     s8   





$zPartitionAttentionCl.__init__c                 C   sb   |j dd }| jrt|| j}nt|| j}| |}| jr(t|| j|}|S t|| j|}|S )Nr)   rf   )r   r_  rJ  r`  rT  r   rO  rW  rV   r   rL  partitionedrW   rW   rX   _partition_attn-     
z$PartitionAttentionCl._partition_attnc              
   C   D   ||  | | | | }|| | | | | }|S rT   r  r  rd  r  r  r  r   r  r   rW   rW   rX   r   <       zPartitionAttentionCl.forward)rZ   r[   r\   r]   r3   r^   rb   ra   r   rd  r   r   rW   rW   r   rX   r[    s     &r[  c                       sb   e Zd ZdZe dfdededef fddZdej	d	ej	fd
dZ
dej	d	ej	fddZ  ZS )ParallelPartitionAttentionzQExperimental. Grid and Block partition + single FFN.

    NxC tensor layout.
    r=   r   r   r   c              
      s^  t    |d dksJ tt|j|jd}t|j}|j|j	ks$J t
|j| _t|| j}||| _t||d |j|j|j||j|jd| _t||d |j|j|j||j|jd| _|jrgt||jdnt | _|dkrtt|nt | _||| _t|t||j  |||jd| _!|jrt||jdnt | _"|dkrt|| _#dS t | _#dS )	z
        Args:
            dim: Input dimension.
            cfg: Transformer block configuration.
            drop_path: Drop path rate.
        r   r   r   r^  r  r=   )r  r  out_featuresrM   r	  N)$r   r   r   r   rQ   rS   r   rM   rG   rH   r    r`  rZ  r  r   r6   r<   r7   r>   r?   
attn_block	attn_gridrK   r   r   r   r  r   r  r  r   r^   r9   r   r  r  )rV   r   r   r   rO   rM   r   r   rW   rX   r   H  sP   





$z#ParallelPartitionAttention.__init__r   r   c                 C   sh   |j dd }t|| j}| |}t|| j|}t|| j}| |}t|| j|}tj	||gddS )Nr)   rf   r   r   )
r   rJ  r`  rk  rO  rT  rl  rW  r   cat)rV   r   rL  partitioned_blockx_windowpartitioned_gridx_gridrW   rW   rX   rd    s   

z*ParallelPartitionAttention._partition_attnc              
   C   rf  rT   rg  r   rW   rW   rX   r     rh  z"ParallelPartitionAttention.forward)rZ   r[   r\   r]   r3   r^   ra   r   r   r   rd  r   r   rW   rW   r   rX   ri  B  s    7ri  c              	   C   s   | j \}}}}t||d  dkd| d|d  d t||d  dkd| d|d  d | ||||d  |d ||d  |d } | ddddd	d
 d||d |d }|S )z#Partition windows for NCHW tensors.r   rC  rD  rE  r)   rF  r   rG  rf   r   r   rH  )r   rG   r   r   r   r   rI  rW   rW   rX   window_partition_nchw  rK  rr  c              	   C   sf   |\}}| j d }| d||d  ||d  ||d |d }|dddddd d|||}|S )z*Reverse window partition for NCHW tensors.r)   r   r   rf   rG  r   r   rM  rN  rW   rW   rX   window_reverse_nchw  rP  rs  c              
   C   s   | j \}}}}t||d  dkd| d|d   t||d  dkd| d|d   | |||d ||d  |d ||d  } | dddddd	 d
||d |d }|S )z Grid partition for NCHW tensors.r   rQ  rR  r)   rS  rf   r   r   rG  r   rH  )r   rH   r   r   r   r   rI  rW   rW   rX   grid_partition_nchw  rU  rt  c              	   C   sf   |\}}| j d }| d||d  ||d  ||d |d }|dddddd d|||}|S )z(Reverse grid partition for NCHW tensors.r)   r   r   rf   rG  r   r   rM  rV  rW   rW   rX   grid_reverse_nchw  rP  ru  c                	       sh   e Zd ZdZde dfdedededef fdd	Zd
e	j
de	j
fddZd
e	j
de	j
fddZ  ZS )PartitionAttention2dzHGrid or Block partition + Attn + FFN.

    '2D' NCHW tensor layout.
    r\  r=   r   r]  r   r   c              
      s&  t    tt|j|jd}t|j}|dk| _t	| jr |j
n|j| _t|| j}||| _t|||j|j|j||j|jd| _|jrLt||jdnt | _|dkrYt|nt | _||| _t|t||j  ||jd| _!|jr|t||jdnt | _"|dkrt|| _#dS t | _#dS )z
        Args:
            dim: Input dimension.
            partition_type: Partition type ('block' or 'grid').
            cfg: Transformer block configuration.
            drop_path: Drop path rate.
        r   r\  r^  r  r=   r  N)$r   r   r   r   rO   rS   r   rM   r_  r    rG   rH   r`  rZ  r  r   r6   r<   r7   r>   r?   r   rK   r   r   r   r  r   r  r  r   r^   r9   r   r  r  ra  r   rW   rX   r     s8   





$zPartitionAttention2d.__init__r   r   c                 C   sb   |j dd  }| jrt|| j}nt|| j}| |}| jr(t|| j|}|S t|| j|}|S )Nr   )r   r_  rr  r`  rt  r   rs  ru  rb  rW   rW   rX   rd    re  z$PartitionAttention2d._partition_attnc              
   C   rf  rT   rg  r   rW   rW   rX   r     rh  zPartitionAttention2d.forward)rZ   r[   r\   r]   r3   r^   rb   ra   r   r   r   rd  r   r   rW   rW   r   rX   rv    s     -rv  c                       sZ   e Zd ZdZde e dfdededededed	ef fd
dZdddZ	dd Z
  ZS )MaxxVitBlockz;MaxVit conv, window partition + FFN , grid partition + FFN.r)   r=   r   r   r   r   r   r   c           
         s   t    |j| _|jdkrtnt}||||||d| _t|||d}| jr(t	nt
}	|jr/dn|	di || _|	dddi|| _dS )a^  Initialize MaxxVitBlock.

        Args:
            dim: Input channel dimension.
            dim_out: Output channel dimension.
            stride: Stride for downsampling.
            conv_cfg: Configuration for convolutional blocks.
            transformer_cfg: Configuration for transformer blocks.
            drop_path: Drop path rate.
        rw   r   r   r   r   r   r   Nr]  gridrW   )r   r   rJ   	nchw_attnrd   r8  r!  convdictrv  r[  rI   rk  rl  )
rV   r   r   r   r   r   r   conv_clsr5  partition_layerr   rW   rX   r     s   
zMaxxVitBlock.__init__rn   c                 C   sJ   | j d urttt|d| j  ttt|d| j ttt|d| j d S r  )rk  r-   r   r   rl  r  r|  r  rW   rW   rX   r  #  s   
zMaxxVitBlock.init_weightsc                 C   sX   |  |}| js|dddd}| jd ur| |}| |}| js*|dddd}|S rA  )r|  r{  rB  rk  rl  r   rW   rW   rX   r   )  s   



zMaxxVitBlock.forwardr  )rZ   r[   r\   r]   r2   r3   r^   ra   r   r  r   r   rW   rW   r   rX   rw    s*    
rw  c                       sx   e Zd ZdZdde e dfdedededed	ed
edef fddZdde	ddfddZ
dejdejfddZ  ZS )ParallelMaxxVitBlockzYMaxVit block with parallel cat(window + grid), one FF.

    Experimental timm block.
    r)   r   r=   r   r   r   num_convr   r   r   c           
         s   t    |jdkrtnt}|dkr2||||||dg}	|	|||||dg|d  7 }	tj|	 | _n
||||||d| _t|||d| _	dS )aa  
        Args:
            dim: Input dimension.
            dim_out: Output dimension.
            stride: Stride for first conv block.
            num_conv: Number of convolution blocks.
            conv_cfg: Convolution block configuration.
            transformer_cfg: Transformer block configuration.
            drop_path: Drop path rate.
        rw   r)   rx  )r   r   ry  N)
r   r   rd   r8  r!  r   r  r|  ri  r   )
rV   r   r   r   r  r   r   r   r~  convsr   rW   rX   r   =  s   
zParallelMaxxVitBlock.__init__rn   r   r   Nc                 C   s,   t tt|d| j t tt|d| j d S r  )r-   r   r   r   r  r|  r  rW   rW   rX   r  \  s   z!ParallelMaxxVitBlock.init_weightsr   c                 C   s8   |  |}|dddd}| |}|dddd}|S rA  )r|  rB  r   r   rW   rW   rX   r   `  s
   

zParallelMaxxVitBlock.forwardr  )rZ   r[   r\   r]   r2   r3   r^   ra   r   rb   r  r   r   r   r   rW   rW   r   rX   r  7  s0    	r  c                       s   e Zd ZdZdddde e dfdeded	ed
edeeef dee	ee	 f dededee
ee
 f f fddZdejdejfddZ  ZS )MaxxVitStagezEMaxxVit stage consisting of mixed convolution and transformer blocks.r   rG  )   r  r   r=   r#  r$  r   depth	feat_sizeblock_typesr   r   r   c
              
      s  t    d| _t||}g }
t|D ]p\}}|dkr|nd}|dv s%J |dkrA|jdkr0tnt}|
||||||	| dg7 }
n@|dkrZt||}|
t	||||||	| d	g7 }
n'|d
krn|
t
||||||	| dg7 }
n|dkr|
t||||||	| dg7 }
|}qtj|
 | _dS )a  
        Args:
            in_chs: Input channels.
            out_chs: Output channels.
            stride: Stride for first block.
            depth: Number of blocks in stage.
            feat_size: Feature map size.
            block_types: Block types ('C' for conv, 'T' for transformer, etc).
            transformer_cfg: Transformer block configuration.
            conv_cfg: Convolution block configuration.
            drop_path: Drop path rate(s).
        Fr   r)   )r   r   MPMr   rw   rx  r   )r   r   r   r   r  )r   r   r   r   r  N)r   r   grad_checkpointingr!   	enumeraterd   r8  r!  rZ  r   rw  r  r   r  blocks)rV   r#  r$  r   r  r  r  r   r   r   r  itblock_strider~  r   r   rW   rX   r   k  s\   



zMaxxVitStage.__init__r   r   c                 C   s.   | j rtj st| j|}|S | |}|S rT   )r  r   jitis_scriptingr.   r  r   rW   rW   rX   r     s
   
zMaxxVitStage.forward)rZ   r[   r\   r]   r3   r2   r^   r   r   rb   ra   r
   r   r   r   r   r   rW   rW   r   rX   r  h  s:    
	
Hr  c                       sz   e Zd ZdZ						dded	ed
edededededef fddZddeddfddZ	de
jde
jfddZ  ZS )Stemz"Stem layer for feature extraction.rf   rn   FrL   rx   ry   r#  r$  rg   ro   rB   rM   rO   rS   c	           
         s   t    t|ttfst|}tt|||d}	|d | _d| _	t
||d |d||d| _|	|d | _t
|d |d |d||d| _dS )ae  
        Args:
            in_chs: Input channels.
            out_chs: Output channels.
            kernel_size: Kernel size for convolutions.
            padding: Padding mode.
            bias: Whether to use bias.
            act_layer: Activation layer.
            norm_layer: Normalization layer.
            norm_eps: Normalization epsilon.
        r   r   r   r   )r   ro   rB   r)   N)r   r   r   listtupler    r   r   r$  r   r   conv1r  conv2)
rV   r#  r$  rg   ro   rB   rM   rO   rS   r/  r   rW   rX   r     s   

"zStem.__init__r   r   Nc                 C   r  r  r6  r  rW   rW   rX   r    r  zStem.init_weightsr   c                 C   "   |  |}| |}| |}|S rT   )r  r  r  r   rW   rW   rX   r        


zStem.forward)rf   rn   FrL   rx   ry   r  )rZ   r[   r\   r]   r^   rb   r`   ra   r   r  r   r   r   r   rW   rW   r   rX   r    s6    	"r  c                 C   sF   | j dur| js
J | S |d | j |d | j f}t| ||d} | S )z>Configure window size based on image size and partition ratio.Nr   r)   )rG   rH   )rG   rH   rF   r   )r   rL  r`  rW   rW   rX   cfg_window_size  s   

r  kwargsc                 K   s   i }i }i }|  D ]$\}}|dr|||dd< q
|dr*|||dd< q
|||< q
t| ft| jfi |t| jfi |d|} | S )z-Overlay keyword arguments onto configuration.transformer_rn   conv_)r   r   )items
startswithr   r   r   )r   r  transformer_kwargsconv_kwargsbase_kwargsr   r   rW   rW   rX   _overlay_kwargs  s$   


r  c                       s  e Zd ZdZ						d=dedeeeeef f d	ed
edede	de	de
f fddZd>dejdededdfddZejjdee fddZejjd?dedeee
f fddZejjd@d eddfd!d"Zejjdejfd#d$ZdAd
edee ddfd%d&Z				'	dBd(ejd)eeeee f  d*ed+ed,ed-edeeej eejeej f f fd.d/Z	0		dCd)eeee f d1ed2edeed3f fd4d5Zd(ejdejfd6d7Zd?d(ejd8edejfd9d:Z d(ejdejfd;d<Z!  Z"S )Dr4   z{CoaTNet + MaxVit base model.

    Highly configurable for different block compositions, tensor layouts, pooling types.
       rf     r   r=   r   rL  in_chansnum_classesglobal_pool	drop_ratedrop_path_rater  c                    sd  t    t|}|rt|fi |}t|j|}	|| _|| _|jd  | _	| _|| _
d| _g | _t||j|jj|j|jj|jj|jjd| _| jj}
|  jt| jjdddg7  _tdd t|t|
D }t|j}t|j|ksvJ d	d td
|t|j |jD }| jj}g }t!|D ]B}d |j| }t fdd|D }|t"|||j| |j#| |j|	||| dg7 }|
 9 }
|}|  jt||
d| dg7  _qt$j%| | _&t't(|jj|jjd}|j)rt$* | _+|j)| _)t,| j	|| j)|||d| _-n| j	| _)|| j	| _+t.| j	|||d| _-|j/dv sJ |j/r0t0t'| j1|j/d|  dS dS )a  
        Args:
            cfg: Model configuration.
            img_size: Input image size.
            in_chans: Number of input channels.
            num_classes: Number of classification classes.
            global_pool: Global pooling type.
            drop_rate: Dropout rate.
            drop_path_rate: Drop path rate.
            **kwargs: Additional keyword arguments to overlay on config.
        r   F)r#  r$  ro   rB   rM   rO   rS   r   stem)num_chs	reductionr   c                 S   s   g | ]\}}|| qS rW   rW   ).0r  srW   rW   rX   
<listcomp><  s    z$MaxxVit.__init__.<locals>.<listcomp>c                 S   s   g | ]}|  qS rW   )tolist)r  r   rW   rW   rX   r  @  s    r   c                    s   g | ]
}|d    d  qS )r)   rW   )r  rstage_striderW   rX   r  F  s    )r  r  r   r   r  r   zstages.r   )hidden_sizerA   r  rO   )rA   r  )rn   r   r   r   r   r  N)2r   r   r    r  r  r   r  r  r   num_featuresr  r  feature_infor  r   r   ro   r   rM   rO   rS   r  r   r}  r$  r  ziplenr   r   linspacesumsplitranger  rd   r   r  stagesr   r   r   r   r  r   headr   r   r-   _init_weights)rV   r   rL  r  r  r  r  r  r  r   r   r  
num_stagesdprr#  r  r  r$  final_norm_layerr   r  rX   r     s   
	
&


"


zMaxxVit.__init__rn   r   r   r   r   Nc                 C   s>   t |drz	|j|d W d S  ty   |  Y d S w d S )Nr  r  )hasattrr  	TypeError)rV   r   r   r   rW   rW   rX   r  m  s   
zMaxxVit._init_weightsc                 C   s   dd |   D S )Nc                    s*   h | ]\ }t  fd ddD r qS )c                 3   s    | ]}| v V  qd S rT   rW   )r  nr   rW   rX   	<genexpr>x  s    z4MaxxVit.no_weight_decay.<locals>.<setcomp>.<genexpr>)relative_position_bias_tablezrel_pos.mlp)any)r  _rW   r  rX   	<setcomp>v  s    z*MaxxVit.no_weight_decay.<locals>.<setcomp>)named_parametersrU   rW   rW   rX   no_weight_decayt  s   zMaxxVit.no_weight_decayFcoarsec                 C   s   t dddgd}|S )Nz^stem)z^stages\.(\d+)N)z^norm)i )r  r  )r}  )rV   r  matcherrW   rW   rX   group_matcherz  s
   zMaxxVit.group_matcherTenablec                 C   s   | j D ]}||_qd S rT   )r  r  )rV   r  r  rW   rW   rX   set_grad_checkpointing  s   
zMaxxVit.set_grad_checkpointingc                 C   s   | j jS rT   )r  fcrU   rW   rW   rX   get_classifier  s   zMaxxVit.get_classifierc                 C   s   || _ | j|| d S rT   )r  r  reset)rV   r  r  rW   rW   rX   reset_classifier  s   zMaxxVit.reset_classifierNCHWr   indicesr  
stop_early
output_fmtintermediates_onlyc                 C   s   |dv sJ dg }t t| jd |\}}	d}
| |}|
|v r&|| t| j}tj s2|s6| j}n| jd|	 }|D ]!}|
d7 }
||}|
|v r`|rY|
|krY| |}n|}|| q?|re|S |
|krn| |}||fS )a   Forward features that returns intermediates.

        Args:
            x: Input image tensor
            indices: Take last n blocks if int, all if None, select matching indices if sequence
            norm: Apply norm layer to compatible intermediates
            stop_early: Stop iterating over blocks when last desired intermediate hit
            output_fmt: Shape of intermediate feature outputs
            intermediates_only: Only return intermediate features
        Returns:

        )r  zOutput shape must be NCHW.r)   r   N)	r+   r  r  r  appendr   r  r  r  )rV   r   r  r  r  r  r  intermediatestake_indices	max_indexfeat_idxlast_idxr  stagex_interrW   rW   rX   forward_intermediates  s2   




zMaxxVit.forward_intermediatesr)   
prune_norm
prune_head.c                 C   sL   t t| jd |\}}| jd| | _|rt | _|r$| dd| _|S )z6Prune layers not required for specified intermediates.r)   Nr   rn   )r+   r  r  r   r   r  r  r  )rV   r  r  r  r  r  rW   rW   rX   prune_intermediate_layers  s   
z!MaxxVit.prune_intermediate_layersc                 C   r  rT   )r  r  r  r   rW   rW   rX   forward_features  r  zMaxxVit.forward_features
pre_logitsc                 C   s   |r	| j ||dS |  |S )N)r  )r  )rV   r   r  rW   rW   rX   forward_head  s   zMaxxVit.forward_headc                 C   r   rT   )r  r  r   rW   rW   rX   r     r   zMaxxVit.forward)r  rf   r  r   r=   r=   r  F)TrT   )NFFr  F)r)   FT)#rZ   r[   r\   r]   r1   r   r^   r   rb   ra   r   r   r   Moduler  r   r  ignorer   r  r`   r	   r  r  r  r   r  r   r
   r  r  r  r  r   r   rW   rW   r   rX   r4   	  s    	^ 
8

r4   r   r@   FreluTrN   rP   rB   rD   rl   rA   conv_output_biasconv_attn_earlyconv_attn_act_layerconv_norm_layertransformer_shortcut_biastransformer_norm_layertransformer_norm_layer_clrK   rC   rE   c                 C   s6   t t| |dd|||d|d	td|||	|||
|ddS )as  RW variant configuration for CoAtNet models.

    These models were created and trained before seeing https://github.com/google-research/maxvit

    Common differences for initial timm models:
      - pre-norm layer in MZBConv included an activation after norm
      - mbconv expansion calculated from input instead of output chs
      - mbconv shortcut and final 1x1 conv did not have a bias
      - SE act layer was relu, not silu
      - mbconv uses silu in timm, not gelu
      - expansion in attention block done via output proj, not input proj

    Variable differences (evolved over training initial models):
      - avg pool with kernel_size=2 favoured downsampling (instead of maxpool for coat)
      - SE attention was between conv2 and norm/act
      - default to avg pool for mbconv downsample instead of 1x1 or dw conv
      - transformer block shortcut has no bias
    TFrs   )	rl   rA   ri   re   rj   rp   rt   rM   rO   )r:   r;   rA   rK   rO   rQ   rC   rE   r   r   r}  r2   r3   )rl   rA   r  r  r  r  r  r  r  rK   rC   rE   rW   rW   rX   _rw_coat_cfg  s.    r  rk         ?r5   conv_attn_ratior6   c                 C   s4   t t| |d||d|dtd||||	|||
|d	dS )a  RW variant configuration for MaxViT models.

    These models were created and trained before seeing https://github.com/google-research/maxvit

    Differences of initial timm models:
      - mbconv expansion calculated from input instead of output chs
      - mbconv shortcut and final 1x1 conv did not have a bias
      - mbconv uses silu in timm, not gelu
      - expansion in attention block done via output proj, not input proj
    Frs   )rl   rA   re   rj   rv   rM   rO   )	r:   rA   r6   rG   rK   rO   rQ   rC   rE   r  r  )rl   rA   r  r  r  r  r  rG   r6   rK   rC   rE   rW   rW   rX   _rw_max_cfg  s,   	r  rR   r   conv_norm_layer_clrI   c                 C   sD   t |}ttd| |d|d ||dtd||||d |||	|
d	dS )z=Configuration for experimental ConvNeXt-based MaxxViT models.rw   Fr   )rd   rl   rA   re   rK   rO   rQ   r)   )	r:   rA   rG   rI   rK   rO   rQ   rC   rE   r  )r    r}  r2   r3   )rl   rA   r  r  r  r  rG   rI   rK   rC   rE   rW   rW   rX   	_next_cfgL  s.   	r  c                   C   s"   t tddddtdddddd	S )
z0Configuration matching TensorFlow MaxViT models.gMbP?	gelu_tanhsame)rS   rM   ro   ry   FrY  )rS   rM   r7   rC   r  r  rW   rW   rW   rX   _tf_cfgs  s   r   coatnet_pico_rw)r         rD   r   )r5   r   )r   r   r   ru   )r  r  coatnet_nano_rw)rf   rG     rf   )rl   r  r  coatnet_0_rwr{   )r   rf   r9  r   )r  r  coatnet_1_rw)r   r  r  r   )rl   r  r  coatnet_2_rw)r  r  rD      )r   r  rs   )rl   r  coatnet_3_rw)r}   r~   r      )r|   r}   )rl   r  rK   coatnet_bn_0_rwrx   )rl   r  r  r  coatnet_rmlp_nano_rwr~   )r  r  rC   rE   coatnet_rmlp_0_rw)rl   rC   coatnet_rmlp_1_rwr   )rA   r  r  rC   rE   coatnet_rmlp_1_rw2)rl   rC   rE   coatnet_rmlp_2_rw)rl   r  rK   rC   coatnet_rmlp_3_rwcoatnet_nano_cc)r   r   r   r   r  )r   r   r   rd   coatnext_nano_rwr   )r   r   r   r   )ry   N)rC   rK   	coatnet_0r   r   )r   r   r   r   	coatnet_1	coatnet_2r  r	  	coatnet_3r}   r  	coatnet_4)r         r   	coatnet_5)r  rD         r  maxvit_pico_rw)r5   r   r  r  )r   r   r   r   )r  r  r  r  )   r5   )r   r   rd   r   maxvit_nano_rw)r)   r   rf   r)   maxvit_tiny_rwmaxvit_tiny_pm)r  r  r  r  maxvit_rmlp_pico_rw)rC   maxvit_rmlp_nano_rwmaxvit_rmlp_tiny_rwmaxvit_rmlp_small_rwmaxvit_rmlp_base_rw)r   r   rd   r   r   maxxvit_rmlp_nano_rw)r   r   rd   r   r   maxxvit_rmlp_tiny_rwmaxxvit_rmlp_small_rw)0   r|   maxxvitv2_nano_rw)rI   rC   maxxvitv2_rmlp_base_rw)r   r  r  r   )rI   maxxvitv2_rmlp_large_rw)   i@  i  r  )r   r     r   )P   r1  r  maxvit_tiny_tf)r   r   rd   r   r   r   maxvit_small_tfmaxvit_base_tfmaxvit_large_tfmaxvit_xlarge_tf
state_dictmodelc                 C   s   |  }i }|  D ]V\}}|dr7||dd }|j|jjks-|jd |jd kr7t||j|jjd}||v r\|j|| jkr\|	 || 	 kr\|jdv sTJ |
|| j}|||< q
|S )z/Filter checkpoint state dict for compatibility.r  Nir   r)   )new_window_sizenew_bias_shape)r   rG  )r9  r  endswithget_submoduler   r  rG   r(   ndimnumelr   )r9  r:  model_state_dictout_dictr   r   mrW   rW   rX   checkpoint_filter_fn  s    
",
rD  variantcfg_variant
pretrainedc                 K   sT   |du r| t v r| }nd| ddd }tt| |ft | tddtd|S )zCreate a MaxxVit model variant.Nr  r   T)flatten_sequential)	model_cfgfeature_cfgpretrained_filter_fn)
model_cfgsjoinr  r*   r4   r}  rD  )rE  rF  rG  r  rW   rW   rX   _create_maxxvit  s   rN  urlc                 K   s    | dddddddddd	d
|S )z$Create a default configuration dict.r  )rf   r  r  )r9  r9  ffffff?bicubic)      ?rR  rR  z
stem.conv1zhead.fcT)rO  r  
input_size	pool_sizecrop_pctinterpolationmeanr   
first_conv
classifierfixed_input_sizerW   )rO  r  rW   rW   rX   _cfg  s   r[  zcoatnet_pico_rw_224.untrained)rO  zcoatnet_nano_rw_224.sw_in1kztimm/zyhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_nano_rw_224_sw-f53093b4.pthg?)	hf_hub_idrO  rU  zcoatnet_0_rw_224.sw_in1kzvhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_0_rw_224_sw-a6439706.pth)r\  rO  zcoatnet_1_rw_224.sw_in1kzvhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_1_rw_224_sw-5cae1ea8.pthz!coatnet_2_rw_224.sw_in12k_ft_in1k)r\  z'coatnet_rmlp_1_rw2_224.sw_in12k_ft_in1kz&coatnet_rmlp_2_rw_224.sw_in12k_ft_in1kz&coatnet_rmlp_2_rw_384.sw_in12k_ft_in1k)rf   r~   r~   )r  r  g      ?squash)r\  rS  rT  rU  	crop_modezcoatnet_bn_0_rw_224.sw_in1kzyhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_bn_0_rw_224_sw-c228e218.pthrP  )r\  rO  rW  r   rU  z coatnet_rmlp_nano_rw_224.sw_in1kz~https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_nano_rw_224_sw-bd1d51b3.pthzcoatnet_rmlp_0_rw_224.untrainedzcoatnet_rmlp_1_rw_224.sw_in1kz{https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_1_rw_224_sw-9051e6c3.pthzcoatnet_rmlp_2_rw_224.sw_in1kz{https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnet_rmlp_2_rw_224_sw-5ccfac55.pthzcoatnet_rmlp_3_rw_224.untrainedzcoatnet_nano_cc_224.untrainedzcoatnext_nano_rw_224.sw_in1kzzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/coatnext_nano_rw_224_ad-22cb71c2.pthzcoatnet_2_rw_224.sw_in12ki-.  )r\  r  zcoatnet_3_rw_224.sw_in12kzcoatnet_rmlp_1_rw2_224.sw_in12kzcoatnet_rmlp_2_rw_224.sw_in12kzcoatnet_0_224.untrainedzcoatnet_1_224.untrainedzcoatnet_2_224.untrainedzcoatnet_3_224.untrainedzcoatnet_4_224.untrainedzcoatnet_5_224.untrainedzmaxvit_pico_rw_256.untrained)rf   r  r  )   r_  )rO  rS  rT  zmaxvit_nano_rw_256.sw_in1kzxhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_nano_rw_256_sw-fb127241.pth)r\  rO  rS  rT  zmaxvit_tiny_rw_224.sw_in1kzxhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_tiny_rw_224_sw-7d0dffeb.pthzmaxvit_tiny_rw_256.untrainedzmaxvit_tiny_pm_256.untrainedzmaxvit_rmlp_pico_rw_256.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_pico_rw_256_sw-8d82f2c6.pthzmaxvit_rmlp_nano_rw_256.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_nano_rw_256_sw-c17bb0d6.pthzmaxvit_rmlp_tiny_rw_256.sw_in1kz}https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_tiny_rw_256_sw-bbef0ff5.pthz maxvit_rmlp_small_rw_224.sw_in1kz~https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxvit_rmlp_small_rw_224_sw-6ef0ae4f.pthz"maxvit_rmlp_small_rw_256.untrainedz(maxvit_rmlp_base_rw_224.sw_in12k_ft_in1kz(maxvit_rmlp_base_rw_384.sw_in12k_ft_in1kz maxvit_rmlp_base_rw_224.sw_in12kz maxxvit_rmlp_nano_rw_256.sw_in1kz~https://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxxvit_rmlp_nano_rw_256_sw-0325d459.pthz"maxxvit_rmlp_tiny_rw_256.untrainedz!maxxvit_rmlp_small_rw_256.sw_in1kzhttps://github.com/rwightman/pytorch-image-models/releases/download/v0.1-weights-maxx/maxxvit_rmlp_small_rw_256_sw-37e217ff.pthzmaxxvitv2_nano_rw_256.sw_in1k)r\  rS  rT  z+maxxvitv2_rmlp_base_rw_224.sw_in12k_ft_in1kz+maxxvitv2_rmlp_base_rw_384.sw_in12k_ft_in1kz%maxxvitv2_rmlp_large_rw_224.untrainedz#maxxvitv2_rmlp_base_rw_224.sw_in12kzmaxvit_tiny_tf_224.in1k)r\  rW  r   zmaxvit_tiny_tf_384.in1kzmaxvit_tiny_tf_512.in1k)rf   rD   rD   )r2  r2  zmaxvit_small_tf_224.in1kzmaxvit_small_tf_384.in1kzmaxvit_small_tf_512.in1kzmaxvit_base_tf_224.in1kzmaxvit_base_tf_384.in1kzmaxvit_base_tf_512.in1kzmaxvit_large_tf_224.in1kzmaxvit_large_tf_384.in1kzmaxvit_large_tf_512.in1kzmaxvit_base_tf_224.in21kiSU  z maxvit_base_tf_384.in21k_ft_in1kz maxvit_base_tf_512.in21k_ft_in1kzmaxvit_large_tf_224.in21kz!maxvit_large_tf_384.in21k_ft_in1kz!maxvit_large_tf_512.in21k_ft_in1k)r\  rS  rU  r^  zmaxvit_xlarge_tf_224.in21kz"maxvit_xlarge_tf_384.in21k_ft_in1kz"maxvit_xlarge_tf_512.in21k_ft_in1kc                 K      t dd| i|S )z)CoatNet Pico model with RW configuration.coatnet_pico_rw_224rG  N)ra  rN  rG  r  rW   rW   rX   ra  	     ra  c                 K   r`  )z)CoatNet Nano model with RW configuration.coatnet_nano_rw_224rG  N)re  rb  rc  rW   rW   rX   re  	  rd  re  c                 K   r`  )z&CoatNet-0 model with RW configuration.coatnet_0_rw_224rG  N)rf  rb  rc  rW   rW   rX   rf  	  rd  rf  c                 K   r`  )z&CoatNet-1 model with RW configuration.coatnet_1_rw_224rG  N)rg  rb  rc  rW   rW   rX   rg  	  rd  rg  c                 K   r`  )z&CoatNet-2 model with RW configuration.coatnet_2_rw_224rG  N)rh  rb  rc  rW   rW   rX   rh  	  rd  rh  c                 K   r`  )z&CoatNet-3 model with RW configuration.coatnet_3_rw_224rG  N)ri  rb  rc  rW   rW   rX   ri  	  rd  ri  c                 K   r`  )z4CoatNet-0 model with BatchNorm and RW configuration.coatnet_bn_0_rw_224rG  N)rj  rb  rc  rW   rW   rX   rj  %	  rd  rj  c                 K   r`  )z.CoatNet Nano model with Relative Position MLP.coatnet_rmlp_nano_rw_224rG  N)rk  rb  rc  rW   rW   rX   rk  +	  rd  rk  c                 K   r`  )z+CoatNet-0 model with Relative Position MLP.coatnet_rmlp_0_rw_224rG  N)rl  rb  rc  rW   rW   rX   rl  1	  rd  rl  c                 K   r`  )z+CoatNet-1 model with Relative Position MLP.coatnet_rmlp_1_rw_224rG  N)rm  rb  rc  rW   rW   rX   rm  7	  rd  rm  c                 K   r`  )z.CoatNet-1 model with Relative Position MLP v2.coatnet_rmlp_1_rw2_224rG  N)rn  rb  rc  rW   rW   rX   rn  =	  rd  rn  c                 K   r`  )z+CoatNet-2 model with Relative Position MLP.coatnet_rmlp_2_rw_224rG  N)ro  rb  rc  rW   rW   rX   ro  C	  rd  ro  c                 K   r`  )z6CoatNet-2 model with Relative Position MLP at 384x384.coatnet_rmlp_2_rw_384rG  N)rp  rb  rc  rW   rW   rX   rp  I	  rd  rp  c                 K   r`  )z+CoatNet-3 model with Relative Position MLP.coatnet_rmlp_3_rw_224rG  N)rq  rb  rc  rW   rW   rX   rq  O	  rd  rq  c                 K   r`  )z(CoatNet Nano model with ConvNeXt blocks.coatnet_nano_cc_224rG  N)rr  rb  rc  rW   rW   rX   rr  U	  rd  rr  c                 K   r`  )z*CoAtNeXt Nano model with RW configuration.coatnext_nano_rw_224rG  N)rs  rb  rc  rW   rW   rX   rs  [	  rd  rs  c                 K   r`  )zCoatNet-0 model.coatnet_0_224rG  N)rt  rb  rc  rW   rW   rX   rt  a	  rd  rt  c                 K   r`  )zCoatNet-1 model.coatnet_1_224rG  N)ru  rb  rc  rW   rW   rX   ru  g	  rd  ru  c                 K   r`  )zCoatNet-2 model.coatnet_2_224rG  N)rv  rb  rc  rW   rW   rX   rv  m	  rd  rv  c                 K   r`  )zCoatNet-3 model.coatnet_3_224rG  N)rw  rb  rc  rW   rW   rX   rw  s	  rd  rw  c                 K   r`  )zCoatNet-4 model.coatnet_4_224rG  N)rx  rb  rc  rW   rW   rX   rx  y	  rd  rx  c                 K   r`  )zCoatNet-5 model.coatnet_5_224rG  N)ry  rb  rc  rW   rW   rX   ry  	  rd  ry  c                 K   r`  )z(MaxViT Pico model with RW configuration.maxvit_pico_rw_256rG  N)rz  rb  rc  rW   rW   rX   rz  	  rd  rz  c                 K   r`  )z(MaxViT Nano model with RW configuration.maxvit_nano_rw_256rG  N)r{  rb  rc  rW   rW   rX   r{  	  rd  r{  c                 K   r`  )z(MaxViT Tiny model with RW configuration.maxvit_tiny_rw_224rG  N)r|  rb  rc  rW   rW   rX   r|  	  rd  r|  c                 K   r`  )z3MaxViT Tiny model with RW configuration at 256x256.maxvit_tiny_rw_256rG  N)r}  rb  rc  rW   rW   rX   r}  	  rd  r}  c                 K   r`  )z3MaxViT Relative Position MLP Pico RW 256x256 model.maxvit_rmlp_pico_rw_256rG  N)r~  rb  rc  rW   rW   rX   r~  	  rd  r~  c                 K   r`  )z3MaxViT Relative Position MLP Nano RW 256x256 model.maxvit_rmlp_nano_rw_256rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z3MaxViT Relative Position MLP Tiny RW 256x256 model.maxvit_rmlp_tiny_rw_256rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z4MaxViT Relative Position MLP Small RW 224x224 model.maxvit_rmlp_small_rw_224rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z9MaxViT Small model with Relative Position MLP at 256x256.maxvit_rmlp_small_rw_256rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z-MaxViT Base model with Relative Position MLP.maxvit_rmlp_base_rw_224rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z8MaxViT Base model with Relative Position MLP at 384x384.maxvit_rmlp_base_rw_384rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z'MaxViT Tiny model with parallel blocks.maxvit_tiny_pm_256rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z4MaxxViT Relative Position MLP Nano RW 256x256 model.maxxvit_rmlp_nano_rw_256rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z.MaxxViT Tiny model with Relative Position MLP.maxxvit_rmlp_tiny_rw_256rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z/MaxxViT Small model with Relative Position MLP.maxxvit_rmlp_small_rw_256rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )zMaxxViT-V2 Nano model.maxxvitv2_nano_rw_256rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z1MaxxViT-V2 Base model with Relative Position MLP.maxxvitv2_rmlp_base_rw_224rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z<MaxxViT-V2 Base model with Relative Position MLP at 384x384.maxxvitv2_rmlp_base_rw_384rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r`  )z2MaxxViT-V2 Large model with Relative Position MLP.maxxvitv2_rmlp_large_rw_224rG  N)r  rb  rc  rW   rW   rX   r  	  rd  r  c                 K      t dd| i|S )z"MaxViT Tiny model from TensorFlow.maxvit_tiny_tf_224r4  rG  N)r  r4  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r  )z-MaxViT Tiny model from TensorFlow at 384x384.maxvit_tiny_tf_384r4  rG  N)r  r4  rb  rc  rW   rW   rX   r  	  rd  r  c                 K   r  )z-MaxViT Tiny model from TensorFlow at 512x512.maxvit_tiny_tf_512r4  rG  N)r  r4  rb  rc  rW   rW   rX   r  
  rd  r  c                 K   r  )z#MaxViT Small model from TensorFlow.maxvit_small_tf_224r5  rG  N)r  r5  rb  rc  rW   rW   rX   r  	
  rd  r  c                 K   r  )z.MaxViT Small model from TensorFlow at 384x384.maxvit_small_tf_384r5  rG  N)r  r5  rb  rc  rW   rW   rX   r  
  rd  r  c                 K   r  )z.MaxViT Small model from TensorFlow at 512x512.maxvit_small_tf_512r5  rG  N)r  r5  rb  rc  rW   rW   rX   r  
  rd  r  c                 K   r  )z"MaxViT Base model from TensorFlow.maxvit_base_tf_224r6  rG  N)r  r6  rb  rc  rW   rW   rX   r  
  rd  r  c                 K   r  )z-MaxViT Base model from TensorFlow at 384x384.maxvit_base_tf_384r6  rG  N)r  r6  rb  rc  rW   rW   rX   r  !
  rd  r  c                 K   r  )z-MaxViT Base model from TensorFlow at 512x512.maxvit_base_tf_512r6  rG  N)r  r6  rb  rc  rW   rW   rX   r  '
  rd  r  c                 K   r  )z#MaxViT Large model from TensorFlow.maxvit_large_tf_224r7  rG  N)r  r7  rb  rc  rW   rW   rX   r  -
  rd  r  c                 K   r  )z.MaxViT Large model from TensorFlow at 384x384.maxvit_large_tf_384r7  rG  N)r  r7  rb  rc  rW   rW   rX   r  3
  rd  r  c                 K   r  )z.MaxViT Large model from TensorFlow at 512x512.maxvit_large_tf_512r7  rG  N)r  r7  rb  rc  rW   rW   rX   r  9
  rd  r  c                 K   r  )z$MaxViT XLarge model from TensorFlow.maxvit_xlarge_tf_224r8  rG  N)r  r8  rb  rc  rW   rW   rX   r  ?
  rd  r  c                 K   r  )z/MaxViT XLarge model from TensorFlow at 384x384.maxvit_xlarge_tf_384r8  rG  N)r  r8  rb  rc  rW   rW   rX   r  E
  rd  r  c                 K   r  )z/MaxViT XLarge model from TensorFlow at 512x512.maxvit_xlarge_tf_512r8  rG  N)r  r8  rb  rc  rW   rW   rX   r  K
  rd  r  r  )r   r@   FFr  rn   TrN   rP   NrB   rD   )rk   r@   Fr  rn   rN   rP   Nr5   NrB   rD   )rk   r@   rN   rP   rN   rP   NFrR   r   rD   rW   )NFr  )r]   r  collectionsr   dataclassesr   r   r   	functoolsr   typingr   r   r	   r
   r   r   r   r   r   r   	torch.jitr   	timm.datar   r   timm.layersr   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   _builderr*   	_featuresr+   _features_fxr,   _manipulater-   r.   	_registryr/   r0   __all__r3   r2   r1   r  r   r   r   r   r   rb   r   r   r  r^   r   r!  r8  r   rJ  rO  rT  rW  rZ  r[  ri  rr  rs  rt  ru  rv  rw  r  r  r  r  r  r4   r`   ra   r  r  r  r   r}  rL  rD  rN  r[  default_cfgsra  re  rf  rg  rh  ri  rj  rk  rl  rm  rn  ro  rp  rq  rr  rs  rt  ru  rv  rw  rx  ry  rz  r{  r|  r}  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  rW   rW   rW   rX   <module>   s	   $(  !$QR 0 K
eW
(	
("	AP
(	
(	H51S/
 \	


:	


1	


'
(2>IT]is~       $  *  0  6  <  D  K  R  Y  a  h  o  v                   #    .    7    C    L    U    ^    g    ,s&"
#'(+./069<?DEFGHILMQTWZ^bfkpsx~     
                "  %  (  +  .  1  4  7  :  >  A  D  G  J  M  P  S  V  \                                                       $