o
    i                  	   @   sF  d Z ddlZddlmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZmZmZ dd
lmZ ddlmZmZ ddlmZmZmZ ddlmZ eeZd@de de dee  de fddZ!G dd dej"Z#G dd dej"Z$G dd dej"Z%G dd dej"Z&G dd dej"Z'G d d! d!ej"Z(G d"d# d#ej"Z)G d$d% d%ej"Z*G d&d' d'ej"Z+G d(d) d)ej"Z,G d*d+ d+eZ-G d,d- d-ej"Z.eG d.d/ d/eZ/eG d0d1 d1e/Z0ed2d3G d4d5 d5e/Z1G d6d7 d7ej"Z2G d8d9 d9ej"Z3G d:d; d;ej"Z4ed<d3G d=d> d>e/Z5g d?Z6dS )AzPyTorch MobileViT model.    N)OptionalUnion)nn)CrossEntropyLoss   )ACT2FN)GradientCheckpointingLayer)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttentionSemanticSegmenterOutput)PreTrainedModel) find_pruneable_heads_and_indicesprune_linear_layer)auto_docstringlogging	torch_int   )MobileViTConfig   valuedivisor	min_valuereturnc                 C   sF   |du r|}t |t| |d  | | }|d|  k r||7 }t|S )a  
    Ensure that all layers have a channel count that is divisible by `divisor`. This function is taken from the
    original TensorFlow repo. It can be seen here:
    https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py
    N   g?)maxint)r   r   r   	new_value r   m/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/transformers/models/mobilevit/modeling_mobilevit.pymake_divisible+   s   r    c                       sv   e Zd Z						ddededededed	ed
edededeeef ddf fddZde	j
de	j
fddZ  ZS )MobileViTConvLayerr   FTconfigin_channelsout_channelskernel_sizestridegroupsbiasdilationuse_normalizationuse_activationr   Nc                    s   t    t|d d | }|| dkr td| d| d|| dkr1td| d| dtj||||||||dd		| _|	rNtj|d
dddd| _nd | _|
rst	|
t
r_t|
 | _d S t	|jt
rmt|j | _d S |j| _d S d | _d S )Nr   r   r   zInput channels (z) are not divisible by z groups.zOutput channels (zeros)	r#   r$   r%   r&   paddingr)   r'   r(   padding_modegh㈵>g?T)num_featuresepsmomentumaffinetrack_running_stats)super__init__r   
ValueErrorr   Conv2dconvolutionBatchNorm2dnormalization
isinstancestrr   
activation
hidden_act)selfr"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r-   	__class__r   r   r5   ;   sB   



zMobileViTConvLayer.__init__featuresc                 C   s6   |  |}| jd ur| |}| jd ur| |}|S N)r8   r:   r=   )r?   rB   r   r   r   forwardq   s   




zMobileViTConvLayer.forward)r   r   Fr   TT)__name__
__module____qualname__r   r   boolr   r<   r5   torchTensorrD   __classcell__r   r   r@   r   r!   :   s>    	

6r!   c                       sT   e Zd ZdZ	ddedededededd	f fd
dZdejdejfddZ	  Z
S )MobileViTInvertedResidualzY
    Inverted residual block (MobileNetv2): https://huggingface.co/papers/1801.04381
    r   r"   r#   r$   r&   r)   r   Nc              	      s   t    ttt||j d}|dvrtd| d|dko$||k| _t|||dd| _	t|||d|||d| _
t|||dd	d
| _d S )Nr   )r   r   zInvalid stride .r   r#   r$   r%   r   )r#   r$   r%   r&   r'   r)   Fr#   r$   r%   r+   )r4   r5   r    r   roundexpand_ratior6   use_residualr!   
expand_1x1conv_3x3
reduce_1x1)r?   r"   r#   r$   r&   r)   expanded_channelsr@   r   r   r5      s0   

z"MobileViTInvertedResidual.__init__rB   c                 C   s4   |}|  |}| |}| |}| jr|| S |S rC   )rS   rT   rU   rR   )r?   rB   residualr   r   r   rD      s
   


z!MobileViTInvertedResidual.forwardr   )rE   rF   rG   __doc__r   r   r5   rI   rJ   rD   rK   r   r   r@   r   rL   z   s"    !rL   c                       sP   e Zd Z	ddedededededdf fd	d
ZdejdejfddZ  Z	S )MobileViTMobileNetLayerr   r"   r#   r$   r&   
num_stagesr   Nc                    sR   t    t | _t|D ]}t||||dkr|ndd}| j| |}qd S )Nr   r   )r#   r$   r&   )r4   r5   r   
ModuleListlayerrangerL   append)r?   r"   r#   r$   r&   r[   ir]   r@   r   r   r5      s   

z MobileViTMobileNetLayer.__init__rB   c                 C      | j D ]}||}q|S rC   r]   )r?   rB   layer_moduler   r   r   rD         

zMobileViTMobileNetLayer.forward)r   r   
rE   rF   rG   r   r   r5   rI   rJ   rD   rK   r   r   r@   r   rZ      s     rZ   c                       @   e Zd Zdededdf fddZdejdejfdd	Z  Z	S )
MobileViTSelfAttentionr"   hidden_sizer   Nc                    s   t    ||j dkrtd| d|j d|j| _t||j | _| j| j | _tj|| j|j	d| _
tj|| j|j	d| _tj|| j|j	d| _t|j| _d S )Nr   zThe hidden size z4 is not a multiple of the number of attention heads rM   )r(   )r4   r5   num_attention_headsr6   r   attention_head_sizeall_head_sizer   Linearqkv_biasquerykeyr   Dropoutattention_probs_dropout_probdropoutr?   r"   rh   r@   r   r   r5      s   
zMobileViTSelfAttention.__init__hidden_statesc                 C   s   |j \}}}| ||d| j| jdd}| ||d| j| jdd}| ||d| j| jdd}t	||dd}|t
| j }tjj|dd}	| |	}	t	|	|}
|
dddd }
|
 d d | jf }|
j| }
|
S )Nr   r   dimr   r   )shapern   viewri   rj   	transposero   r   rI   matmulmathsqrtr   
functionalsoftmaxrr   permute
contiguoussizerk   )r?   rt   
batch_size
seq_length_query_layer	key_layervalue_layerattention_scoresattention_probscontext_layernew_context_layer_shaper   r   r   rD      s,   

zMobileViTSelfAttention.forwardre   r   r   r@   r   rg      s    rg   c                       rf   )
MobileViTSelfOutputr"   rh   r   Nc                    s*   t    t||| _t|j| _d S rC   r4   r5   r   rl   denserp   hidden_dropout_probrr   rs   r@   r   r   r5         
zMobileViTSelfOutput.__init__rt   c                 C      |  |}| |}|S rC   r   rr   r?   rt   r   r   r   rD      rd   zMobileViTSelfOutput.forwardre   r   r   r@   r   r      s    r   c                       sV   e Zd Zdededdf fddZdee ddfdd	Zd
ej	dej	fddZ
  ZS )MobileViTAttentionr"   rh   r   Nc                    s.   t    t||| _t||| _t | _d S rC   )r4   r5   rg   	attentionr   outputsetpruned_headsrs   r@   r   r   r5     s   
zMobileViTAttention.__init__headsc                 C   s   t |dkrd S t|| jj| jj| j\}}t| jj|| j_t| jj|| j_t| jj	|| j_	t| j
j|dd| j
_| jjt | | j_| jj| jj | j_| j|| _d S )Nr   r   rw   )lenr   r   ri   rj   r   r   rn   ro   r   r   r   rk   union)r?   r   indexr   r   r   prune_heads  s   zMobileViTAttention.prune_headsrt   c                 C   s   |  |}| |}|S rC   )r   r   )r?   rt   self_outputsattention_outputr   r   r   rD     rd   zMobileViTAttention.forward)rE   rF   rG   r   r   r5   r   r   rI   rJ   rD   rK   r   r   r@   r   r     s    r   c                       D   e Zd Zdedededdf fddZdejdejfd	d
Z  Z	S )MobileViTIntermediater"   rh   intermediate_sizer   Nc                    s@   t    t||| _t|jtrt|j | _	d S |j| _	d S rC   )
r4   r5   r   rl   r   r;   r>   r<   r   intermediate_act_fnr?   r"   rh   r   r@   r   r   r5   &  s
   
zMobileViTIntermediate.__init__rt   c                 C   r   rC   )r   r   r   r   r   r   rD   .  rd   zMobileViTIntermediate.forwardre   r   r   r@   r   r   %      r   c                       sJ   e Zd Zdedededdf fddZdejd	ejdejfd
dZ  Z	S )MobileViTOutputr"   rh   r   r   Nc                    s*   t    t||| _t|j| _d S rC   r   r   r@   r   r   r5   5  r   zMobileViTOutput.__init__rt   input_tensorc                 C   s    |  |}| |}|| }|S rC   r   )r?   rt   r   r   r   r   rD   :  s   

zMobileViTOutput.forwardre   r   r   r@   r   r   4  s    $r   c                       r   )MobileViTTransformerLayerr"   rh   r   r   Nc                    sZ   t    t||| _t|||| _t|||| _tj	||j
d| _tj	||j
d| _d S )Nr0   )r4   r5   r   r   r   intermediater   r   r   	LayerNormlayer_norm_epslayernorm_beforelayernorm_afterr   r@   r   r   r5   B  s   
z"MobileViTTransformerLayer.__init__rt   c                 C   s<   |  | |}|| }| |}| |}| ||}|S rC   )r   r   r   r   r   )r?   rt   r   layer_outputr   r   r   rD   J  s   

z!MobileViTTransformerLayer.forwardre   r   r   r@   r   r   A  r   r   c                       r   )MobileViTTransformerr"   rh   r[   r   Nc                    sJ   t    t | _t|D ]}t||t||j d}| j	| qd S )N)rh   r   )
r4   r5   r   r\   r]   r^   r   r   	mlp_ratior_   )r?   r"   rh   r[   r   transformer_layerr@   r   r   r5   U  s   

zMobileViTTransformer.__init__rt   c                 C   ra   rC   rb   )r?   rt   rc   r   r   r   rD   a  rd   zMobileViTTransformer.forwardre   r   r   r@   r   r   T  s    r   c                       s   e Zd ZdZ	ddedededededed	ed
df fddZdejd
e	eje
f fddZdejde
d
ejfddZdejd
ejfddZ  ZS )MobileViTLayerzC
    MobileViT block: https://huggingface.co/papers/2110.02178
    r   r"   r#   r$   r&   rh   r[   r)   r   Nc                    s   t    |j| _|j| _|dkr,t||||dkr|nd|dkr$|d ndd| _|}nd | _t||||jd| _	t|||dddd| _
t|||d| _tj||jd| _t|||dd| _t|d| ||jd| _d S )	Nr   r   )r#   r$   r&   r)   rN   F)r#   r$   r%   r*   r+   )rh   r[   r   )r4   r5   
patch_sizepatch_widthpatch_heightrL   downsampling_layerr!   conv_kernel_sizeconv_kxkconv_1x1r   transformerr   r   r   	layernormconv_projectionfusion)r?   r"   r#   r$   r&   rh   r[   r)   r@   r   r   r5   l  sN   

	zMobileViTLayer.__init__rB   c                 C   sN  | j | j}}t|| }|j\}}}}tj r$tt|| | n
tt	|| | }	tj r?tt|| | n
tt	|| | }
d}|
|ksT|	|krbt
jj||	|
fddd}d}|
| }|	| }|| }||| | |||}|dd}|||||}|dd}||| |d}||f||||||d	}||fS )
NFbilinearr   modealign_cornersTr   r   r   ru   )	orig_sizer   channelsinterpolatenum_patchesnum_patches_widthnum_patches_height)r   r   r   ry   rI   jit
is_tracingr   ceilr}   r   r   r   reshaper{   )r?   rB   r   r   
patch_arear   r   orig_height
orig_width
new_height	new_widthr   num_patch_widthnum_patch_heightr   patches	info_dictr   r   r   	unfolding  sH   	zMobileViTLayer.unfoldingr   r   c                 C   s   | j | j}}t|| }|d }|d }|d }|d }	|d }
| |||d}|dd}||| |	 |
||}|dd	}||||	| |
| }|d
 r_tjj	||d ddd}|S )Nr   r   r   r   r   ru   r   r   r   r   r   r   Fr   )
r   r   r   r   rz   r{   r   r   r   r   )r?   r   r   r   r   r   r   r   r   r   r   rB   r   r   r   folding  s*   zMobileViTLayer.foldingc                 C   s|   | j r|  |}|}| |}| |}| |\}}| |}| |}| ||}| |}| t	j
||fdd}|S Nr   rw   )r   r   r   r   r   r   r   r   r   rI   cat)r?   rB   rW   r   r   r   r   r   rD     s   





zMobileViTLayer.forwardrX   )rE   rF   rG   rY   r   r   r5   rI   rJ   tupledictr   r   rD   rK   r   r   r@   r   r   g  s.    	:3r   c                       sP   e Zd Zdeddf fddZ		ddejd	ed
edee	e
f fddZ  ZS )MobileViTEncoderr"   r   Nc           
   	      sX  t    || _t | _d| _d }}|jdkrd}d}n|jdkr%d}d}t||j	d |j	d ddd}| j
| t||j	d |j	d dd	d}| j
| t||j	d |j	d	 d|jd dd
}| j
| |rp|d9 }t||j	d	 |j	d d|jd d|d}| j
| |r|d9 }t||j	d |j	d d|jd d	|d}	| j
|	 d S )NFr   T   r   r   )r#   r$   r&   r[   r   r   )r#   r$   r&   rh   r[      )r#   r$   r&   rh   r[   r)      )r4   r5   r"   r   r\   r]   gradient_checkpointingoutput_striderZ   neck_hidden_sizesr_   r   hidden_sizes)
r?   r"   dilate_layer_4dilate_layer_5r)   layer_1layer_2layer_3layer_4layer_5r@   r   r   r5     sx   



		zMobileViTEncoder.__init__FTrt   output_hidden_statesreturn_dictc                 C   s\   |rdnd }t | jD ]\}}||}|r||f }q|s(tdd ||fD S t||dS )Nr   c                 s   s    | ]	}|d ur|V  qd S rC   r   ).0vr   r   r   	<genexpr>j  s    z+MobileViTEncoder.forward.<locals>.<genexpr>)last_hidden_statert   )	enumerater]   r   r	   )r?   rt   r   r   all_hidden_statesr`   rc   r   r   r   rD   [  s   
zMobileViTEncoder.forward)FT)rE   rF   rG   r   r5   rI   rJ   rH   r   r   r	   rD   rK   r   r   r@   r   r     s    M
r   c                   @   s<   e Zd ZU eed< dZdZdZdgZde	j
ddfd	d
ZdS )MobileViTPreTrainedModelr"   	mobilevitpixel_valuesTr   moduler   Nc                 C   sx   t |tjtjtjfr%|jjjd| jj	d |j
dur#|j
j  dS dS t |tjr:|j
j  |jjd dS dS )zInitialize the weightsg        )meanstdNg      ?)r;   r   rl   r7   r9   weightdatanormal_r"   initializer_ranger(   zero_r   fill_)r?   r   r   r   r   _init_weightsw  s   
z&MobileViTPreTrainedModel._init_weights)rE   rF   rG   r   __annotations__base_model_prefixmain_input_namesupports_gradient_checkpointing_no_split_modulesr   Moduler  r   r   r   r   r   o  s   
 r   c                       sl   e Zd Zddedef fddZdd Ze			dd	ee	j
 d
ee dee deeef fddZ  ZS )MobileViTModelTr"   expand_outputc                    sn   t  | || _|| _t||j|jd ddd| _t|| _	| jr1t||jd |jd dd| _
|   d	S )
aE  
        expand_output (`bool`, *optional*, defaults to `True`):
            Whether to expand the output of the model using a 1x1 convolution. If `True`, the model will apply an additional
            1x1 convolution to expand the output channels from `config.neck_hidden_sizes[5]` to `config.neck_hidden_sizes[6]`.
        r   r   r   )r#   r$   r%   r&   r      r   rN   N)r4   r5   r"   r	  r!   num_channelsr   	conv_stemr   encoderconv_1x1_exp	post_init)r?   r"   r	  r@   r   r   r5     s&   
zMobileViTModel.__init__c                 C   sF   |  D ]\}}| jj| }t|tr |jjD ]}|j| qqdS )zPrunes heads of the model.
        heads_to_prune: dict of {layer_num: list of heads to prune in this layer} See base class PreTrainedModel
        N)itemsr  r]   r;   r   r   r   r   )r?   heads_to_prunelayer_indexr   mobilevit_layerr   r   r   r   _prune_heads  s   
zMobileViTModel._prune_headsNr   r   r   r   c           	      C   s   |d ur|n| j j}|d ur|n| j j}|d u rtd| |}| j|||d}| jr>| |d }tj	|ddgdd}n|d }d }|sY|d urN||fn|f}||dd   S t
|||jd	S )
Nz You have to specify pixel_valuesr   r   r   rv   ru   F)rx   keepdimr   )r   pooler_outputrt   )r"   r   use_return_dictr6   r  r  r	  r  rI   r   r
   rt   )	r?   r   r   r   embedding_outputencoder_outputsr   pooled_outputr   r   r   r   rD     s0   
zMobileViTModel.forward)T)NNN)rE   rF   rG   r   rH   r5   r  r   r   rI   rJ   r   r   r
   rD   rK   r   r   r@   r   r    s     

r  z
    MobileViT model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                       sn   e Zd Zdeddf fddZe				ddeej dee	 deej d	ee	 de
eef f
d
dZ  ZS )MobileViTForImageClassificationr"   r   Nc                    sd   t  | |j| _t|| _tj|jdd| _|jdkr't	|j
d |jnt | _|   d S )NT)inplacer   ru   )r4   r5   
num_labelsr  r   r   rp   classifier_dropout_probrr   rl   r   Identity
classifierr  r?   r"   r@   r   r   r5     s   
$z(MobileViTForImageClassification.__init__r   r   labelsr   c           
      C   s   |dur|n| j j}| j|||d}|r|jn|d }| | |}d}|dur1| ||| j }|sG|f|dd  }	|durE|f|	 S |	S t|||jdS )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss). If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr  r   r   )losslogitsrt   )	r"   r  r   r  r"  rr   loss_functionr   rt   )
r?   r   r   r$  r   outputsr  r&  r%  r   r   r   r   rD     s   z'MobileViTForImageClassification.forwardNNNN)rE   rF   rG   r   r5   r   r   rI   rJ   rH   r   r   r   rD   rK   r   r   r@   r   r    s$    
r  c                       r   )MobileViTASPPPoolingr"   r#   r$   r   Nc              	      s4   t    tjdd| _t|||ddddd| _d S )Nr   )output_sizeTrelu)r#   r$   r%   r&   r*   r+   )r4   r5   r   AdaptiveAvgPool2dglobal_poolr!   r   )r?   r"   r#   r$   r@   r   r   r5     s   
zMobileViTASPPPooling.__init__rB   c                 C   s:   |j dd  }| |}| |}tjj||ddd}|S )Nrv   r   Fr   )ry   r.  r   r   r   r   )r?   rB   spatial_sizer   r   r   rD   %  s
   

zMobileViTASPPPooling.forwardre   r   r   r@   r   r*    s    r*  c                       @   e Zd ZdZdeddf fddZdejdejfdd	Z  Z	S )
MobileViTASPPz
    ASPP module defined in DeepLab papers: https://huggingface.co/papers/1606.00915, https://huggingface.co/papers/1706.05587
    r"   r   Nc                    s   t     jd  jt jdkrtdt | _	t
 ddd}| j	| | j	 fdd jD  t }| j	| t
 d	 ddd| _tj jd
| _d S )Nrv   r   z"Expected 3 values for atrous_ratesr   r,  rO   c              
      s    g | ]}t  d |ddqS )r   r,  )r#   r$   r%   r)   r+   )r!   )r   rater"   r#   r$   r   r   
<listcomp>G  s    	z*MobileViTASPP.__init__.<locals>.<listcomp>r   )p)r4   r5   r   aspp_out_channelsr   atrous_ratesr6   r   r\   convsr!   r_   extendr*  projectrp   aspp_dropout_probrr   )r?   r"   in_projection
pool_layerr@   r3  r   r5   2  s2   


	zMobileViTASPP.__init__rB   c                 C   sD   g }| j D ]	}||| qtj|dd}| |}| |}|S r   )r8  r_   rI   r   r:  rr   )r?   rB   pyramidconvpooled_featuresr   r   r   rD   ]  s   


zMobileViTASPP.forward
rE   rF   rG   rY   r   r5   rI   rJ   rD   rK   r   r   r@   r   r1  -  s    +r1  c                       r0  )
MobileViTDeepLabV3zJ
    DeepLabv3 architecture: https://huggingface.co/papers/1706.05587
    r"   r   Nc              	      sB   t    t|| _t|j| _t||j	|j
ddddd| _d S )Nr   FT)r#   r$   r%   r*   r+   r(   )r4   r5   r1  asppr   	Dropout2dr   rr   r!   r6  r  r"  r#  r@   r   r   r5   m  s   

zMobileViTDeepLabV3.__init__rt   c                 C   s&   |  |d }| |}| |}|S )Nru   )rC  rr   r"  )r?   rt   rB   r   r   r   rD   }  s   

zMobileViTDeepLabV3.forwardrA  r   r   r@   r   rB  h  s    rB  zX
    MobileViT model with a semantic segmentation head on top, e.g. for Pascal VOC.
    c                       sn   e Zd Zdeddf fddZe				ddeej deej dee	 d	ee	 de
eef f
d
dZ  ZS ) MobileViTForSemanticSegmentationr"   r   Nc                    s8   t  | |j| _t|dd| _t|| _|   d S )NF)r	  )r4   r5   r  r  r   rB  segmentation_headr  r#  r@   r   r   r5     s
   
z)MobileViTForSemanticSegmentation.__init__r   r$  r   r   c                 C   s  |dur|n| j j}|dur|n| j j}|dur"| j jdkr"td| j|d|d}|r/|jn|d }| |}d}|durYtj	j
||jdd ddd	}	t| j jd
}
|
|	|}|s{|rg|f|dd  }n	|f|dd  }|dury|f| S |S t|||r|jddS dddS )a{  
        labels (`torch.LongTensor` of shape `(batch_size, height, width)`, *optional*):
            Ground truth semantic segmentation maps for computing the loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1`, a classification loss is computed (Cross-Entropy).

        Examples:

        ```python
        >>> import requests
        >>> import torch
        >>> from PIL import Image
        >>> from transformers import AutoImageProcessor, MobileViTForSemanticSegmentation

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> image_processor = AutoImageProcessor.from_pretrained("apple/deeplabv3-mobilevit-small")
        >>> model = MobileViTForSemanticSegmentation.from_pretrained("apple/deeplabv3-mobilevit-small")

        >>> inputs = image_processor(images=image, return_tensors="pt")

        >>> with torch.no_grad():
        ...     outputs = model(**inputs)

        >>> # logits are of shape (batch_size, num_labels, height, width)
        >>> logits = outputs.logits
        ```Nr   z/The number of labels should be greater than oneTr  rv   r   Fr   )ignore_indexr   )r%  r&  rt   
attentions)r"   r   r  r  r6   r   rt   rF  r   r   r   ry   r   semantic_loss_ignore_indexr   )r?   r   r$  r   r   r(  encoder_hidden_statesr&  r%  upsampled_logitsloss_fctr   r   r   r   rD     sB   $

z(MobileViTForSemanticSegmentation.forwardr)  )rE   rF   rG   r   r5   r   r   rI   rJ   rH   r   r   r   rD   rK   r   r   r@   r   rE    s$    

rE  )r  rE  r  r   )r   N)7rY   r}   typingr   r   rI   r   torch.nnr   activationsr   modeling_layersr   modeling_outputsr	   r
   r   r   modeling_utilsr   pytorch_utilsr   r   utilsr   r   r   configuration_mobilevitr   
get_loggerrE   loggerr   r    r  r!   rL   rZ   rg   r   r   r   r   r   r   r   r   r   r  r  r*  r1  rB  rE  __all__r   r   r   r   <module>   sX   
 @09 *_U5;X