o
    i>                     @   sz  d Z ddlZddlmZ ddlZddlmZmZ ddlmZ ddl	m
Z
mZmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlmZ eeZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZG dd dejZ G dd dejZ!eG dd deZ"eG dd de"Z#edd G d!d" d"e"Z$ed#d G d$d% d%e"eZ%g d&Z&dS )'zPyTorch ResNet model.    N)Optional)Tensornn   )ACT2FN)BackboneOutputBaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging)BackboneMixin   )ResNetConfigc                       sH   e Zd Z	ddededededef
 fd	d
ZdedefddZ  ZS )ResNetConvLayerr   r   reluin_channelsout_channelskernel_sizestride
activationc                    sV   t    tj|||||d dd| _t|| _|d ur$t| | _	d S t | _	d S )N   F)r   r   paddingbias)
super__init__r   Conv2dconvolutionBatchNorm2dnormalizationr   Identityr   )selfr   r   r   r   r   	__class__ g/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/transformers/models/resnet/modeling_resnet.pyr   (   s   
$zResNetConvLayer.__init__inputreturnc                 C   s"   |  |}| |}| |}|S N)r   r    r   r"   r'   hidden_stater%   r%   r&   forward2   s   


zResNetConvLayer.forward)r   r   r   )	__name__
__module____qualname__intstrr   r   r,   __classcell__r%   r%   r#   r&   r   '   s    
r   c                       s8   e Zd ZdZdef fddZdedefddZ  ZS )	ResNetEmbeddingszO
    ResNet Embeddings (stem) composed of a single aggressive convolution.
    configc                    sB   t    t|j|jdd|jd| _tjdddd| _	|j| _d S )N   r   )r   r   r   r   r   )r   r   r   )
r   r   r   num_channelsembedding_size
hidden_actembedderr   	MaxPool2dpoolerr"   r4   r#   r%   r&   r   >   s   
zResNetEmbeddings.__init__pixel_valuesr(   c                 C   s4   |j d }|| jkrtd| |}| |}|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaper6   
ValueErrorr9   r;   )r"   r=   r6   	embeddingr%   r%   r&   r,   F   s   



zResNetEmbeddings.forward)	r-   r.   r/   __doc__r   r   r   r,   r2   r%   r%   r#   r&   r3   9   s    r3   c                       sB   e Zd ZdZddededef fddZded	efd
dZ  ZS )ResNetShortCutz
    ResNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r   r   r   r   c                    s0   t    tj||d|dd| _t|| _d S )Nr   F)r   r   r   )r   r   r   r   r   r   r    )r"   r   r   r   r#   r%   r&   r   W   s   
zResNetShortCut.__init__r'   r(   c                 C   s   |  |}| |}|S r)   )r   r    r*   r%   r%   r&   r,   \   s   

zResNetShortCut.forward)r   )	r-   r.   r/   rA   r0   r   r   r,   r2   r%   r%   r#   r&   rB   Q   s    rB   c                	       s<   e Zd ZdZddedededef fdd	Zd
d Z  ZS )ResNetBasicLayerzO
    A classic ResNet's residual layer composed by two `3x3` convolutions.
    r   r   r   r   r   r   c                    sf   t    ||kp|dk}|rt|||dnt | _tt|||dt||d d| _t	| | _
d S )Nr   r   r   r   r   rB   r   r!   shortcut
Sequentialr   layerr   r   )r"   r   r   r   r   should_apply_shortcutr#   r%   r&   r   g   s   
zResNetBasicLayer.__init__c                 C   .   |}|  |}| |}||7 }| |}|S r)   rI   rG   r   r"   r+   residualr%   r%   r&   r,   s      


zResNetBasicLayer.forward)r   r   )	r-   r.   r/   rA   r0   r1   r   r,   r2   r%   r%   r#   r&   rC   b   s     rC   c                       sL   e Zd ZdZ				ddededed	ed
edef fddZdd Z  Z	S )ResNetBottleNeckLayera  
    A classic ResNet's bottleneck layer composed by three `3x3` convolutions.

    The first `1x1` convolution reduces the input by a factor of `reduction` in order to make the second `3x3`
    convolution faster. The last `1x1` convolution remaps the reduced features to `out_channels`. If
    `downsample_in_bottleneck` is true, downsample will be in the first layer instead of the second layer.
    r   r      Fr   r   r   r   	reductiondownsample_in_bottleneckc           	   
      s   t    ||kp|dk}|| }|rt|||dnt | _tt||d|r)|nddt|||s3|nddt||dd d| _t	| | _
d S )Nr   rD   )r   r   )r   r   rF   )	r"   r   r   r   r   rR   rS   rJ   reduces_channelsr#   r%   r&   r      s   
	zResNetBottleNeckLayer.__init__c                 C   rK   r)   rL   rM   r%   r%   r&   r,      rO   zResNetBottleNeckLayer.forward)r   r   rQ   F)
r-   r.   r/   rA   r0   r1   boolr   r,   r2   r%   r%   r#   r&   rP   |   s(    rP   c                       sN   e Zd ZdZ		ddededededef
 fdd	Zd
edefddZ  Z	S )ResNetStagez4
    A ResNet stage composed by stacked layers.
    r   r4   r   r   r   depthc                    s   t     jdkrtnt jdkr|| j jd}n	|| jd}tj|g fddt	|d D R  | _
d S )N
bottleneck)r   r   rS   )r   r   c                    s   g | ]
} j d qS )rE   )r8   ).0_r4   rI   r   r%   r&   
<listcomp>   s    z(ResNetStage.__init__.<locals>.<listcomp>r   )r   r   
layer_typerP   rC   r8   rS   r   rH   rangelayers)r"   r4   r   r   r   rW   first_layerr#   r[   r&   r      s    

zResNetStage.__init__r'   r(   c                 C   s   |}| j D ]}||}q|S r)   )r_   )r"   r'   r+   rI   r%   r%   r&   r,      s   

zResNetStage.forward)r   r   )
r-   r.   r/   rA   r   r0   r   r   r,   r2   r%   r%   r#   r&   rV      s     	rV   c                	       s@   e Zd Zdef fddZ	ddededed	efd
dZ  Z	S )ResNetEncoderr4   c              	      s   t    tg | _| jt||j|jd |j	rdnd|j
d d t|j|jdd  }t||j
dd  D ]\\}}}| jt||||d q9d S )Nr   r   r   )r   rW   )rW   )r   r   r   
ModuleListstagesappendrV   r7   hidden_sizesdownsample_in_first_stagedepthszip)r"   r4   in_out_channelsr   r   rW   r#   r%   r&   r      s   
	 zResNetEncoder.__init__FTr+   output_hidden_statesreturn_dictr(   c                 C   sb   |rdnd }| j D ]}|r||f }||}q	|r||f }|s+tdd ||fD S t||dS )Nr%   c                 s   s    | ]	}|d ur|V  qd S r)   r%   )rY   vr%   r%   r&   	<genexpr>   s    z(ResNetEncoder.forward.<locals>.<genexpr>)last_hidden_statehidden_states)rc   tupler   )r"   r+   rj   rk   ro   stage_moduler%   r%   r&   r,      s   



zResNetEncoder.forward)FT)
r-   r.   r/   r   r   r   rU   r   r,   r2   r%   r%   r#   r&   ra      s    ra   c                   @   s.   e Zd ZU eed< dZdZddgZdd ZdS )	ResNetPreTrainedModelr4   resnetr=   r   rB   c                 C   s   t |tjrtjj|jddd d S t |tjrMtjj|jt	dd |j
d urKtj|j\}}|dkr=dt	| nd}tj|j
| | d S d S t |tjtjfrhtj|jd tj|j
d d S d S )Nfan_outr   )modenonlinearity   )ar   r   )
isinstancer   r   initkaiming_normal_weightLinearkaiming_uniform_mathsqrtr   _calculate_fan_in_and_fan_outuniform_r   	GroupNorm	constant_)r"   modulefan_inrZ   boundr%   r%   r&   _init_weights   s   
z#ResNetPreTrainedModel._init_weightsN)	r-   r.   r/   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   r%   r%   r%   r&   rr      s   
 rr   c                
       sF   e Zd Z fddZe	d
dedee dee defdd	Z	  Z
S )ResNetModelc                    s>   t  | || _t|| _t|| _td| _	| 
  d S )N)r   r   )r   r   r4   r3   r9   ra   encoderr   AdaptiveAvgPool2dr;   	post_initr<   r#   r%   r&   r     s   

zResNetModel.__init__Nr=   rj   rk   r(   c                 C   s|   |d ur|n| j j}|d ur|n| j j}| |}| j|||d}|d }| |}|s6||f|dd   S t|||jdS )Nrj   rk   r   r   )rn   pooler_outputro   )r4   rj   use_return_dictr9   r   r;   r	   ro   )r"   r=   rj   rk   embedding_outputencoder_outputsrn   pooled_outputr%   r%   r&   r,     s    

zResNetModel.forwardNN)r-   r.   r/   r   r   r   r   rU   r	   r,   r2   r%   r%   r#   r&   r     s    	r   z
    ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                       s\   e Zd Z fddZe				ddeej deej dee	 dee	 de
f
d	d
Z  ZS )ResNetForImageClassificationc                    s^   t  | |j| _t|| _tt |jdkr#t|j	d |jnt
 | _|   d S )Nr   )r   r   
num_labelsr   rs   r   rH   Flattenr}   re   r!   
classifierr   r<   r#   r%   r&   r   :  s   
$z%ResNetForImageClassification.__init__Nr=   labelsrj   rk   r(   c           
      C   s   |dur|n| j j}| j|||d}|r|jn|d }| |}d}|dur.| ||| j }|sD|f|dd  }	|durB|f|	 S |	S t|||jdS )a0  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   )losslogitsro   )r4   r   rs   r   r   loss_functionr
   ro   )
r"   r=   r   rj   rk   outputsr   r   r   outputr%   r%   r&   r,   F  s   
z$ResNetForImageClassification.forward)NNNN)r-   r.   r/   r   r   r   torchFloatTensor
LongTensorrU   r
   r,   r2   r%   r%   r#   r&   r   3  s$    r   zO
    ResNet backbone, to be used with frameworks like DETR and MaskFormer.
    c                
       sJ   e Zd ZdZ fddZe	ddedee dee de	fd	d
Z
  ZS )ResNetBackboneFc                    sH   t  | t  | |jg|j | _t|| _t|| _	| 
  d S r)   )r   r   _init_backboner7   re   num_featuresr3   r9   ra   r   r   r<   r#   r%   r&   r   o  s   

zResNetBackbone.__init__Nr=   rj   rk   r(   c                 C   s   |dur|n| j j}|dur|n| j j}| |}| j|ddd}|j}d}t| jD ]\}}	|	| jv r;||| f7 }q+|sK|f}
|rI|
|jf7 }
|
S t	||rU|jddS dddS )a!  
        Examples:

        ```python
        >>> from transformers import AutoImageProcessor, AutoBackbone
        >>> import torch
        >>> from PIL import Image
        >>> import requests

        >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
        >>> image = Image.open(requests.get(url, stream=True).raw)

        >>> processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
        >>> model = AutoBackbone.from_pretrained(
        ...     "microsoft/resnet-50", out_features=["stage1", "stage2", "stage3", "stage4"]
        ... )

        >>> inputs = processor(image, return_tensors="pt")

        >>> outputs = model(**inputs)
        >>> feature_maps = outputs.feature_maps
        >>> list(feature_maps[-1].shape)
        [1, 2048, 7, 7]
        ```NTr   r%   )feature_mapsro   
attentions)
r4   r   rj   r9   r   ro   	enumeratestage_namesout_featuresr   )r"   r=   rj   rk   r   r   ro   r   idxstager   r%   r%   r&   r,   z  s0   

zResNetBackbone.forwardr   )r-   r.   r/   has_attentionsr   r   r   r   rU   r   r,   r2   r%   r%   r#   r&   r   g  s    r   )r   r   rr   r   )'rA   r   typingr   r   r   r   activationsr   modeling_outputsr   r   r	   r
   modeling_utilsr   utilsr   r   utils.backbone_utilsr   configuration_resnetr   
get_loggerr-   loggerModuler   r3   rB   rC   rP   rV   ra   rr   r   r   r   __all__r%   r%   r%   r&   <module>   s@   
*&)'.G