o
    wi;                     @   s|  d Z ddlZddlmZ ddlZddlZddlmZmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ eeZG dd dejZG dd dejZG dd dejZG dd dejZ G dd dejZ!G dd dejZ"G dd dejZ#G dd dejZ$eG dd deZ%eG dd  d e%Z&ed!d"G d#d$ d$e%Z'g d%Z(dS )&zPyTorch RegNet model.    N)Optional)Tensornn)BCEWithLogitsLossCrossEntropyLossMSELoss   )ACT2FN)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )RegNetConfigc                       sL   e Zd Z				ddededededed	ee f fd
dZdd Z  ZS )RegNetConvLayerr   r   reluin_channelsout_channelskernel_sizestridegroups
activationc              	      sX   t    tj|||||d |dd| _t|| _|d ur%t| | _	d S t | _	d S )N   F)r   r   paddingr   bias)
super__init__r   Conv2dconvolutionBatchNorm2dnormalizationr	   Identityr   )selfr   r   r   r   r   r   	__class__ g/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/transformers/models/regnet/modeling_regnet.pyr   (   s   
		$zRegNetConvLayer.__init__c                 C   s"   |  |}| |}| |}|S N)r    r"   r   r$   hidden_stater'   r'   r(   forward>   s   


zRegNetConvLayer.forward)r   r   r   r   )	__name__
__module____qualname__intr   strr   r,   __classcell__r'   r'   r%   r(   r   '   s&    r   c                       s.   e Zd ZdZdef fddZdd Z  ZS )RegNetEmbeddingszO
    RegNet Embeddings (stem) composed of a single aggressive convolution.
    configc                    s0   t    t|j|jdd|jd| _|j| _d S )Nr   r   )r   r   r   )r   r   r   num_channelsembedding_size
hidden_actembedderr$   r4   r%   r'   r(   r   J   s
   
zRegNetEmbeddings.__init__c                 C   s*   |j d }|| jkrtd| |}|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)shaper5   
ValueErrorr8   )r$   pixel_valuesr5   r+   r'   r'   r(   r,   Q   s   


zRegNetEmbeddings.forward)r-   r.   r/   __doc__r   r   r,   r2   r'   r'   r%   r(   r3   E   s    r3   c                       sB   e Zd ZdZddededef fddZded	efd
dZ  ZS )RegNetShortCutz
    RegNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r   r   r   r   c                    s0   t    tj||d|dd| _t|| _d S )Nr   F)r   r   r   )r   r   r   r   r    r!   r"   )r$   r   r   r   r%   r'   r(   r   b   s   
zRegNetShortCut.__init__inputreturnc                 C   s   |  |}| |}|S r)   )r    r"   )r$   r?   r+   r'   r'   r(   r,   g   s   

zRegNetShortCut.forward)r   )	r-   r.   r/   r=   r0   r   r   r,   r2   r'   r'   r%   r(   r>   \   s    r>   c                       s2   e Zd ZdZdedef fddZdd Z  ZS )RegNetSELayerz
    Squeeze and Excitation layer (SE) proposed in [Squeeze-and-Excitation Networks](https://huggingface.co/papers/1709.01507).
    r   reduced_channelsc              	      sL   t    td| _ttj||ddt tj||ddt | _	d S )Nr   r   r   )r   )
r   r   r   AdaptiveAvgPool2dpooler
Sequentialr   ReLUSigmoid	attention)r$   r   rB   r%   r'   r(   r   r   s   

zRegNetSELayer.__init__c                 C   s    |  |}| |}|| }|S r)   )rE   rI   )r$   r+   pooledrI   r'   r'   r(   r,   }   s   

zRegNetSELayer.forward)r-   r.   r/   r=   r0   r   r,   r2   r'   r'   r%   r(   rA   m   s    rA   c                	       <   e Zd ZdZddedededef fddZd	d
 Z  ZS )RegNetXLayerzt
    RegNet's layer composed by three `3x3` convolutions, same as a ResNet bottleneck layer with reduction = 1.
    r   r4   r   r   r   c              
      s   t    ||kp|dk}td||j }|rt|||dnt | _tt	||d|j
dt	|||||j
dt	||dd d| _t|j
 | _d S )Nr   r   r   r   r   r   r   )r   r   maxgroups_widthr>   r   r#   shortcutrF   r   r7   layerr	   r   r$   r4   r   r   r   should_apply_shortcutr   r%   r'   r(   r      s   
zRegNetXLayer.__init__c                 C   .   |}|  |}| |}||7 }| |}|S r)   rS   rR   r   r$   r+   residualr'   r'   r(   r,         


zRegNetXLayer.forwardr   	r-   r.   r/   r=   r   r0   r   r,   r2   r'   r'   r%   r(   rL      s     rL   c                	       rK   )RegNetYLayerzC
    RegNet's Y layer: an X layer with Squeeze and Excitation.
    r   r4   r   r   r   c                    s   t    ||kp|dk}td||j }|rt|||dnt | _tt	||d|j
dt	|||||j
dt|tt|d dt	||dd d| _t|j
 | _d S )Nr   rM   rN   rO      )rB   )r   r   rP   rQ   r>   r   r#   rR   rF   r   r7   rA   r0   roundrS   r	   r   rT   r%   r'   r(   r      s   
zRegNetYLayer.__init__c                 C   rV   r)   rW   rX   r'   r'   r(   r,      rZ   zRegNetYLayer.forwardr[   r\   r'   r'   r%   r(   r]      s     r]   c                       sD   e Zd ZdZ		ddededededef
 fdd	Zd
d Z  ZS )RegNetStagez4
    A RegNet stage composed by stacked layers.
    r   r4   r   r   r   depthc                    sZ   t     jdkrtnttj ||dg fddt|d D R  | _d S )NxrM   c                    s   g | ]} qS r'   r'   ).0_r4   rS   r   r'   r(   
<listcomp>   s    z(RegNetStage.__init__.<locals>.<listcomp>r   )	r   r   
layer_typerL   r]   r   rF   rangelayers)r$   r4   r   r   r   ra   r%   re   r(   r      s   
zRegNetStage.__init__c                 C   s   |  |}|S r)   )ri   r*   r'   r'   r(   r,      s   
zRegNetStage.forward)r   r   r\   r'   r'   r%   r(   r`      s     	r`   c                	       s@   e Zd Zdef fddZ	ddededed	efd
dZ  Z	S )RegNetEncoderr4   c              	      s   t    tg | _| jt||j|jd |j	rdnd|j
d d t|j|jdd  }t||j
dd  D ]\\}}}| jt||||d q9d S )Nr   r   r   )r   ra   )ra   )r   r   r   
ModuleListstagesappendr`   r6   hidden_sizesdownsample_in_first_stagedepthszip)r$   r4   in_out_channelsr   r   ra   r%   r'   r(   r      s   
	 zRegNetEncoder.__init__FTr+   output_hidden_statesreturn_dictr@   c                 C   sb   |rdnd }| j D ]}|r||f }||}q	|r||f }|s+tdd ||fD S t||dS )Nr'   c                 s   s    | ]	}|d ur|V  qd S r)   r'   )rc   vr'   r'   r(   	<genexpr>   s    z(RegNetEncoder.forward.<locals>.<genexpr>)last_hidden_statehidden_states)rl   tupler
   )r$   r+   rs   rt   rx   stage_moduler'   r'   r(   r,      s   



zRegNetEncoder.forward)FT)
r-   r.   r/   r   r   r   boolr
   r,   r2   r'   r'   r%   r(   rj      s    rj   c                   @   s&   e Zd ZeZdZdZdgZdd ZdS )RegNetPreTrainedModelregnetr<   r]   c                 C   s   t |tjrtjj|jddd d S t |tjrMtjj|jt	dd |j
d urKtj|j\}}|dkr=dt	| nd}tj|j
| | d S d S t |tjtjfrhtj|jd tj|j
d d S d S )Nfan_outr   )modenonlinearity   )ar   r   )
isinstancer   r   initkaiming_normal_weightLinearkaiming_uniform_mathsqrtr   _calculate_fan_in_and_fan_outuniform_r!   	GroupNorm	constant_)r$   modulefan_inrd   boundr'   r'   r(   _init_weights  s   
z#RegNetPreTrainedModel._init_weightsN)	r-   r.   r/   r   config_classbase_model_prefixmain_input_name_no_split_modulesr   r'   r'   r'   r(   r|     s    r|   c                
       sF   e Zd Z fddZe	d
dedee dee defdd	Z	  Z
S )RegNetModelc                    s>   t  | || _t|| _t|| _td| _	| 
  d S )NrC   )r   r   r4   r3   r8   rj   encoderr   rD   rE   	post_initr9   r%   r'   r(   r     s   

zRegNetModel.__init__Nr<   rs   rt   r@   c                 C   s|   |d ur|n| j j}|d ur|n| j j}| |}| j|||d}|d }| |}|s6||f|dd   S t|||jdS )Nrs   rt   r   r   )rw   pooler_outputrx   )r4   rs   use_return_dictr8   r   rE   r   rx   )r$   r<   rs   rt   embedding_outputencoder_outputsrw   pooled_outputr'   r'   r(   r,   (  s    

zRegNetModel.forward)NN)r-   r.   r/   r   r   r   r   r{   r   r,   r2   r'   r'   r%   r(   r     s    	r   z
    RegNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    )custom_introc                       s\   e Zd Z fddZe				ddeej deej dee	 dee	 de
f
d	d
Z  ZS )RegNetForImageClassificationc                    s^   t  | |j| _t|| _tt |jdkr#t|j	d |jnt
 | _|   d S )Nr   )r   r   
num_labelsr   r}   r   rF   Flattenr   rn   r#   
classifierr   r9   r%   r'   r(   r   M  s   
$z%RegNetForImageClassification.__init__Nr<   labelsrs   rt   r@   c                 C   sb  |dur|n| j j}| j|||d}|r|jn|d }| |}d}|dur| j jdu rP| jdkr6d| j _n| jdkrL|jtj	ksG|jtj
krLd| j _nd| j _| j jdkrnt }	| jdkrh|	| | }n+|	||}n%| j jdkrt }	|	|d| j|d}n| j jdkrt }	|	||}|s|f|dd  }
|dur|f|
 S |
S t|||jd	S )
a0  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   
regressionsingle_label_classificationmulti_label_classificationr   r   )losslogitsrx   )r4   r   r}   r   r   problem_typer   dtypetorchlongr0   r   squeezer   viewr   r   rx   )r$   r<   r   rs   rt   outputsr   r   r   loss_fctoutputr'   r'   r(   r,   Y  s6   


"


z$RegNetForImageClassification.forward)NNNN)r-   r.   r/   r   r   r   r   FloatTensor
LongTensorr{   r   r,   r2   r'   r'   r%   r(   r   E  s$    r   )r   r   r|   ))r=   r   typingr   r   torch.utils.checkpointr   r   torch.nnr   r   r   activationsr	   modeling_outputsr
   r   r   modeling_utilsr   utilsr   r   configuration_regnetr   
get_loggerr-   loggerModuler   r3   r>   rA   rL   r]   r`   rj   r|   r   r   __all__r'   r'   r'   r(   <module>   s<   
!&'@