o
    iO                     @   s  d Z ddlZddlmZmZ ddlZddlmZ ddlmZ ddl	m
Z
mZmZ ddlmZ dd	lmZmZ d
dlmZ eeZdedefddZd.deeef defddZG dd dejZG dd dejZG dd dejZ G dd dejZ!G dd dejZ"G dd  d ejZ#G d!d" d"ejZ$G d#d$ d$ejZ%eG d%d& d&eZ&eG d'd( d(e&Z'ed)d*G d+d, d,e&Z(g d-Z)dS )/zPyTorch EfficientNet model.    N)OptionalUnion)nn   )ACT2FN)BaseModelOutputWithNoAttention(BaseModelOutputWithPoolingAndNoAttention$ImageClassifierOutputWithNoAttention)PreTrainedModel)auto_docstringlogging   )EfficientNetConfigconfignum_channelsc                 C   sJ   | j }|| j9 }t|t||d  | | }|d| k r!||7 }t|S )z<
    Round number of filters based on depth multiplier.
       g?)depth_divisorwidth_coefficientmaxint)r   r   divisornew_dim r   j/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/efficientnet/modeling_efficientnet.pyround_filters%   s   
r   Tkernel_sizeadjustc                 C   sn   t | tr	| | f} | d d | d d f}|r)|d d |d |d d |d fS |d |d |d |d fS )aJ  
    Utility function to get the tuple padding value for the depthwise convolution.

    Args:
        kernel_size (`int` or `tuple`):
            Kernel size of the convolution layers.
        adjust (`bool`, *optional*, defaults to `True`):
            Adjusts padding value to apply to right and bottom sides of the input.
    r   r   r   )
isinstancer   )r   r   correctr   r   r   correct_pad4   s   

$r   c                       s<   e Zd ZdZdef fddZdejdejfddZ  Z	S )	EfficientNetEmbeddingszL
    A module that corresponds to the stem module of the original work.
    r   c                    sh   t    t|d| _tjdd| _tj|j| jddddd| _	tj
| j|j|jd	| _t|j | _d S )
N    )r   r   r   r   paddingr   r   validFr   strider#   bias)epsmomentum)super__init__r   out_dimr   	ZeroPad2dr#   Conv2dr   convolutionBatchNorm2dbatch_norm_epsbatch_norm_momentum	batchnormr   
hidden_act
activationselfr   	__class__r   r   r+   M   s   
zEfficientNetEmbeddings.__init__pixel_valuesreturnc                 C   s,   |  |}| |}| |}| |}|S N)r#   r/   r3   r5   )r7   r:   featuresr   r   r   forwardX   s
   



zEfficientNetEmbeddings.forward)
__name__
__module____qualname____doc__r   r+   torchTensorr>   __classcell__r   r   r8   r   r    H   s    r    c                       s,   e Zd Z							d fdd	Z  ZS )	EfficientNetDepthwiseConv2dr   r   r   Tzerosc	           
         s*   || }	t  j||	|||||||d	 d S )N)	in_channelsout_channelsr   r&   r#   dilationgroupsr'   padding_mode)r*   r+   )
r7   rH   depth_multiplierr   r&   r#   rJ   r'   rL   rI   r8   r   r   r+   b   s   
z$EfficientNetDepthwiseConv2d.__init__)r   r   r   r   r   TrG   )r?   r@   rA   r+   rE   r   r   r8   r   rF   a   s    rF   c                       sH   e Zd ZdZdedededef fddZdejd	ej	fd
dZ
  ZS )EfficientNetExpansionLayerz_
    This corresponds to the expansion phase of each block in the original implementation.
    r   in_dimr,   r&   c                    sB   t    tj||dddd| _tj||jd| _t|j	 | _
d S )Nr   sameFrH   rI   r   r#   r'   )num_featuresr(   )r*   r+   r   r.   expand_convr0   r1   	expand_bnr   r4   
expand_act)r7   r   rO   r,   r&   r8   r   r   r+      s   
z#EfficientNetExpansionLayer.__init__hidden_statesr;   c                 C   s"   |  |}| |}| |}|S r<   )rS   rT   rU   r7   rV   r   r   r   r>      s   


z"EfficientNetExpansionLayer.forward)r?   r@   rA   rB   r   r   r+   rC   FloatTensorrD   r>   rE   r   r   r8   r   rN   {   s    rN   c                
       sL   e Zd ZdZdededededef
 fddZd	ej	d
ej
fddZ  ZS )EfficientNetDepthwiseLayerzk
    This corresponds to the depthwise convolution phase of each block in the original implementation.
    r   rO   r&   r   adjust_paddingc                    sv   t    || _| jdkrdnd}t||d}tj|d| _t||||dd| _tj	||j
|jd| _t|j | _d S )	Nr   r$   rP   )r   r"   Fr%   rR   r(   r)   )r*   r+   r&   r   r   r-   depthwise_conv_padrF   depthwise_convr0   r1   r2   depthwise_normr   r4   depthwise_act)r7   r   rO   r&   r   rZ   conv_padr#   r8   r   r   r+      s   


z#EfficientNetDepthwiseLayer.__init__rV   r;   c                 C   s6   | j dkr
| |}| |}| |}| |}|S )Nr   )r&   r\   r]   r^   r_   rW   r   r   r   r>      s   




z"EfficientNetDepthwiseLayer.forwardr?   r@   rA   rB   r   r   boolr+   rC   rX   rD   r>   rE   r   r   r8   r   rY      s    rY   c                	       sJ   e Zd ZdZddedededef fddZd	ej	d
ej
fddZ  ZS )EfficientNetSqueezeExciteLayerzl
    This corresponds to the Squeeze and Excitement phase of each block in the original implementation.
    Fr   rO   
expand_dimexpandc                    s   t    |r	|n|| _tdt||j | _tjdd| _	tj
| j| jddd| _tj
| j| jddd| _t|j | _t | _d S )Nr   )output_sizerP   )rH   rI   r   r#   )r*   r+   dimr   r   squeeze_expansion_ratiodim_ser   AdaptiveAvgPool2dsqueezer.   reducere   r   r4   
act_reduceSigmoid
act_expand)r7   r   rO   rd   re   r8   r   r   r+      s$   
z'EfficientNetSqueezeExciteLayer.__init__rV   r;   c                 C   sF   |}|  |}| |}| |}| |}| |}t||}|S r<   )rk   rl   rm   re   ro   rC   mul)r7   rV   inputsr   r   r   r>      s   




z&EfficientNetSqueezeExciteLayer.forward)Fra   r   r   r8   r   rc      s     rc   c                       sV   e Zd ZdZdedededededef fdd	Zd
e	j
de	j
de	jfddZ  ZS )EfficientNetFinalBlockLayerz[
    This corresponds to the final phase of each block in the original implementation.
    r   rO   r,   r&   	drop_rateid_skipc                    sX   t    |dko| | _tj||dddd| _tj||j|jd| _	tj
|d| _d S )Nr   rP   FrQ   r[   p)r*   r+   apply_dropoutr   r.   project_convr0   r1   r2   
project_bnDropoutdropout)r7   r   rO   r,   r&   rs   rt   r8   r   r   r+      s   

z$EfficientNetFinalBlockLayer.__init__
embeddingsrV   r;   c                 C   s0   |  |}| |}| jr| |}|| }|S r<   )rx   ry   rw   r{   )r7   r|   rV   r   r   r   r>      s   


z#EfficientNetFinalBlockLayer.forwardr?   r@   rA   rB   r   r   floatrb   r+   rC   rX   rD   r>   rE   r   r   r8   r   rr      s     $rr   c                       s\   e Zd ZdZdededededededed	ed
ef fddZde	j
de	jfddZ  ZS )EfficientNetBlocka  
    This corresponds to the expansion and depthwise convolution phase of each block in the original implementation.

    Args:
        config ([`EfficientNetConfig`]):
            Model configuration class.
        in_dim (`int`):
            Number of input channels.
        out_dim (`int`):
            Number of output channels.
        stride (`int`):
            Stride size to be used in convolution layers.
        expand_ratio (`int`):
            Expand ratio to set the output dimensions for the expansion and squeeze-excite layers.
        kernel_size (`int`):
            Kernel size for the depthwise convolution layer.
        drop_rate (`float`):
            Dropout rate to be used in the final phase of each block.
        id_skip (`bool`):
            Whether to apply dropout and sum the final hidden states with the input embeddings during the final phase
            of each block. Set to `True` for the first block of each stage.
        adjust_padding (`bool`):
            Whether to apply padding to only right and bottom side of the input kernel before the depthwise convolution
            operation, set to `True` for inputs with odd input sizes.
    r   rO   r,   r&   expand_ratior   rs   rt   rZ   c
                    s   t    || _| jdk| _|| }
| jrt|||
|d| _t|| jr%|
n||||	d| _t|||
| jd| _	t
|| jr>|
n|||||d| _d S )Nr   )r   rO   r,   r&   )r   rO   r&   r   rZ   )r   rO   rd   re   )r   rO   r,   r&   rs   rt   )r*   r+   r   re   rN   	expansionrY   r]   rc   squeeze_exciterr   
projection)r7   r   rO   r,   r&   r   r   rs   rt   rZ   expand_in_dimr8   r   r   r+     s4   

zEfficientNetBlock.__init__rV   r;   c                 C   s<   |}| j dkr| |}| |}| |}| ||}|S )Nr   )r   r   r]   r   r   )r7   rV   r|   r   r   r   r>   H  s   



zEfficientNetBlock.forwardr}   r   r   r8   r   r     s,    	
)r   c                	       sP   e Zd ZdZdef fddZ		ddejdee	 d	ee	 d
e
fddZ  ZS )EfficientNetEncoderz
    Forward propagates the embeddings through each EfficientNet block.

    Args:
        config ([`EfficientNetConfig`]):
            Model configuration class.
    r   c                    sn  t    |_|j_fdd t|j}t fdd|jD }d}g }t|D ]c}t	||j| }t	||j
| }|j| }	|j| }
|j| }t |j| D ]8}|dk}|dkradn|	}	|dkri|n|}||jv}|j| | }t||||	|
||||d	}|| |d7 }qUq+t|_tj|t	|ddd	d
d_tj|j|j|jd_t|j _d S )Nc                    s   t t j|  S r<   )r   mathceildepth_coefficient)repeats)r7   r   r   round_repeatsc  s   z3EfficientNetEncoder.__init__.<locals>.round_repeatsc                 3   s    | ]} |V  qd S r<   r   ).0n)r   r   r   	<genexpr>h  s    z/EfficientNetEncoder.__init__.<locals>.<genexpr>r   r   )	r   rO   r,   r&   r   r   rs   rt   rZ   i   rP   FrQ   r[   )r*   r+   r   r   lenrH   sumnum_block_repeatsranger   rI   strideskernel_sizesexpand_ratiosdepthwise_paddingdrop_connect_rater   appendr   
ModuleListblocksr.   top_convr0   
hidden_dimr1   r2   top_bnr   r4   top_activation)r7   r   num_base_blocks
num_blockscurr_block_numr   irO   r,   r&   r   r   jrt   rZ   rs   blockr8   )r   r7   r   r+   ^  s\   







zEfficientNetEncoder.__init__FTrV   output_hidden_statesreturn_dictr;   c                 C   st   |r|fnd }| j D ]}||}|r||f7 }q
| |}| |}| |}|s4tdd ||fD S t||dS )Nc                 s   s    | ]	}|d ur|V  qd S r<   r   )r   vr   r   r   r     s    z.EfficientNetEncoder.forward.<locals>.<genexpr>)last_hidden_staterV   )r   r   r   r   tupler   )r7   rV   r   r   all_hidden_statesr   r   r   r   r>     s   




zEfficientNetEncoder.forward)FT)r?   r@   rA   rB   r   r+   rC   rX   r   rb   r   r>   rE   r   r   r8   r   r   U  s    :r   c                   @   s2   e Zd ZU eed< dZdZg Zdej	fddZ
dS )EfficientNetPreTrainedModelr   efficientnetr:   modulec                 C   sN   t |tjtjtjfr#|jjjd| jj	d |j
dur%|j
j  dS dS dS )zInitialize the weightsg        )meanstdN)r   r   Linearr.   r0   weightdatanormal_r   initializer_ranger'   zero_)r7   r   r   r   r   _init_weights  s   
z)EfficientNetPreTrainedModel._init_weightsN)r?   r@   rA   r   __annotations__base_model_prefixmain_input_name_no_split_modulesr   Moduler   r   r   r   r   r     s   
 r   c                       s^   e Zd Zdef fddZe			ddeej dee	 dee	 de
eef fd	d
Z  ZS )EfficientNetModelr   c                    s~   t  | || _t|| _t|| _|jdkr"tj	|j
dd| _n|jdkr1tj|j
dd| _ntd|j |   d S )Nr   T)	ceil_moder   z2config.pooling must be one of ['mean', 'max'] got )r*   r+   r   r    r|   r   encoderpooling_typer   	AvgPool2dr   pooler	MaxPool2d
ValueErrorpooling	post_initr6   r8   r   r   r+     s   



zEfficientNetModel.__init__Nr:   r   r   r;   c                 C   s   |d ur|n| j j}|d ur|n| j j}|d u rtd| |}| j|||d}|d }| |}||jd d }|sH||f|dd   S t	|||j
dS )Nz You have to specify pixel_valuesr   r   r   r   r   )r   pooler_outputrV   )r   r   use_return_dictr   r|   r   r   reshapeshaper   rV   )r7   r:   r   r   embedding_outputencoder_outputsr   pooled_outputr   r   r   r>     s*   

zEfficientNetModel.forward)NNN)r?   r@   rA   r   r+   r   r   rC   rX   rb   r   r   r   r>   rE   r   r   r8   r   r     s    
r   z
    EfficientNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g.
    for ImageNet.
    )custom_introc                       sd   e Zd Z fddZe				ddeej deej dee	 dee	 de
eef f
d	d
Z  ZS )"EfficientNetForImageClassificationc                    sd   t  | |j| _|| _t|| _tj|jd| _	| jdkr't
|j| jnt | _|   d S )Nru   r   )r*   r+   
num_labelsr   r   r   r   rz   dropout_rater{   r   r   Identity
classifierr   r6   r8   r   r   r+      s   
$z+EfficientNetForImageClassification.__init__Nr:   labelsr   r   r;   c           
      C   s   |dur|n| j j}| j|||d}|r|jn|d }| |}| |}d}|dur3| ||| j }|sI|f|dd  }	|durG|f|	 S |	S t|||jdS )a  
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels == 1` a regression loss is computed (Mean-Square loss), If
            `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   )losslogitsrV   )	r   r   r   r   r{   r   loss_functionr	   rV   )
r7   r:   r   r   r   outputsr   r   r   outputr   r   r   r>     s    

z*EfficientNetForImageClassification.forward)NNNN)r?   r@   rA   r+   r   r   rC   rX   
LongTensorrb   r   r   r	   r>   rE   r   r   r8   r   r     s$    
r   )r   r   r   )T)*rB   r   typingr   r   rC   r   activationsr   modeling_outputsr   r   r	   modeling_utilsr
   utilsr   r   configuration_efficientnetr   
get_loggerr?   loggerr   r   r   rb   r   r   r    r.   rF   rN   rY   rc   rr   r   r   r   r   r   __all__r   r   r   r   <module>   s<   
''!QZ82