o
    iJ_                     @   s  d Z ddlmZmZ ddlZddlmZ ddlm	Z	m
Z
mZ ddlmZmZmZ ddlmZmZmZmZmZ dd	lmZ dd
lmZ ddlmZ eeZdZdZ g dZ!dZ"dZ#G dd dej$j%Z&G dd dej$j%Z'G dd dej$j%Z(G dd dej$j%Z)G dd dej$j%Z*G dd dej$j%Z+G dd dej$j%Z,G dd  d ej$j%Z-eG d!d" d"ej$j%Z.G d#d$ d$eZ/d%Z0d&Z1e
d'e0G d(d) d)e/Z2e
d*e0G d+d, d,e/eZ3g d-Z4dS ).zTensorFlow RegNet model.    )OptionalUnionN   )ACT2FN)add_code_sample_docstringsadd_start_docstrings%add_start_docstrings_to_model_forward) TFBaseModelOutputWithNoAttention*TFBaseModelOutputWithPoolingAndNoAttentionTFSequenceClassifierOutput)TFPreTrainedModelTFSequenceClassificationLosskeraskeras_serializableunpack_inputs)
shape_list)logging   )RegNetConfigr   zfacebook/regnet-y-040)r   i@     r   ztabby, tabby catc                       sV   e Zd Z				ddededededed	ee f fd
dZdd ZdddZ  Z	S )TFRegNetConvLayerr   r   reluin_channelsout_channelskernel_sizestridegroups
activationc              	      s~   t  jdi | tjj|d d| _tjj|||d|ddd| _tjjddd	d
| _	|d ur3t
| ntj| _|| _|| _d S )N   )paddingVALIDFconvolution)filtersr   stridesr   r   use_biasnameh㈵>?normalizationepsilonmomentumr%    )super__init__r   layersZeroPadding2Dr   Conv2Dr!   BatchNormalizationr(   r   tfidentityr   r   r   )selfr   r   r   r   r   r   kwargs	__class__r,   a/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/regnet/modeling_tf_regnet.pyr.   7   s   
	
zTFRegNetConvLayer.__init__c                 C   s(   |  | |}| |}| |}|S N)r!   r   r(   r   )r5   hidden_stater,   r,   r9   callS   s   

zTFRegNetConvLayer.callNc                 C      | j rd S d| _ t| dd d ur2t| jj | jd d d | jg W d    n1 s-w   Y  t| dd d ur_t| jj | jd d d | j	g W d    d S 1 sXw   Y  d S d S NTr!   r(   
builtgetattrr3   
name_scoper!   r%   buildr   r(   r   r5   input_shaper,   r,   r9   rC   Y      "zTFRegNetConvLayer.build)r   r   r   r   r:   )
__name__
__module____qualname__intr   strr.   r<   rC   __classcell__r,   r,   r7   r9   r   6   s(    r   c                       s8   e Zd ZdZdef fddZdd Zd
dd	Z  ZS )TFRegNetEmbeddingszO
    RegNet Embeddings (stem) composed of a single aggressive convolution.
    configc                    s:   t  jdi | |j| _t|j|jdd|jdd| _d S )Nr   r   embedder)r   r   r   r   r   r%   r,   )r-   r.   num_channelsr   embedding_size
hidden_actrO   r5   rN   r6   r7   r,   r9   r.   j   s   zTFRegNetEmbeddings.__init__c                 C   sB   t |d }t r|| jkrtdtj|dd}| |}|S )Nr   zeMake sure that the channel dimension of the pixel values match with the one set in the configuration.)r   r   r   r   perm)r   r3   executing_eagerlyrP   
ValueError	transposerO   )r5   pixel_valuesrP   r;   r,   r,   r9   r<   v   s   
zTFRegNetEmbeddings.callNc                 C   d   | j rd S d| _ t| dd d ur0t| jj | jd  W d    d S 1 s)w   Y  d S d S )NTrO   )r@   rA   r3   rB   rO   r%   rC   rD   r,   r,   r9   rC         "zTFRegNetEmbeddings.buildr:   )	rG   rH   rI   __doc__r   r.   r<   rC   rL   r,   r,   r7   r9   rM   e   s
    rM   c                       sV   e Zd ZdZddededef fddZdd	ejd
edejfddZ	dddZ
  ZS )TFRegNetShortCutz
    RegNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r   r   r   r   c                    sN   t  jd	i | tjj|d|ddd| _tjjdddd| _|| _|| _	d S )
Nr   Fr!   )r"   r   r#   r$   r%   r&   r'   r(   r)   r,   )
r-   r.   r   r/   r1   r!   r2   r(   r   r   )r5   r   r   r   r6   r7   r,   r9   r.      s   

zTFRegNetShortCut.__init__Finputstrainingreturnc                 C   s   | j | ||dS )Nr_   )r(   r!   )r5   r^   r_   r,   r,   r9   r<      s   zTFRegNetShortCut.callNc                 C   r=   r>   r?   rD   r,   r,   r9   rC      rF   zTFRegNetShortCut.build)r   )Fr:   )rG   rH   rI   r\   rJ   r.   r3   Tensorboolr<   rC   rL   r,   r,   r7   r9   r]      s
    	r]   c                       s<   e Zd ZdZdedef fddZdd Zdd	d
Z  ZS )TFRegNetSELayerz
    Squeeze and Excitation layer (SE) proposed in [Squeeze-and-Excitation Networks](https://huggingface.co/papers/1709.01507).
    r   reduced_channelsc                    s^   t  jd
i | tjjddd| _tjj|ddddtjj|ddd	dg| _|| _|| _	d S )NTpoolerkeepdimsr%   r   r   zattention.0)r"   r   r   r%   sigmoidzattention.2r,   )
r-   r.   r   r/   GlobalAveragePooling2Drf   r1   	attentionr   re   )r5   r   re   r6   r7   r,   r9   r.      s   
zTFRegNetSELayer.__init__c                 C   s*   |  |}| jD ]}||}q|| }|S r:   )rf   rk   )r5   r;   pooledlayer_moduler,   r,   r9   r<      s
   


zTFRegNetSELayer.callNc                 C   s  | j rd S d| _ t| dd d ur-t| jj | jd W d    n1 s(w   Y  t| dd d urt| jd j | jd d d d | jg W d    n1 sVw   Y  t| jd j | jd d d d | j	g W d    d S 1 s}w   Y  d S d S )NTrf   NNNNrk   r   r   )
r@   rA   r3   rB   rf   r%   rC   rk   r   re   rD   r,   r,   r9   rC      s   "zTFRegNetSELayer.buildr:   )	rG   rH   rI   r\   rJ   r.   r<   rC   rL   r,   r,   r7   r9   rd      s
    
rd   c                	       F   e Zd ZdZddedededef fddZd	d
 ZdddZ  Z	S )TFRegNetXLayerzt
    RegNet's layer composed by three `3x3` convolutions, same as a ResNet bottleneck layer with reduction = 1.
    r   rN   r   r   r   c              	      s   t  jdi | ||kp|dk}td||j }|r#t|||ddntjjddd| _t	||d|j
ddt	|||||j
dd	t	||dd d
dg| _t|j
 | _d S )Nr   shortcutr   r%   linearr%   layer.0r   r   r%   layer.1r   r   r   r%   layer.2r,   )r-   r.   maxgroups_widthr]   r   r/   
Activationrq   r   rR   r   r   r5   rN   r   r   r   r6   should_apply_shortcutr   r7   r,   r9   r.      s   zTFRegNetXLayer.__init__c                 C   8   |}| j D ]}||}q| |}||7 }| |}|S r:   r/   rq   r   r5   r;   residualrm   r,   r,   r9   r<         



zTFRegNetXLayer.callNc              	   C      | j rd S d| _ t| dd d ur-t| jj | jd  W d    n1 s(w   Y  t| dd d urV| jD ]}t|j |d  W d    n1 sPw   Y  q8d S d S NTrq   r/   r@   rA   r3   rB   rq   r%   rC   r/   r5   rE   layerr,   r,   r9   rC         
zTFRegNetXLayer.buildr   r:   
rG   rH   rI   r\   r   rJ   r.   r<   rC   rL   r,   r,   r7   r9   rp      
     	rp   c                	       ro   )TFRegNetYLayerzC
    RegNet's Y layer: an X layer with Squeeze and Excitation.
    r   rN   r   r   r   c              
      s   t  jdi | ||kp|dk}td||j }|r#t|||ddntjjddd| _t	||d|j
ddt	|||||j
dd	t|tt|d
 ddt	||dd ddg| _t|j
 | _d S )Nr   rq   rr   rs   rt   ru   rv   rw   rx      ry   )re   r%   zlayer.3r,   )r-   r.   rz   r{   r]   r   r/   r|   rq   r   rR   rd   rJ   roundr   r   r}   r7   r,   r9   r.     s   zTFRegNetYLayer.__init__c                 C   r   r:   r   r   r,   r,   r9   r<     r   zTFRegNetYLayer.callNc              	   C   r   r   r   r   r,   r,   r9   rC     r   zTFRegNetYLayer.buildr   r:   r   r,   r,   r7   r9   r      r   r   c                       sL   e Zd ZdZ	ddededededef
 fdd	Zd
d ZdddZ  Z	S )TFRegNetStagez4
    A RegNet stage composed by stacked layers.
    r   rN   r   r   r   depthc                    s\   t  jdi |  jdkrtnt ||ddg fddt|d D | _d S )Nxzlayers.0rr   c              	      s&   g | ]} d |d  dqS )zlayers.r   rt   r,   ).0irN   r   r   r,   r9   
<listcomp>:  s   & z*TFRegNetStage.__init__.<locals>.<listcomp>r   r,   )r-   r.   
layer_typerp   r   ranger/   )r5   rN   r   r   r   r   r6   r7   r   r9   r.   1  s   
zTFRegNetStage.__init__c                 C   s   | j D ]}||}q|S r:   )r/   )r5   r;   rm   r,   r,   r9   r<   =  s   

zTFRegNetStage.callNc              	   C   sj   | j rd S d| _ t| dd d ur1| jD ]}t|j |d  W d    n1 s+w   Y  qd S d S )NTr/   )r@   rA   r/   r3   rB   r%   rC   r   r,   r,   r9   rC   B  s   
zTFRegNetStage.build)r   r   r:   r   r,   r,   r7   r9   r   ,  s     r   c                	       sL   e Zd Zdef fddZ	ddejdeded	efd
dZ	dddZ
  ZS )TFRegNetEncoderrN   c                    s   t  jdi | g | _| jt||j|jd |jrdnd|jd dd t	|j|jdd  }t
t	||jdd  D ]\}\\}}}| jt||||d|d  d q=d S )	Nr   r   r   zstages.0)r   r   r%   zstages.)r   r%   r,   )r-   r.   stagesappendr   rQ   hidden_sizesdownsample_in_first_stagedepthszip	enumerate)r5   rN   r6   in_out_channelsr   r   r   r   r7   r,   r9   r.   M  s    
(&zTFRegNetEncoder.__init__FTr;   output_hidden_statesreturn_dictr`   c                 C   sb   |rdnd }| j D ]}|r||f }||}q	|r||f }|s+tdd ||fD S t||dS )Nr,   c                 s   s    | ]	}|d ur|V  qd S r:   r,   )r   vr,   r,   r9   	<genexpr>n  s    z'TFRegNetEncoder.call.<locals>.<genexpr>)last_hidden_statehidden_states)r   tupler	   )r5   r;   r   r   r   stage_moduler,   r,   r9   r<   _  s   



zTFRegNetEncoder.callNc              	   C   sV   | j rd S d| _ | jD ]}t|j |d  W d    n1 s#w   Y  qd S )NT)r@   r   r3   rB   r%   rC   )r5   rE   stager,   r,   r9   rC   r  s   
zTFRegNetEncoder.build)FTr:   )rG   rH   rI   r   r.   r3   rb   rc   r	   r<   rC   rL   r,   r,   r7   r9   r   L  s    
r   c                       s^   e Zd ZeZ fddZe			ddejde	e
 de	e
 de
d	ef
d
dZdddZ  ZS )TFRegNetMainLayerc                    sJ   t  jdi | || _t|dd| _t|dd| _tjj	ddd| _
d S )NrO   rt   encoderTrf   rg   r,   )r-   r.   rN   rM   rO   r   r   r   r/   rj   rf   rS   r7   r,   r9   r.     s
   zTFRegNetMainLayer.__init__NFrY   r   r   r_   r`   c           
      C   s   |d ur|n| j j}|d ur|n| j j}| j||d}| j||||d}|d }| |}tj|dd}tj|dd}|rHtdd |d D }	|sT||f|dd   S t	|||r]|	d	S |j
d	S )
Nra   r   r   r_   r   r   r   r   r   rT   c                 s   s    | ]
}t j|d dV  qdS )r   rT   N)r3   rX   )r   hr,   r,   r9   r     s    z)TFRegNetMainLayer.call.<locals>.<genexpr>r   r   pooler_outputr   )rN   r   use_return_dictrO   r   rf   r3   rX   r   r
   r   )
r5   rY   r   r   r_   embedding_outputencoder_outputsr   pooled_outputr   r,   r,   r9   r<     s,   	
zTFRegNetMainLayer.callc                 C   s   | j rd S d| _ t| dd d ur-t| jj | jd  W d    n1 s(w   Y  t| dd d urRt| jj | jd  W d    n1 sMw   Y  t| dd d urzt| jj | jd W d    d S 1 ssw   Y  d S d S )NTrO   r   rf   rn   )	r@   rA   r3   rB   rO   r%   rC   r   rf   rD   r,   r,   r9   rC     s    "zTFRegNetMainLayer.buildNNFr:   )rG   rH   rI   r   config_classr.   r   r3   rb   r   rc   r
   r<   rC   rL   r,   r,   r7   r9   r   {  s&    &r   c                   @   s(   e Zd ZdZeZdZdZedd Z	dS )TFRegNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    regnetrY   c                 C   s    dt jd | jjddft jdiS )NrY      )shapedtype)r3   
TensorSpecrN   rP   float32)r5   r,   r,   r9   input_signature  s    z'TFRegNetPreTrainedModel.input_signatureN)
rG   rH   rI   r\   r   r   base_model_prefixmain_input_namepropertyr   r,   r,   r,   r9   r     s    r   ad  
    This model is a Tensorflow
    [keras.layers.Layer](https://www.tensorflow.org/api_docs/python/tf/keras/layers/Layer) sub-class. Use it as a
    regular Tensorflow Module and refer to the Tensorflow documentation for all matter related to general usage and
    behavior.

    Parameters:
        config ([`RegNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~TFPreTrainedModel.from_pretrained`] method to load the model weights.
a>  
    Args:
        pixel_values (`tf.Tensor` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`ConveNextImageProcessor.__call__`] for details.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
zOThe bare RegNet model outputting raw features without any specific head on top.c                       s   e Zd Zdef fddZeeeee	e
eded			ddejd	ee d
ee dedee
eej f f
ddZdddZ  ZS )TFRegNetModelrN   c                    s,   t  j|g|R i | t|dd| _d S )Nr   rt   )r-   r.   r   r   r5   rN   r^   r6   r7   r,   r9   r.     s   zTFRegNetModel.__init__vision)
checkpointoutput_typer   modalityexpected_outputNFrY   r   r   r_   r`   c                 C   sh   |d ur|n| j j}|d ur|n| j j}| j||||d}|s*|d f|dd   S t|j|j|jdS )N)rY   r   r   r_   r   r   r   )rN   r   r   r   r
   r   r   r   )r5   rY   r   r   r_   outputsr,   r,   r9   r<     s    zTFRegNetModel.callc                 C   rZ   )NTr   )r@   rA   r3   rB   r   r%   rC   rD   r,   r,   r9   rC     r[   zTFRegNetModel.buildr   r:   )rG   rH   rI   r   r.   r   r   REGNET_INPUTS_DOCSTRINGr   _CHECKPOINT_FOR_DOCr
   _CONFIG_FOR_DOC_EXPECTED_OUTPUT_SHAPEr3   rb   r   rc   r   r   r<   rC   rL   r,   r,   r7   r9   r     s4    
r   z
    RegNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                       s   e Zd Zdef fddZeeeee	e
eed					ddeej deej d	ee d
ee dedee
eej f fddZdddZ  ZS )TFRegNetForImageClassificationrN   c                    sb   t  j|g|R i | |j| _t|dd| _tj |jdkr*tjj|jddnt	j
g| _d S )Nr   rt   r   zclassifier.1)r-   r.   
num_labelsr   r   r   r/   FlattenDenser3   r4   
classifierr   r7   r,   r9   r.   "  s    
z'TFRegNetForImageClassification.__init__)r   r   r   r   NFrY   labelsr   r   r_   r`   c                 C   s   |dur|n| j j}|dur|n| j j}| j||||d}|r"|jn|d }| jd |}| jd |}	|du r:dn| j||	d}
|sW|	f|dd  }|
durU|
f| S |S t|
|	|jdS )a)  
        labels (`tf.Tensor` of shape `(batch_size,)`, *optional*):
            Labels for computing the image classification/regression loss. Indices should be in `[0, ...,
            config.num_labels - 1]`. If `config.num_labels > 1` a classification loss is computed (Cross-Entropy).
        Nr   r   r   )r   logitsr   )lossr   r   )	rN   r   r   r   r   r   hf_compute_lossr   r   )r5   rY   r   r   r   r_   r   r   flattened_outputr   r   outputr,   r,   r9   r<   ,  s   z#TFRegNetForImageClassification.callc                 C   s   | j rd S d| _ t| dd d ur-t| jj | jd  W d    n1 s(w   Y  t| dd d urat| jd j | jd d d d | jj	d g W d    d S 1 sZw   Y  d S d S )NTr   r   r   )
r@   rA   r3   rB   r   r%   rC   r   rN   r   rD   r,   r,   r9   rC   W  s   ""z$TFRegNetForImageClassification.build)NNNNFr:   )rG   rH   rI   r   r.   r   r   r   r   _IMAGE_CLASS_CHECKPOINTr   r   _IMAGE_CLASS_EXPECTED_OUTPUTr   r3   rb   rc   r   r   r<   rC   rL   r,   r,   r7   r9   r     s:    
#r   )r   r   r   )5r\   typingr   r   
tensorflowr3   activations_tfr   
file_utilsr   r   r   modeling_tf_outputsr	   r
   r   modeling_tf_utilsr   r   r   r   r   tf_utilsr   utilsr   configuration_regnetr   
get_loggerrG   loggerr   r   r   r   r   r/   Layerr   rM   r]   rd   rp   r   r   r   r   r   REGNET_START_DOCSTRINGr   r   r   __all__r,   r,   r,   r9   <module>   sP   
/(%.. /@2B