o
    igo                     @   sj  d dl mZ d dlmZ d dlmZ d dlZd dlm	Z
 d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZ d dlmZmZmZmZ d d	lmZm Z  d
Z!dZ"G dd dej#Z$G dd dej#Z%G dd dej#Z&G dd dej#Z'G dd dej#Z(G dd dej#Z)G dd dej#Z*G dd dej#Z+G dd dej#Z,G dd dej#Z-G d d! d!ej#Z.G d"d# d#ej#Z/G d$d% d%ej#Z0G d&d' d'ej#Z1G d(d) d)eZ2G d*d+ d+ej#Z3ed,e!G d-d. d.e2Z4d/Z5ee4e5 ee4eed0 G d1d2 d2ej#Z6G d3d4 d4ej#Z7ed5e!G d6d7 d7e2Z8d8Z9ee8e9 ee8eed0 g d9Z:dS ):    )partial)OptionalN)
FrozenDictfreezeunfreeze)flatten_dictunflatten_dict)RegNetConfig)"FlaxBaseModelOutputWithNoAttentionFlaxBaseModelOutputWithPooling,FlaxBaseModelOutputWithPoolingAndNoAttention(FlaxImageClassifierOutputWithNoAttention)ACT2FNFlaxPreTrainedModel append_replace_return_docstringsoverwrite_call_docstring)add_start_docstrings%add_start_docstrings_to_model_forwarda  

    This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading, saving and converting weights from PyTorch models)

    This model is also a
    [flax.linen.Module](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) subclass. Use it as
    a regular Flax linen Module and refer to the Flax documentation for all matter related to general usage and
    behavior.

    Finally, this model supports inherent JAX features such as:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        config ([`RegNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights.
        dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`):
            The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
            `jax.numpy.bfloat16` (on TPUs).

            This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
            specified all the computation will be performed with the given `dtype`.

            **Note that this only specifies the dtype of the computation and does not influence the dtype of model
            parameters.**

            If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and
            [`~FlaxPreTrainedModel.to_bf16`].
a@  
    Args:
        pixel_values (`numpy.ndarray` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`RegNetImageProcessor.__call__`] for details.

        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                   @   s   e Zd ZdZejdd ZdS )IdentityzIdentity function.c                 K   s   |S N )selfxkwargsr   r   c/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/regnet/modeling_flax_regnet.py__call__b   s   zIdentity.__call__N)__name__
__module____qualname____doc__nncompactr   r   r   r   r   r   _   s    r   c                   @   s~   e Zd ZU eed< dZeed< dZeed< dZeed< dZe	e
 ed< ejZejed	< d
d ZddejdedejfddZdS )FlaxRegNetConvLayerout_channels   kernel_size   stridegroupsrelu
activationdtypec                 C   sz   t j| j| j| jf| j| jd | jdt jjdddd| jd| _	t j
dd	| jd
| _| jd ur7t| j | _d S t | _d S )N   F       @fan_outtruncated_normalmodedistribution)r%   stridespaddingfeature_group_countuse_biaskernel_initr+   ?h㈵>momentumepsilonr+   )r    Convr#   r%   r'   r(   initializersvariance_scalingr+   convolution	BatchNormnormalizationr*   r   r   activation_funcr   r   r   r   setupo   s   

&zFlaxRegNetConvLayer.setupThidden_statedeterministicreturnc                 C   s&   |  |}| j||d}| |}|S N)use_running_average)r@   rB   rC   )r   rF   rG   r   r   r   r   }   s   

zFlaxRegNetConvLayer.__call__NT)r   r   r   int__annotations__r%   r'   r(   r*   r   strjnpfloat32r+   rE   ndarrayboolr   r   r   r   r   r"   g   s   
  r"   c                   @   sJ   e Zd ZU eed< ejZejed< dd Zddej	de
dej	fd	d
ZdS )FlaxRegNetEmbeddingsconfigr+   c                 C   s"   t | jjdd| jj| jd| _d S )Nr$   r,   )r%   r'   r*   r+   )r"   rT   embedding_size
hidden_actr+   embedderrD   r   r   r   rE      s   zFlaxRegNetEmbeddings.setupTpixel_valuesrG   rH   c                 C   s0   |j d }|| jjkrtd| j||d}|S )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rG   )shaperT   num_channels
ValueErrorrW   )r   rX   rG   r\   rF   r   r   r   r      s   
zFlaxRegNetEmbeddings.__call__NrK   )r   r   r   r	   rM   rO   rP   r+   rE   rQ   rR   r   r   r   r   r   rS      s
   
  	rS   c                   @   sZ   e Zd ZU dZeed< dZeed< ejZ	ej	ed< dd Z
dd	ejd
edejfddZdS )FlaxRegNetShortCutz
    RegNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r#   r,   r'   r+   c              
   C   sD   t j| jd| jdt jjdddd| jd| _t jdd	| jd
| _	d S )Nr&   r&   Fr-   r.   r/   r0   )r%   r3   r6   r7   r+   r8   r9   r:   )
r    r=   r#   r'   r>   r?   r+   r@   rA   rB   rD   r   r   r   rE      s   zFlaxRegNetShortCut.setupTr   rG   rH   c                 C   s   |  |}| j||d}|S rI   )r@   rB   )r   r   rG   rF   r   r   r   r         
zFlaxRegNetShortCut.__call__NrK   )r   r   r   r   rL   rM   r'   rO   rP   r+   rE   rQ   rR   r   r   r   r   r   r^      s   
  r^   c                   @   sL   e Zd ZU eed< eed< ejZejed< dd Zdej	dej	fdd	Z
d
S )FlaxRegNetSELayerCollectionin_channelsreduced_channelsr+   c                 C   sT   t j| jdt jjdddd| jdd| _t j| jdt jjdddd| jdd| _d S )	Nr_   r-   r.   r/   r0   0)r%   r7   r+   name2)	r    r=   rc   r>   r?   r+   conv_1rb   conv_2rD   r   r   r   rE      s   z!FlaxRegNetSELayerCollection.setuprF   rH   c                 C   s,   |  |}t|}| |}t|}|S r   )rg   r    r)   rh   sigmoid)r   rF   	attentionr   r   r   r      s
   



z$FlaxRegNetSELayerCollection.__call__N)r   r   r   rL   rM   rO   rP   r+   rE   rQ   r   r   r   r   r   ra      s   
 ra   c                   @   sP   e Zd ZU dZeed< eed< ejZejed< dd Z	dej
dej
fd	d
ZdS )FlaxRegNetSELayerz
    Squeeze and Excitation layer (SE) proposed in [Squeeze-and-Excitation Networks](https://huggingface.co/papers/1709.01507).
    rb   rc   r+   c                 C   s*   t tjdd| _t| j| j| jd| _d S )Nr   r   rm   r4   r+   )	r   r    avg_poolpoolerra   rb   rc   r+   rj   rD   r   r   r   rE      s   zFlaxRegNetSELayer.setuprF   rH   c                 C   sF   | j ||jd |jd f|jd |jd fd}| |}|| }|S )Nr&   r,   window_shaper3   )rq   r[   rj   )r   rF   pooledrj   r   r   r   r      s   
zFlaxRegNetSELayer.__call__N)r   r   r   r   rL   rM   rO   rP   r+   rE   rQ   r   r   r   r   r   rk      s   
 rk   c                   @   s^   e Zd ZU eed< eed< dZeed< ejZ	ej	ed< dd Z
dd	ejd
edejfddZdS )FlaxRegNetXLayerCollectionrT   r#   r&   r'   r+   c              	   C   sf   t d| j| jj }t| jd| jj| jddt| j| j|| jj| jddt| jdd | jddg| _d S )Nr&   rd   r%   r*   r+   re   1r'   r(   r*   r+   re   rf   )	maxr#   rT   groups_widthr"   rV   r+   r'   layerr   r(   r   r   r   rE      s0   
z FlaxRegNetXLayerCollection.setupTrF   rG   rH   c                 C   s   | j D ]}|||d}q|S NrZ   r{   )r   rF   rG   r{   r   r   r   r     r`   z#FlaxRegNetXLayerCollection.__call__NrK   )r   r   r   r	   rM   rL   r'   rO   rP   r+   rE   rQ   rR   r   r   r   r   r   ru      s   
  ru   c                   @   j   e Zd ZU dZeed< eed< eed< dZeed< ej	Z
ej
ed< dd	 ZddejdedejfddZdS )FlaxRegNetXLayerzt
    RegNet's layer composed by three `3x3` convolutions, same as a ResNet bottleneck layer with reduction = 1.
    rT   rb   r#   r&   r'   r+   c                 C   f   | j | jkp
| jdk}|rt| j| j| jdnt | _t| j| j | j| j| jd| _	t
| jj | _d S Nr&   )r'   r+   )rb   r#   r'   r+   )rb   r#   r'   r^   r+   r   shortcutru   rT   r{   r   rV   rC   r   should_apply_shortcutr   r   r   rE      s"   	zFlaxRegNetXLayer.setupTrF   rG   rH   c                 C   2   |}|  |}| j||d}||7 }| |}|S r}   r{   r   rC   r   rF   rG   residualr   r   r   r   4     

zFlaxRegNetXLayer.__call__NrK   r   r   r   r   r	   rM   rL   r'   rO   rP   r+   rE   rQ   rR   r   r   r   r   r   r     s   
  r   c                   @   s`   e Zd ZU eed< eed< eed< dZeed< ejZ	ej	ed< dd Z
d	ejd
ejfddZdS )FlaxRegNetYLayerCollectionrT   rb   r#   r&   r'   r+   c              
   C   s   t d| j| jj }t| jd| jj| jddt| j| j|| jj| jddt| jt	t
| jd | jddt| jdd | jd	dg| _d S )
Nr&   rd   rv   rw   rx      rf   )rc   r+   re   3)ry   r#   rT   rz   r"   rV   r+   r'   rk   rL   roundrb   r{   r|   r   r   r   rE   D  s<   
z FlaxRegNetYLayerCollection.setuprF   rH   c                 C   s   | j D ]}||}q|S r   r~   )r   rF   r{   r   r   r   r   f  s   

z#FlaxRegNetYLayerCollection.__call__N)r   r   r   r	   rM   rL   r'   rO   rP   r+   rE   rQ   r   r   r   r   r   r   =  s   
 "r   c                   @   r   )FlaxRegNetYLayerzC
    RegNet's Y layer: an X layer with Squeeze and Excitation.
    rT   rb   r#   r&   r'   r+   c                 C   r   r   )rb   r#   r'   r^   r+   r   r   r   rT   r{   r   rV   rC   r   r   r   r   rE   w  s"   	zFlaxRegNetYLayer.setupTrF   rG   rH   c                 C   r   r}   r   r   r   r   r   r     r   zFlaxRegNetYLayer.__call__NrK   r   r   r   r   r   r   l  s   
  r   c                   @   v   e Zd ZU dZeed< eed< eed< dZeed< dZeed< e	j
Ze	jed< d	d
 Zdde	jdede	jfddZdS )FlaxRegNetStageLayersCollection4
    A RegNet stage composed by stacked layers.
    rT   rb   r#   r,   r'   depthr+   c                 C   sz   | j jdkrtnt}|| j | j| j| j| jddg}t| j	d D ]}|
|| j | j| j| jt|d d q!|| _d S )Nr   rd   )r'   r+   re   r&   r+   re   )rT   
layer_typer   r   rb   r#   r'   r+   ranger   appendrN   layers)r   r{   r   ir   r   r   rE     s*   


z%FlaxRegNetStageLayersCollection.setupTr   rG   rH   c                 C   s    |}| j D ]}|||d}q|S r}   r   )r   r   rG   rF   r{   r   r   r   r     s   
z(FlaxRegNetStageLayersCollection.__call__NrK   r   r   r   r   r	   rM   rL   r'   r   rO   rP   r+   rE   rQ   rR   r   r   r   r   r   r     s   
  r   c                   @   r   )FlaxRegNetStager   rT   rb   r#   r,   r'   r   r+   c                 C   s&   t | j| j| j| j| j| jd| _d S )N)rb   r#   r'   r   r+   )r   rT   rb   r#   r'   r   r+   r   rD   r   r   r   rE     s   zFlaxRegNetStage.setupTr   rG   rH   c                 C   s   | j ||dS r}   r   )r   r   rG   r   r   r   r     s   zFlaxRegNetStage.__call__NrK   r   r   r   r   r   r     s   
  
r   c                	   @   sP   e Zd ZU eed< ejZejed< dd Z		ddej	de
d	e
d
efddZdS )FlaxRegNetStageCollectionrT   r+   c                 C   s   t | jj| jjdd  }t| j| jj| jjd | jjrdnd| jjd | jddg}tt || jjdd  D ]\}\\}}}|	t| j|||| jt
|d d q8|| _d S )Nr&   r   r,   rd   )r'   r   r+   re   )r   r+   re   )ziprT   hidden_sizesr   rU   downsample_in_first_stagedepthsr+   	enumerater   rN   stages)r   in_out_channelsr   r   rb   r#   r   r   r   r   rE     s    

*
zFlaxRegNetStageCollection.setupFTrF   output_hidden_statesrG   rH   c                 C   sF   |rdnd }| j D ]}|r||ddddf }|||d}q	||fS )Nr   r   r$   r&   r,   rZ   )r   	transpose)r   rF   r   rG   hidden_statesstage_moduler   r   r   r     s   
z"FlaxRegNetStageCollection.__call__N)FTr   r   r   r	   rM   rO   rP   r+   rE   rQ   rR   r
   r   r   r   r   r   r     s   
 r   c                   @   sV   e Zd ZU eed< ejZejed< dd Z			ddej	de
d	e
d
e
def
ddZdS )FlaxRegNetEncoderrT   r+   c                 C   s   t | j| jd| _d S )Nro   )r   rT   r+   r   rD   r   r   r   rE     s   zFlaxRegNetEncoder.setupFTrF   r   return_dictrG   rH   c                 C   sT   | j |||d\}}|r||ddddf }|s$tdd ||fD S t||dS )	N)r   rG   r   r$   r&   r,   c                 s   s    | ]	}|d ur|V  qd S r   r   ).0vr   r   r   	<genexpr>!  s    z-FlaxRegNetEncoder.__call__.<locals>.<genexpr>)last_hidden_stater   )r   r   tupler
   )r   rF   r   r   rG   r   r   r   r   r     s   
zFlaxRegNetEncoder.__call__N)FTTr   r   r   r   r   r     s$   
 r   c                       s   e Zd ZU dZeZdZdZdZe	j
ed< ddejdfd	ed
edejdef fddZddejjdededefddZee				ddee dedee dee fddZ  ZS )FlaxRegNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    regnetrX   Nmodule_class)r&      r   r$   r   TrT   seedr+   _do_initc                    sL   | j d||d|}|d u rd|j|j|jf}t j||||||d d S )NrT   r+   r&   )input_shaper   r+   r   r   )r   
image_sizer\   super__init__)r   rT   r   r   r+   r   r   module	__class__r   r   r   5  s   	z"FlaxRegNetPreTrainedModel.__init__rngr   paramsrH   c                 C   sz   t j|| jd}d|i}| jj||dd}|d ur;tt|}tt|}| jD ]}|| ||< q(t | _t	t
|S |S )Nro   r   F)r   )rO   zerosr+   r   initr   r   _missing_keyssetr   r   )r   r   r   r   rX   rngsrandom_paramsmissing_keyr   r   r   init_weightsC  s   
z&FlaxRegNetPreTrainedModel.init_weightsFtrainr   r   c              	   C   s   |d ur|n| j j}|d ur|n| j j}t|d}i }| jj|d ur'|d n| jd |d ur3|d n| jd dtj|tj	d| ||||rLdgdS ddS )N)r   r,   r$   r&   r   batch_stats)r   r   ro   F)r   mutable)
rT   r   r   rO   r   r   applyr   arrayrP   )r   rX   r   r   r   r   r   r   r   r   r   U  s$   

z"FlaxRegNetPreTrainedModel.__call__r   )NFNN)r   r   r   r   r	   config_classbase_model_prefixmain_input_namer   r    ModulerM   rO   rP   rL   r+   rR   r   jaxrandomPRNGKeyr   r   r   r   REGNET_INPUTS_DOCSTRINGr   dictr   __classcell__r   r   r   r   r   *  sD   
  r   c                	   @   sP   e Zd ZU eed< ejZejed< dd Z			dde	de	d	e	d
e
fddZdS )FlaxRegNetModulerT   r+   c                 C   s8   t | j| jd| _t| j| jd| _ttjdd| _	d S )Nro   rl   rn   )
rS   rT   r+   rW   r   encoderr   r    rp   rq   rD   r   r   r   rE   {  s   zFlaxRegNetModule.setupTFrG   r   r   rH   c           	      C   s   |d ur|n| j j}|d ur|n| j j}| j||d}| j||||d}|d }| j||jd |jd f|jd |jd fddddd}|dddd}|sZ||f|dd   S t|||j	dS )	NrZ   )r   r   rG   r   r&   r,   rr   r$   )r   pooler_outputr   )
rT   r   use_return_dictrW   r   rq   r[   r   r   r   )	r   rX   rG   r   r   embedding_outputencoder_outputsr   pooled_outputr   r   r   r     s4   zFlaxRegNetModule.__call__N)TFT)r   r   r   r	   rM   rO   rP   r+   rE   rR   r   r   r   r   r   r   r   w  s    
 r   zOThe bare RegNet model outputting raw features without any specific head on top.c                   @      e Zd ZeZdS )FlaxRegNetModelN)r   r   r   r   r   r   r   r   r   r     s    r   at  
    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxRegNetModel
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("facebook/regnet-y-040")
    >>> model = FlaxRegNetModel.from_pretrained("facebook/regnet-y-040")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> last_hidden_states = outputs.last_hidden_state
    ```
)output_typer   c                   @   sD   e Zd ZU eed< ejZejed< dd Zdej	dej	fddZ
d	S )
FlaxRegNetClassifierCollectionrT   r+   c                 C   s   t j| jj| jdd| _d S )Nrw   r   )r    DenserT   
num_labelsr+   
classifierrD   r   r   r   rE     s   z$FlaxRegNetClassifierCollection.setupr   rH   c                 C   s
   |  |S r   )r   )r   r   r   r   r   r     s   
z'FlaxRegNetClassifierCollection.__call__N)r   r   r   r	   rM   rO   rP   r+   rE   rQ   r   r   r   r   r   r     s
   
 r   c                   @   sF   e Zd ZU eed< ejZejed< dd Z				d
de	fdd	Z
dS )&FlaxRegNetForImageClassificationModulerT   r+   c                 C   s@   t | j| jd| _| jjdkrt| j| jd| _d S t | _d S )Nr   r   ro   )r   rT   r+   r   r   r   r   r   rD   r   r   r   rE     s   z,FlaxRegNetForImageClassificationModule.setupNTrG   c           	      C   s~   |d ur|n| j j}| j||||d}|r|jn|d }| |d d d d ddf }|s8|f|dd   }|S t||jdS )N)rG   r   r   r&   r   r,   )logitsr   )rT   r   r   r   r   r   r   )	r   rX   rG   r   r   outputsr   r   outputr   r   r   r     s   z/FlaxRegNetForImageClassificationModule.__call__)NTNN)r   r   r   r	   rM   rO   rP   r+   rE   rR   r   r   r   r   r   r     s   
 
r   z
    RegNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                   @   r   ) FlaxRegNetForImageClassificationN)r   r   r   r   r   r   r   r   r   r     s    r   aa  
    Returns:

    Example:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxRegNetForImageClassification
    >>> from PIL import Image
    >>> import jax
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("facebook/regnet-y-040")
    >>> model = FlaxRegNetForImageClassification.from_pretrained("facebook/regnet-y-040")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_class_idx = jax.numpy.argmax(logits, axis=-1)
    >>> print("Predicted class:", model.config.id2label[predicted_class_idx.item()])
    ```
)r   r   r   );	functoolsr   typingr   
flax.linenlinenr    r   	jax.numpynumpyrO   flax.core.frozen_dictr   r   r   flax.traverse_utilr   r   transformersr	   "transformers.modeling_flax_outputsr
   r   r   r    transformers.modeling_flax_utilsr   r   r   r   transformers.utilsr   r   REGNET_START_DOCSTRINGr   r   r   r"   rS   r^   ra   rk   ru   r   r   r   r   r   r   r   r   r   r   FLAX_VISION_MODEL_DOCSTRINGr   r   r   FLAX_VISION_CLASSIF_DOCSTRING__all__r   r   r   r   <module>   sp   #((/)0+M7
'
