o
    i`                     @   sB  d dl mZ d dlmZ d dlmZ d dlZd dlm	Z
 d dlmZmZmZ d dlmZmZ ddlmZmZmZ ddlmZmZmZmZ dd	lmZmZ d
dlmZ dZ dZ!G dd dej"Z#G dd dej"Z$G dd dej"Z%G dd dej"Z&G dd dej"Z'G dd dej"Z(G dd dej"Z)G dd dej"Z*G dd dej"Z+G d d! d!ej"Z,G d"d# d#ej"Z-G d$d% d%ej"Z.G d&d' d'eZ/G d(d) d)ej"Z0ed*e G d+d, d,e/Z1d-Z2ee1e2 ee1eed. G d/d0 d0ej"Z3G d1d2 d2ej"Z4ed3e G d4d5 d5e/Z5d6Z6ee5e6 ee5eed. g d7Z7dS )8    )partial)OptionalN)
FrozenDictfreezeunfreeze)flatten_dictunflatten_dict   )"FlaxBaseModelOutputWithNoAttention,FlaxBaseModelOutputWithPoolingAndNoAttention(FlaxImageClassifierOutputWithNoAttention)ACT2FNFlaxPreTrainedModel append_replace_return_docstringsoverwrite_call_docstring)add_start_docstrings%add_start_docstrings_to_model_forward   )ResNetConfiga  

    This model inherits from [`FlaxPreTrainedModel`]. Check the superclass documentation for the generic methods the
    library implements for all its model (such as downloading, saving and converting weights from PyTorch models)

    This model is also a
    [flax.linen.Module](https://flax.readthedocs.io/en/latest/api_reference/flax.linen/module.html) subclass. Use it as
    a regular Flax linen Module and refer to the Flax documentation for all matter related to general usage and
    behavior.

    Finally, this model supports inherent JAX features such as:

    - [Just-In-Time (JIT) compilation](https://jax.readthedocs.io/en/latest/jax.html#just-in-time-compilation-jit)
    - [Automatic Differentiation](https://jax.readthedocs.io/en/latest/jax.html#automatic-differentiation)
    - [Vectorization](https://jax.readthedocs.io/en/latest/jax.html#vectorization-vmap)
    - [Parallelization](https://jax.readthedocs.io/en/latest/jax.html#parallelization-pmap)

    Parameters:
        config ([`ResNetConfig`]): Model configuration class with all the parameters of the model.
            Initializing with a config file does not load the weights associated with the model, only the
            configuration. Check out the [`~FlaxPreTrainedModel.from_pretrained`] method to load the model weights.
        dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`):
            The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
            `jax.numpy.bfloat16` (on TPUs).

            This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
            specified all the computation will be performed with the given `dtype`.

            **Note that this only specifies the dtype of the computation and does not influence the dtype of model
            parameters.**

            If you wish to change the dtype of the model parameters, see [`~FlaxPreTrainedModel.to_fp16`] and
            [`~FlaxPreTrainedModel.to_bf16`].
aA  
    Args:
        pixel_values (`jax.numpy.float32` of shape `(batch_size, num_channels, height, width)`):
            Pixel values. Pixel values can be obtained using [`AutoImageProcessor`]. See
            [`AutoImageProcessor.__call__`] for details.
        output_hidden_states (`bool`, *optional*):
            Whether or not to return the hidden states of all layers. See `hidden_states` under returned tensors for
            more detail.
        return_dict (`bool`, *optional*):
            Whether or not to return a [`~utils.ModelOutput`] instead of a plain tuple.
c                   @   s   e Zd ZdZejdd ZdS )IdentityzIdentity function.c                 K   s   |S N )selfxkwargsr   r   c/home/ubuntu/.local/lib/python3.10/site-packages/transformers/models/resnet/modeling_flax_resnet.py__call__\   s   zIdentity.__call__N)__name__
__module____qualname____doc__nncompactr   r   r   r   r   r   Y   s    r   c                   @   sr   e Zd ZU eed< dZeed< dZeed< dZee	 ed< e
jZe
jed< d	d
 Zdde
jdede
jfddZdS )FlaxResNetConvLayerout_channelsr	   kernel_sizer   striderelu
activationdtypec                 C   sz   t j| j| j| jf| j| jd | jdt jjddd| jdd| _t j	dd	| jd
| _
| jd ur7t| j | _d S t | _d S )N   F       @fan_outnormal)modedistributionr)   )r%   stridespaddingr)   use_biaskernel_init?h㈵>momentumepsilonr)   )r!   Convr$   r%   r&   r)   initializersvariance_scalingconvolution	BatchNormnormalizationr(   r   r   activation_funcr   r   r   r   setuph   s   
	&zFlaxResNetConvLayer.setupTr   deterministicreturnc                 C   s&   |  |}| j||d}| |}|S N)use_running_average)r<   r>   r?   r   r   rB   hidden_stater   r   r   r   u   s   

zFlaxResNetConvLayer.__call__NT)r   r   r   int__annotations__r%   r&   r(   r   strjnpfloat32r)   rA   ndarrayboolr   r   r   r   r   r#   a   s   
  r#   c                   @   sN   e Zd ZU dZeed< ejZejed< dd Z	ddej
ded	ej
fd
dZdS )FlaxResNetEmbeddingszO
    ResNet Embeddings (stem) composed of a single aggressive convolution.
    configr)   c                 C   s6   t | jjdd| jj| jd| _ttjdddd| _d S )N   r*   )r%   r&   r(   r)   )r	   r	   )r*   r*   )r   r   rS   )window_shaper0   r1   )	r#   rQ   embedding_size
hidden_actr)   embedderr   r!   max_poolr@   r   r   r   rA      s   zFlaxResNetEmbeddings.setupTpixel_valuesrB   rC   c                 C   s:   |j d }|| jjkrtd| j||d}| |}|S )NzeMake sure that the channel dimension of the pixel values match with the one set in the configuration.rB   )shaperQ   num_channels
ValueErrorrW   rX   )r   rY   rB   r]   	embeddingr   r   r   r      s   

zFlaxResNetEmbeddings.__call__NrH   )r   r   r   r    r   rJ   rL   rM   r)   rA   rN   rO   r   r   r   r   r   rP   |   s   
  rP   c                   @   sZ   e Zd ZU dZeed< dZeed< ejZ	ej	ed< dd Z
dd	ejd
edejfddZdS )FlaxResNetShortCutz
    ResNet shortcut, used to project the residual features to the correct size. If needed, it is also used to
    downsample the input using `stride=2`.
    r$   r*   r&   r)   c              
   C   sD   t j| jd| jdt jjdddd| jd| _t jdd	| jd
| _	d S )NrS   Fr+   r,   truncated_normal)r.   r/   )r%   r0   r2   r3   r)   r4   r5   r6   )
r!   r9   r$   r&   r:   r;   r)   r<   r=   r>   r@   r   r   r   rA      s   zFlaxResNetShortCut.setupTr   rB   rC   c                 C   s   |  |}| j||d}|S rD   )r<   r>   rF   r   r   r   r         
zFlaxResNetShortCut.__call__NrH   )r   r   r   r    rI   rJ   r&   rL   rM   r)   rA   rN   rO   r   r   r   r   r   r`      s   
  r`   c                   @   sV   e Zd ZU eed< dZeed< ejZejed< dd Z	ddej
d	ed
ej
fddZdS )FlaxResNetBasicLayerCollectionr$   r   r&   r)   c                 C   s,   t | j| j| jdt | jd | jdg| _d S )Nr&   r)   )r(   r)   )r#   r$   r&   r)   layerr@   r   r   r   rA      s   
z$FlaxResNetBasicLayerCollection.setupTrG   rB   rC   c                 C      | j D ]}|||d}q|S Nr[   re   r   rG   rB   re   r   r   r   r      rb   z'FlaxResNetBasicLayerCollection.__call__NrH   )r   r   r   rI   rJ   r&   rL   rM   r)   rA   rN   rO   r   r   r   r   r   rc      s   
  rc   c                   @   sf   e Zd ZU dZeed< eed< dZeed< dZee	 ed< e
jZe
jed< d	d
 ZddefddZdS )FlaxResNetBasicLayerzO
    A classic ResNet's residual layer composed by two `3x3` convolutions.
    in_channelsr$   r   r&   r'   r(   r)   c                 C   sZ   | j | jkp
| jdk}|rt| j| j| jdnd | _t| j| j| jd| _t| j	 | _
d S )Nr   rd   )r$   r&   r)   )rk   r$   r&   r`   r)   shortcutrc   re   r   r(   r?   r   should_apply_shortcutr   r   r   rA      s   zFlaxResNetBasicLayer.setupTrB   c                 C   s@   |}| j ||d}| jd ur| j||d}||7 }| |}|S rg   )re   rl   r?   r   rG   rB   residualr   r   r   r      s   

zFlaxResNetBasicLayer.__call__NrH   )r   r   r   r    rI   rJ   r&   r(   r   rK   rL   rM   r)   rA   rO   r   r   r   r   r   rj      s   
 rj   c                   @   sr   e Zd ZU eed< dZeed< dZee ed< dZ	eed< e
jZe
jed< d	d
 Zdde
jdede
jfddZdS )#FlaxResNetBottleNeckLayerCollectionr$   r   r&   r'   r(      	reductionr)   c              	   C   sL   | j | j }t|d| jddt|| j| jddt| j dd | jddg| _d S )Nr   0)r%   r)   name1)r&   r)   ru   2)r%   r(   r)   ru   )r$   rs   r#   r)   r&   re   )r   reduces_channelsr   r   r   rA      s
   
z)FlaxResNetBottleNeckLayerCollection.setupTrG   rB   rC   c                 C   rf   rg   rh   ri   r   r   r   r      rb   z,FlaxResNetBottleNeckLayerCollection.__call__NrH   )r   r   r   rI   rJ   r&   r(   r   rK   rs   rL   rM   r)   rA   rN   rO   r   r   r   r   r   rq      s   
  	rq   c                   @   s~   e Zd ZU dZeed< eed< dZeed< dZee	 ed< dZ
eed	< ejZejed
< dd ZddejdedejfddZdS )FlaxResNetBottleNeckLayera$  
    A classic ResNet's bottleneck layer composed by three `3x3` convolutions. The first `1x1` convolution reduces the
    input by a factor of `reduction` in order to make the second `3x3` convolution faster. The last `1x1` convolution
    remaps the reduced features to `out_channels`.
    rk   r$   r   r&   r'   r(   rr   rs   r)   c                 C   sb   | j | jkp
| jdk}|rt| j| j| jdnd | _t| j| j| j| j| jd| _	t
| j | _d S )Nr   rd   )r&   r(   rs   r)   )rk   r$   r&   r`   r)   rl   rq   r(   rs   re   r   r?   rm   r   r   r   rA     s   zFlaxResNetBottleNeckLayer.setupTrG   rB   rC   c                 C   s>   |}| j d ur| j ||d}| ||}||7 }| |}|S rg   )rl   re   r?   ro   r   r   r   r   !  s   

z"FlaxResNetBottleNeckLayer.__call__NrH   )r   r   r   r    rI   rJ   r&   r(   r   rK   rs   rL   rM   r)   rA   rN   rO   r   r   r   r   r   ry     s   
  ry   c                   @   v   e Zd ZU dZeed< eed< eed< dZeed< dZeed< e	j
Ze	jed< d	d
 Zdde	jdede	jfddZdS )FlaxResNetStageLayersCollection4
    A ResNet stage composed by stacked layers.
    rQ   rk   r$   r*   r&   depthr)   c                 C   s~   | j jdkrtnt}|| j| j| j| j j| jddg}t	| j
d D ]}||| j| j| j j| jt|d d q"|| _d S )N
bottleneckrt   )r&   r(   r)   ru   r   )r(   r)   ru   )rQ   
layer_typery   rj   rk   r$   r&   rV   r)   ranger}   appendrK   layers)r   re   r   ir   r   r   rA   8  s*   


z%FlaxResNetStageLayersCollection.setupTr   rB   rC   c                 C   s    |}| j D ]}|||d}q|S rg   r   )r   r   rB   rG   re   r   r   r   r   T  s   
z(FlaxResNetStageLayersCollection.__call__NrH   r   r   r   r    r   rJ   rI   r&   r}   rL   rM   r)   rA   rN   rO   r   r   r   r   r   r{   ,  s   
  r{   c                   @   rz   )FlaxResNetStager|   rQ   rk   r$   r*   r&   r}   r)   c                 C   s&   t | j| j| j| j| j| jd| _d S )N)rk   r$   r&   r}   r)   )r{   rQ   rk   r$   r&   r}   r)   r   r@   r   r   r   rA   g  s   zFlaxResNetStage.setupTr   rB   rC   c                 C   s   | j ||dS rg   r   )r   r   rB   r   r   r   r   q  s   zFlaxResNetStage.__call__NrH   r   r   r   r   r   r   [  s   
  
r   c                	   @   sP   e Zd ZU eed< ejZejed< dd Z		ddej	de
d	e
d
efddZdS )FlaxResNetStageCollectionrQ   r)   c                 C   s   t | jj| jjdd  }t| j| jj| jjd | jjrdnd| jjd | jddg}tt || jjdd  D ]\}\\}}}|	t| j|||| jt
|d d q8|| _d S )Nr   r   r*   rt   )r&   r}   r)   ru   )r}   r)   ru   )ziprQ   hidden_sizesr   rU   downsample_in_first_stagedepthsr)   	enumerater   rK   stages)r   in_out_channelsr   r   rk   r$   r}   r   r   r   rA   y  s    

*
zFlaxResNetStageCollection.setupFTrG   output_hidden_statesrB   rC   c                 C   sF   |rdnd }| j D ]}|r||ddddf }|||d}q	||fS )Nr   r   r	   r   r*   r[   )r   	transpose)r   rG   r   rB   hidden_statesstage_moduler   r   r   r     s   
z"FlaxResNetStageCollection.__call__N)FTr   r   r   r   rJ   rL   rM   r)   rA   rN   rO   r
   r   r   r   r   r   r   u  s   
 r   c                   @   sV   e Zd ZU eed< ejZejed< dd Z			ddej	de
d	e
d
e
def
ddZdS )FlaxResNetEncoderrQ   r)   c                 C   s   t | j| jd| _d S )Nr)   )r   rQ   r)   r   r@   r   r   r   rA     s   zFlaxResNetEncoder.setupFTrG   r   return_dictrB   rC   c                 C   sT   | j |||d\}}|r||ddddf }|s$tdd ||fD S t||dS )	N)r   rB   r   r	   r   r*   c                 s   s    | ]	}|d ur|V  qd S r   r   ).0vr   r   r   	<genexpr>  s    z-FlaxResNetEncoder.__call__.<locals>.<genexpr>)last_hidden_stater   )r   r   tupler
   )r   rG   r   r   rB   r   r   r   r   r     s   
zFlaxResNetEncoder.__call__N)FTTr   r   r   r   r   r     s$   
 r   c                       s   e Zd ZU dZeZdZdZdZe	j
ed< ddejdfd	ed
edejdef fddZddejjdededefddZee				ddee dedee dee fddZ  ZS )FlaxResNetPreTrainedModelz
    An abstract class to handle weights initialization and a simple interface for downloading and loading pretrained
    models.
    resnetrY   Nmodule_class)r      r   r	   r   TrQ   seedr)   _do_initc                    sL   | j d||d|}|d u rd|j|j|jf}t j||||||d d S )NrQ   r)   r   )input_shaper   r)   r   r   )r   
image_sizer]   super__init__)r   rQ   r   r   r)   r   r   module	__class__r   r   r     s   	z"FlaxResNetPreTrainedModel.__init__rngr   paramsrC   c                 C   sz   t j|| jd}d|i}| jj||dd}|d ur;tt|}tt|}| jD ]}|| ||< q(t | _t	t
|S |S )Nr   r   F)r   )rL   zerosr)   r   initr   r   _missing_keyssetr   r   )r   r   r   r   rY   rngsrandom_paramsmissing_keyr   r   r   init_weights  s   
z&FlaxResNetPreTrainedModel.init_weightsFtrainr   r   c              	   C   s   |d ur|n| j j}|d ur|n| j j}t|d}i }| jj|d ur'|d n| jd |d ur3|d n| jd dtj|tj	d| ||||rLdgdS ddS )N)r   r*   r	   r   r   batch_stats)r   r   r   F)r   mutable)
rQ   r   r   rL   r   r   applyr   arrayrM   )r   rY   r   r   r   r   r   r   r   r   r     s$   

z"FlaxResNetPreTrainedModel.__call__r   )NFNN)r   r   r   r    r   config_classbase_model_prefixmain_input_namer   r!   ModulerJ   rL   rM   rI   r)   rO   r   jaxrandomPRNGKeyr   r   r   r   RESNET_INPUTS_DOCSTRINGr   dictr   __classcell__r   r   r   r   r     sD   
  r   c                	   @   sP   e Zd ZU eed< ejZejed< dd Z			dde	de	d	e	d
e
fddZdS )FlaxResNetModulerQ   r)   c                 C   s8   t | j| jd| _t| j| jd| _ttjdd| _	d S )Nr   )r   r   r   )r1   )
rP   rQ   r)   rW   r   encoderr   r!   avg_poolpoolerr@   r   r   r   rA     s   zFlaxResNetModule.setupTFrB   r   r   rC   c           	      C   s   |d ur|n| j j}|d ur|n| j j}| j||d}| j||||d}|d }| j||jd |jd f|jd |jd fddddd}|dddd}|sZ||f|dd   S t|||j	dS )	Nr[   )r   r   rB   r   r   r*   )rT   r0   r	   )r   pooler_outputr   )
rQ   r   use_return_dictrW   r   r   r\   r   r   r   )	r   rY   rB   r   r   embedding_outputencoder_outputsr   pooled_outputr   r   r   r     s4   zFlaxResNetModule.__call__N)TFT)r   r   r   r   rJ   rL   rM   r)   rA   rO   r   r   r   r   r   r   r   	  s    
 r   zOThe bare ResNet model outputting raw features without any specific head on top.c                   @      e Zd ZeZdS )FlaxResNetModelN)r   r   r   r   r   r   r   r   r   r   @  s    r   an  
    Returns:

    Examples:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxResNetModel
    >>> from PIL import Image
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)
    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
    >>> model = FlaxResNetModel.from_pretrained("microsoft/resnet-50")
    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> last_hidden_states = outputs.last_hidden_state
    ```
)output_typer   c                   @   sD   e Zd ZU eed< ejZejed< dd Zdej	dej	fddZ
d	S )
FlaxResNetClassifierCollectionrQ   r)   c                 C   s   t j| jj| jdd| _d S )Nrv   )r)   ru   )r!   DenserQ   
num_labelsr)   
classifierr@   r   r   r   rA   f  s   z$FlaxResNetClassifierCollection.setupr   rC   c                 C   s
   |  |S r   )r   )r   r   r   r   r   r   i  s   
z'FlaxResNetClassifierCollection.__call__N)r   r   r   r   rJ   rL   rM   r)   rA   rN   r   r   r   r   r   r   b  s
   
 r   c                   @   sF   e Zd ZU eed< ejZejed< dd Z				d
de	fdd	Z
dS )&FlaxResNetForImageClassificationModulerQ   r)   c                 C   s@   t | j| jd| _| jjdkrt| j| jd| _d S t | _d S )Nr   r   r   )r   rQ   r)   r   r   r   r   r   r@   r   r   r   rA   q  s   z,FlaxResNetForImageClassificationModule.setupNTrB   c           	      C   s~   |d ur|n| j j}| j||||d}|r|jn|d }| |d d d d ddf }|s8|f|dd   }|S t||jdS )N)rB   r   r   r   r   r*   )logitsr   )rQ   r   r   r   r   r   r   )	r   rY   rB   r   r   outputsr   r   outputr   r   r   r   y  s   z/FlaxResNetForImageClassificationModule.__call__)NTNN)r   r   r   r   rJ   rL   rM   r)   rA   rO   r   r   r   r   r   r   m  s   
 
r   z
    ResNet Model with an image classification head on top (a linear layer on top of the pooled features), e.g. for
    ImageNet.
    c                   @   r   ) FlaxResNetForImageClassificationN)r   r   r   r   r   r   r   r   r   r     s    r   a]  
    Returns:

    Example:

    ```python
    >>> from transformers import AutoImageProcessor, FlaxResNetForImageClassification
    >>> from PIL import Image
    >>> import jax
    >>> import requests

    >>> url = "http://images.cocodataset.org/val2017/000000039769.jpg"
    >>> image = Image.open(requests.get(url, stream=True).raw)

    >>> image_processor = AutoImageProcessor.from_pretrained("microsoft/resnet-50")
    >>> model = FlaxResNetForImageClassification.from_pretrained("microsoft/resnet-50")

    >>> inputs = image_processor(images=image, return_tensors="np")
    >>> outputs = model(**inputs)
    >>> logits = outputs.logits

    >>> # model predicts one of the 1000 ImageNet classes
    >>> predicted_class_idx = jax.numpy.argmax(logits, axis=-1)
    >>> print("Predicted class:", model.config.id2label[predicted_class_idx.item()])
    ```
)r   r   r   )8	functoolsr   typingr   
flax.linenlinenr!   r   	jax.numpynumpyrL   flax.core.frozen_dictr   r   r   flax.traverse_utilr   r   modeling_flax_outputsr
   r   r   modeling_flax_utilsr   r   r   r   utilsr   r   configuration_resnetr   RESNET_START_DOCSTRINGr   r   r   r#   rP   r`   rc   rj   rq   ry   r{   r   r   r   r   r   r   FLAX_VISION_MODEL_DOCSTRINGr   r   r   FLAX_VISION_CLASSIF_DOCSTRING__all__r   r   r   r   <module>   sd   $%+/*L7
'
