o
    ۷ii                     @   s   d dl Z d dlmZ d dlmZ d dlZd dlmZ d dl	Z
d dlmZmZ d dlmZmZ d dlmZmZ d dlmZmZ d dlmZmZmZmZmZ d	d
lmZmZ d	dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& ddl'm(Z( e&)e*Z+G dd de%Z,dS )    N)UnpicklingError)Any)
FrozenDictunfreeze)
from_bytesto_bytes)flatten_dictunflatten_dict)create_repohf_hub_download)EntryNotFoundErrorHfHubHTTPErrorRepositoryNotFoundErrorRevisionNotFoundErrorvalidate_hf_hub_args   )__version__is_torch_available)CONFIG_NAMEFLAX_WEIGHTS_NAMEHUGGINGFACE_CO_RESOLVE_ENDPOINTWEIGHTS_NAMEPushToHubMixinlogging   )"convert_pytorch_state_dict_to_flaxc                	   @   s   e Zd ZdZeZg dZg dZedd Z	d de
eB dejd	ed
efddZd de
eB d	efddZd de
eB d	efddZd de
eB d	efddZdejd
e
fddZeeejfdeejB dejfddZ		d!deejB de
eB dedefddZdS )"FlaxModelMixina*  
    Base class for all Flax models.

    [`FlaxModelMixin`] takes care of storing the model configuration and provides methods for loading, downloading and
    saving models.

        - **config_name** ([`str`]) -- Filename to save a model to when calling [`~FlaxModelMixin.save_pretrained`].
    )_diffusers_version_class_name_name_or_path)nameparentdtypec                 K   s   | |fi |S )zZ
        All context managers that the model should be initialized under go here.
         )clsconfigkwargsr#   r#   Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/models/modeling_flax_utils.py_from_config@   s   zFlaxModelMixin._from_configNparamsr"   maskreturnc                    sn    fdd}|du rt ||S t|}t |\}}t|| D ]\}}	|r2||	 }
||
||	< q"t|S )zk
        Helper method to cast floating-point values of given parameter `PyTree` to given `dtype`.
        c                    s*   t | tjrt| jtjr|  } | S N)
isinstancejnpndarray
issubdtyper"   floatingastype)paramr"   r#   r'   conditional_castM   s   
z:FlaxModelMixin._cast_floating_to.<locals>.conditional_castN)jaxtree_mapr   tree_flattenzipkeysr	   )selfr)   r"   r*   r5   flat_params	flat_mask_maskedkeyr3   r#   r4   r'   _cast_floating_toG   s   z FlaxModelMixin._cast_floating_toc                 C      |  |tj|S )a  
        Cast the floating-point `params` to `jax.numpy.bfloat16`. This returns a new `params` tree and does not cast
        the `params` in place.

        This method can be used on a TPU to explicitly convert the model parameters to bfloat16 precision to do full
        half-precision training or to save weights in bfloat16 for inference in order to save memory and improve speed.

        Arguments:
            params (`dict | FrozenDict`):
                A `PyTree` of model parameters.
            mask (`dict | FrozenDict`):
                A `PyTree` with same structure as the `params` tree. The leaves should be booleans. It should be `True`
                for params you want to cast, and `False` for those you want to skip.

        Examples:

        ```python
        >>> from diffusers import FlaxUNet2DConditionModel

        >>> # load model
        >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
        >>> # By default, the model parameters will be in fp32 precision, to cast these to bfloat16 precision
        >>> params = model.to_bf16(params)
        >>> # If you don't want to cast certain parameters (for example layer norm bias and scale)
        >>> # then pass the mask as follows
        >>> from flax import traverse_util

        >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
        >>> flat_params = traverse_util.flatten_dict(params)
        >>> mask = {
        ...     path: (path[-2] != ("LayerNorm", "bias") and path[-2:] != ("LayerNorm", "scale"))
        ...     for path in flat_params
        ... }
        >>> mask = traverse_util.unflatten_dict(mask)
        >>> params = model.to_bf16(params, mask)
        ```)rA   r.   bfloat16r;   r)   r*   r#   r#   r'   to_bf16_      %zFlaxModelMixin.to_bf16c                 C   rB   )a  
        Cast the floating-point `params` to `jax.numpy.float32`. This method can be used to explicitly convert the
        model parameters to fp32 precision. This returns a new `params` tree and does not cast the `params` in place.

        Arguments:
            params (`dict | FrozenDict`):
                A `PyTree` of model parameters.
            mask (`dict | FrozenDict`):
                A `PyTree` with same structure as the `params` tree. The leaves should be booleans. It should be `True`
                for params you want to cast, and `False` for those you want to skip.

        Examples:

        ```python
        >>> from diffusers import FlaxUNet2DConditionModel

        >>> # Download model and configuration from huggingface.co
        >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
        >>> # By default, the model params will be in fp32, to illustrate the use of this method,
        >>> # we'll first cast to fp16 and back to fp32
        >>> params = model.to_f16(params)
        >>> # now cast back to fp32
        >>> params = model.to_fp32(params)
        ```)rA   r.   float32rD   r#   r#   r'   to_fp32   s   zFlaxModelMixin.to_fp32c                 C   rB   )a  
        Cast the floating-point `params` to `jax.numpy.float16`. This returns a new `params` tree and does not cast the
        `params` in place.

        This method can be used on a GPU to explicitly convert the model parameters to float16 precision to do full
        half-precision training or to save weights in float16 for inference in order to save memory and improve speed.

        Arguments:
            params (`dict | FrozenDict`):
                A `PyTree` of model parameters.
            mask (`dict | FrozenDict`):
                A `PyTree` with same structure as the `params` tree. The leaves should be booleans. It should be `True`
                for params you want to cast, and `False` for those you want to skip.

        Examples:

        ```python
        >>> from diffusers import FlaxUNet2DConditionModel

        >>> # load model
        >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
        >>> # By default, the model params will be in fp32, to cast these to float16
        >>> params = model.to_fp16(params)
        >>> # If you want don't want to cast certain parameters (for example layer norm bias and scale)
        >>> # then pass the mask as follows
        >>> from flax import traverse_util

        >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
        >>> flat_params = traverse_util.flatten_dict(params)
        >>> mask = {
        ...     path: (path[-2] != ("LayerNorm", "bias") and path[-2:] != ("LayerNorm", "scale"))
        ...     for path in flat_params
        ... }
        >>> mask = traverse_util.unflatten_dict(mask)
        >>> params = model.to_fp16(params, mask)
        ```)rA   r.   float16rD   r#   r#   r'   to_fp16   rF   zFlaxModelMixin.to_fp16rngc                 C   s   t d|  )Nz.init_weights method has to be implemented for )NotImplementedError)r;   rK   r#   r#   r'   init_weights   s   zFlaxModelMixin.init_weightspretrained_model_name_or_pathc           "      O   sv  t d |dd}|dd}|dd}|dd}|dd}	|d	d}
|d
d}|dd}|dd}tddd}|du rY| j|f|d||	|
|||d|\}}| j|f|dd|\}}|du rm|ntj||}tj	|r|rtj
tj|tstdt d| dtj|t}ntj
tj|trtj|t}ntj
tj|trtt d| dtdt dt d| dzt||stnt|||	|
||||d
}W nv ty   t| d ty   t| d| d ty   t| dt d ty* } z
td| d | d}~w tyB   td!t d"| d#t dt d$	 tyZ   td%| d&| d't dt d	w |rwt rid(d)lm} ntd*||}t||}nizt|d+}t| | }W d   n	1 sw   Y  W nI ttjjfy } z8zt|}|  d,rt!d-t|1 sw   Y  W n t"tfy   td.| d/w W Y d}~nd}~ww t#j$%d0d1 |}t&|}t#j'|j(t#j)*d2d3}t+t&t,|- }t&t,|}|t+|-  }t+|- | }|r+t d4| d5| d6 || _.|- D ]'} | |v rU||  j/||  j/krUtd7|  d8||  j/ d9||  j/ d:q/|D ]}!||!= qYt0|d2kr~t d;| d<|j1j2 d=| d>|j1j2 d?	 nt 3d@|j1j2 dA t0|d2krt dB|j1j2 dC| dD| dE nt 3dF|j1j2 dG| dH|j1j2 dI |t4|fS )Ja  
        Instantiate a pretrained Flax model from a pretrained model configuration.

        Parameters:
            pretrained_model_name_or_path (`str` or `os.PathLike`):
                Can be either:

                    - A string, the *model id* (for example `stable-diffusion-v1-5/stable-diffusion-v1-5`) of a
                      pretrained model hosted on the Hub.
                    - A path to a *directory* (for example `./my_model_directory`) containing the model weights saved
                      using [`~FlaxModelMixin.save_pretrained`].
            dtype (`jax.numpy.dtype`, *optional*, defaults to `jax.numpy.float32`):
                The data type of the computation. Can be one of `jax.numpy.float32`, `jax.numpy.float16` (on GPUs) and
                `jax.numpy.bfloat16` (on TPUs).

                This can be used to enable mixed-precision training or half-precision inference on GPUs or TPUs. If
                specified, all the computation will be performed with the given `dtype`.

                > [!TIP] > This only specifies the dtype of the *computation* and does not influence the dtype of model
                > parameters. > > If you wish to change the dtype of the model parameters, see
                [`~FlaxModelMixin.to_fp16`] and > [`~FlaxModelMixin.to_bf16`].

            model_args (sequence of positional arguments, *optional*):
                All remaining positional arguments are passed to the underlying model's `__init__` method.
            cache_dir (`str | os.PathLike`, *optional*):
                Path to a directory where a downloaded pretrained model configuration is cached if the standard cache
                is not used.
            force_download (`bool`, *optional*, defaults to `False`):
                Whether or not to force the (re-)download of the model weights and configuration files, overriding the
                cached versions if they exist.

            proxies (`dict[str, str]`, *optional*):
                A dictionary of proxy servers to use by protocol or endpoint, for example, `{'http': 'foo.bar:3128',
                'http://hostname': 'foo.bar:4012'}`. The proxies are used on each request.
            local_files_only(`bool`, *optional*, defaults to `False`):
                Whether to only load local model weights and configuration files or not. If set to `True`, the model
                won't be downloaded from the Hub.
            revision (`str`, *optional*, defaults to `"main"`):
                The specific model version to use. It can be a branch name, a tag name, a commit id, or any identifier
                allowed by Git.
            from_pt (`bool`, *optional*, defaults to `False`):
                Load the model weights from a PyTorch checkpoint save file.
            kwargs (remaining dictionary of keyword arguments, *optional*):
                Can be used to update the configuration object (after it is loaded) and initiate the model (for
                example, `output_attentions=True`). Behaves differently depending on whether a `config` is provided or
                automatically loaded:

                    - If a configuration is provided with `config`, `kwargs` are directly passed to the underlying
                      model's `__init__` method (we assume all relevant updates to the configuration have already been
                      done).
                    - If a configuration is not provided, `kwargs` are first passed to the configuration class
                      initialization function [`~ConfigMixin.from_config`]. Each key of the `kwargs` that corresponds
                      to a configuration attribute is used to override said attribute with the supplied `kwargs` value.
                      Remaining keys that do not correspond to any configuration attribute are passed to the underlying
                      model's `__init__` function.

        Examples:

        ```python
        >>> from diffusers import FlaxUNet2DConditionModel

        >>> # Download model and configuration from huggingface.co and cache.
        >>> model, params = FlaxUNet2DConditionModel.from_pretrained("stable-diffusion-v1-5/stable-diffusion-v1-5")
        >>> # Model was saved using *save_pretrained('./test/saved_model/')* (for example purposes, not runnable).
        >>> model, params = FlaxUNet2DConditionModel.from_pretrained("./test/saved_model/")
        ```

        If you get the error message below, you need to finetune the weights for your downstream task:

        ```bash
        Some weights of UNet2DConditionModel were not initialized from the model checkpoint at stable-diffusion-v1-5/stable-diffusion-v1-5 and are newly initialized because the shapes did not match:
        - conv_in.weight: found shape torch.Size([320, 4, 3, 3]) in the checkpoint and torch.Size([320, 9, 3, 3]) in the model instantiated
        You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
        ```
        zFlax classes are deprecated and will be removed in Diffusers v1.0.0. We recommend migrating to PyTorch classes or pinning your version of Diffusers.r%   N	cache_dirforce_downloadFfrom_ptproxieslocal_files_onlytokenrevision	subfoldermodelflax)	diffusers	file_type	frameworkT)rO   return_unused_kwargsrP   rR   rS   rT   rU   rV   )r"   r\   zError no file named z found in directory  z file found in directory z-. Please load the model using `from_pt=True`.z or .)	filenamerO   rP   rR   rS   rT   
user_agentrV   rU   z is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo with `token` or log in with `hf auth login`.z is not a valid git identifier (branch name, tag name or commit id) that exists for this model name. Check the model page at 'https://huggingface.co/z' for available revisions.z& does not appear to have a file named z:There was a specific connection error when trying to load z:
zWe couldn't connect to 'zM' to load this model, couldn't find it in the cached files and it looks like z8 is not the path to a directory containing a file named z.
Checkout your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'.zCan't load the model for 'z'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'z=' is the correct path to a directory containing a file named r   )load_state_dictz|Can't load the model in PyTorch format because PyTorch is not installed. Please, install PyTorch or use native Flax weights.rbversionzYou seem to have cloned a repository without having git-lfs installed. Please install git-lfs and run `git lfs install` followed by `git lfs pull` in the folder you cloned.zUnable to convert z  to Flax deserializable object. c                 S   s   t | t jddd S )Ncpu)backendr   )r6   
device_putlocal_devices)xr#   r#   r'   <lambda>  s    z0FlaxModelMixin.from_pretrained.<locals>.<lambda>r   )rK   zThe checkpoint z is missing required keys: zI. Make sure to call model.init_weights to initialize the missing weights.z)Trying to load the pretrained weight for z failed: checkpoint has shape z, which is incompatible with the model shape z. z(Some weights of the model checkpoint at z! were not used when initializing z: z,
- This IS expected if you are initializing zU from the checkpoint of a model trained on another task or with another architecture.z9All model checkpoint weights were used when initializing z.
zSome weights of z3 were not initialized from the model checkpoint at z and are newly initialized: zo
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.zAll the weights of z/ were initialized from the model checkpoint at zf.
If your task is similar to the task the model of the checkpoint was trained on, you can already use z* for predictions without further training.)5loggerwarningpopr   load_configfrom_configospathjoinisdirisfiler   EnvironmentErrorr   r   r   r   r   r   
ValueErrorr   r   modeling_utilsra   r   openr   readr   msgpack
exceptions	ExtraData
startswithOSErrorUnicodeDecodeErrorr6   	tree_utilr7   r   
eval_shaperM   randomPRNGKeysetr   r:   _missing_keysshapelen	__class____name__infor	   )"r$   rN   r"   
model_argsr&   r%   rO   rP   rQ   rR   rS   rT   rU   rV   r`   unused_kwargsrW   model_kwargspretrained_path_with_subfolder
model_fileerrra   pytorch_model_filestatestate_fefparams_shape_treerequired_paramsshape_statemissing_keysunexpected_keysr@   unexpected_keyr#   r#   r'   from_pretrained   sx  T




	 
zFlaxModelMixin.from_pretrainedTFsave_directoryis_main_processpush_to_hubc                 K   s  t j|rtd| d dS t j|dd |rH|dd}|dd}|dd	}|d
d}	|d|t jjd }
t	|
d||	dj
}
| }|rQ|| t j|t}t|d}t|}|| W d   n1 sqw   Y  td|  |r| j||
|	||d dS dS )a
  
        Save a model and its configuration file to a directory so that it can be reloaded using the
        [`~FlaxModelMixin.from_pretrained`] class method.

        Arguments:
            save_directory (`str` or `os.PathLike`):
                Directory to save a model and its configuration file to. Will be created if it doesn't exist.
            params (`dict | FrozenDict`):
                A `PyTree` of model parameters.
            is_main_process (`bool`, *optional*, defaults to `True`):
                Whether the process calling this is the main process or not. Useful during distributed training and you
                need to call this function on all processes. In this case, set `is_main_process=True` only on the main
                process to avoid race conditions.
            push_to_hub (`bool`, *optional*, defaults to `False`):
                Whether or not to push your model to the Hugging Face model hub after saving it. You can specify the
                repository you want to push to with `repo_id` (will default to the name of `save_directory` in your
                namespace).
            kwargs (`dict[str, Any]`, *optional*):
                Additional key word arguments passed along to the [`~utils.PushToHubMixin.push_to_hub`] method.
        zProvided path (z#) should be a directory, not a fileNT)exist_okcommit_messageprivate	create_prFrT   repo_id)r   r   rT   wbzModel weights saved in )rT   r   r   )ro   rp   rs   rj   errormakedirsrl   splitsepr
   r   save_configrq   r   rw   r   writer   _upload_folder)r;   r   r)   r   r   r&   r   r   r   rT   r   model_to_saveoutput_model_filer   model_bytesr#   r#   r'   save_pretrained  s:   

zFlaxModelMixin.save_pretrainedr,   )TF)r   
__module____qualname____doc__r   config_name_automatically_saved_args_flax_internal_argsclassmethodr(   dictr   r.   r"   r   rA   rE   rH   rJ   r6   ArrayrM   r   rG   strro   PathLiker   boolr   r#   r#   r#   r'   r   2   sB    	
"''  'r   )-ro   pickler   typingr   r6   	jax.numpynumpyr.   msgpack.exceptionsry   flax.core.frozen_dictr   r   flax.serializationr   r   flax.traverse_utilr   r	   huggingface_hubr
   r   huggingface_hub.utilsr   r   r   r   r    r   r   utilsr   r   r   r   r   r   modeling_flax_pytorch_utilsr   
get_loggerr   rj   r   r#   r#   r#   r'   <module>   s     
