o
    }oiw                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlZd dlm  m  m Z d dlmZ d dlmZ  d dl!m"Z# d d	l!m$Z$ d d
l%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. d dl/m0Z1 d dl2m3Z3 d dl4m5Z5 ede.dZ6edZ7e1  e8 Z9dede:fddZ;de#j<deee=e>f ef fddZ?dJddZ@dd  ZAd!d" ZBG d#d$ d$ZCG d%d& d&ZDdKd'eee*  fd(d)ZEdee>ef fd*d+ZFdej<e( fd,d-ZGd.d/ ZHd0d1 ZId2d3 ZJd4d5 ZKd6d7 ZLed8fd9ej<d:ed;efd<d=ZMd9ej<d>efd?d@ZNdAej<de:fdBdCZOeddDfd>edEee7 dFee> dGe:de7f
dHdIZdS )L    N)deepcopy)Path)locate)AnyCallableDictListOptionalTypeTypeVarUnion)dump)loadconfig)partial)serialization)Self)Artifact)
IOProtocol)ModelConnector)enable)	to_config)loggingConnT)boundCkptTypeargreturnc                 C   s
   | t jkS N)dataclasses_HAS_DEFAULT_FACTORY)r    r"   K/home/ubuntu/.local/lib/python3.10/site-packages/nemo/lightning/io/mixin.py_is_default_factory5   s   
r$   datac                 C   s   t j| dd}| D ]5\}}t|t jrt|}|||< qt|r@t| j	r@t
| j	}|D ]}|j|kr?| ||<  nq0qd|v rItdtt | j dt | j |d< t| tjrgd|d< |S )NT)include_defaults__fn_or_cls__z]It is not supported to dump objects of functions/classes that have a __fn_or_cls__ parameter.._target_	_partial_)
config_libordered_argumentsitems
isinstanceConfig_ordered_arguments_with_defaultr$   r    is_dataclassr'   fieldsnamedefault_factory
ValueErrorinspect	getmoduleget_callable__name____qualname__r   Partial)r%   resultkeyr   ordered_argr2   fieldr"   r"   r#   r0   9   s.   

"r0   r/   c                 C   s   t |}| |S )z(Returns a YAML representation of `data`.)r0   represent_data)dumperr%   	type_namevaluer"   r"   r#   !_config_representer_with_defaultsU   s   
rD   c                 C   s   t | |ddS )Nr;   )rB   )rD   )rA   r%   r"   r"   r#   "_partial_representer_with_defaults[   s   rE   c                 C   sn   z|}t |j d|j }d}W n ty,   |j}t |j d|j }d}Y nw ||d}| |S )a  
    Represent a given object as YAML using the specified dumper.

    This function is a fallback for objects that don't have specific representers.
    If the object has __qualname__ attr,
    the __target__ is set to f"{inspect.getmodule(obj).__name__}.{obj.__qualname__}".
    If the object does not have a __qualname__ attr, the __target__ is set from its __class__ attr.
    The __call__ key is used to indicate whether the target should be called to create an instance.

    Args:
        dumper (yaml.Dumper): The YAML dumper to use for serialization.
        data (Any): The data to serialize. This can be any Python object,
            but if it's a class or a class instance, special handling will be applied.

    Returns:
        str: The YAML representation of the data.
    r(   FT)r)   _call_)r6   r7   r9   r:   AttributeError	__class__r@   )rA   r%   objtargetcallrC   r"   r"   r#   _safe_object_representer_   s   
rL   c                   @   s   e Zd ZU dZeje ed< dd Zdd Z	de
eef fdd	Zdeje fd
dZedee fddZdedee fddZdejdee fddZdS )IOMixina  
    A mixin class designed to capture the arguments passed to the `__init__` method,
    facilitating the re-creation of the object through `io.reinit` method using stored configurations.

    This class intercepts the initialization of an object to store the arguments in a configuration
    object, which can be serialized and later used to reinitialize the object to its original state.
    It utilizes `fdl.Config` from the Fiddle library to create a structured configuration object
    that holds the initialization parameters. This configuration object is crucial for enabling
    serialization and deserialization of the parameters, thus allowing the object to be reconstructed
    at a later time with the same initial state.

    Attributes
    ----------
        __io__ (fdl.Config[Self]): A configuration object that stores the captured initialization
        parameters in a structured format. This object is an instance of `fdl.Config`, which allows
        for the serialization and deserialization of the parameters, enabling the object to be
        reconstructed at a later time with the same initial state.

    Examples
    --------
        from nemo.lightning import io

        class ExampleClass(io.IOMixin):
            def __init__(self, param1, param2):
                super().__init__()
                self.param1 = param1
                self.param2 = param2

        # Creating an instance of ExampleClass
        example = ExampleClass('value1', 'value2')
        example_copy = io.reinit(example)


    Note:
        For more information on `fdl.Config`, refer to the Fiddle library documentation at
        [Fiddle Config Documentation](https://fiddle.readthedocs.io/en/latest/api_reference/core.html#config).

    __io__c                 O   s   t | } t | }|S )a  
        Overrides the default object creation process to wrap the `__init__` method, allowing
        initialization arguments to be captured and stored in the `__io__` attribute.

        Args:
            *args: Variable length argument list for the `__init__` method.
            **kwargs: Arbitrary keyword arguments for the `__init__` method.

        Returns
        -------
            The newly created object instance.
        )_io_wrap_initobject__new__)clsargskwargsoutputr"   r"   r#   rQ      s   zIOMixin.__new__c                 C   s   t |  d S r   )_io_register_serializationrR   r"   r"   r#   __init_subclass__   s   zIOMixin.__init_subclass__r   c                 O   s   t | |g|R i |S )aA  
        Transforms and captures the arguments passed to the `__init__` method, filtering out
        any arguments that are instances of `IOProtocol` or are dataclass fields with default
        factories.

        Args:
            init_fn (Callable): The original `__init__` method of the class.
            *args: Variable length argument list for the `__init__` method.
            **kwargs: Arbitrary keyword arguments for the `__init__` method.

        Returns
        -------
            Dict[str, Any]: A dictionary of the captured and transformed arguments.
        )_io_transform_args)selfinit_fnrS   rT   r"   r"   r#   io_transform_args   s   zIOMixin.io_transform_argsc                 K   s   t | fi |S )a3  
        Initializes the configuration object (`__io__`) with the captured arguments.

        Args:
            **kwargs: A dictionary of arguments that were captured during object initialization.

        Returns
        -------
            fdl.Config[Self]: The initialized configuration object.
        )_io_init)rZ   rT   r"   r"   r#   io_init   s   zIOMixin.io_initc                 C   s   g S )zInitialize io artifactsr"   rW   r"   r"   r#   io_artifacts   s   zIOMixin.io_artifactsrU   
yaml_attrsc                 C   s   t |}d}|| }|jddd |t_|t_|d }t|d}t| t| j||}t	
|}	||	 W d   n1 s?w   Y  | j||d}
|
 D ]\}}|| d }|| qOt`t`t| sqt| dS dS )	a  
        Serializes the configuration object (`__io__`) to a file, allowing the object state to be
        saved and later restored. Also creates an artifacts directory and stores it in a thread-local
        global variable. If the artifacts directory is empty at the end, it is deleted.

        Args:
            output (Path): The path to the directory where the configuration object and artifacts
                           will be stored.
        r(   T)parentsexist_okio.jsonwN)attrsz.yaml)r   mkdir_thread_locallocal_artifacts_diroutput_pathopen_artifact_transform_saver   rN   r   	dump_jsonwrite_io_dump_yamlr-   
write_textanyiterdirshutilrmtree)rZ   rU   r`   ri   rh   artifacts_dirconfig_pathfiojsonyaml_configsattrserialized_str_pathr"   r"   r#   io_dump   s*   

zIOMixin.io_dumprw   re   c                 C   s   dd l }|jj }ddlm}m} ddlm} |j	t
jt |j	tjt |j	|t |j	|t |jtt | }i }	|D ]}
|t||
|	|
< qD||j_|	S )Nr   )r/   r;   )YamlSerializer)yaml
SafeDumperyaml_representerscopynemo_run.configr/   r;    nemo_run.core.serialization.yamlr~   add_representerr+   rD   r   rE   add_multi_representerrP   rL   	serializegetattr)rZ   rw   re   r   original_representersr/   r;   r~   
serializerr<   rz   r"   r"   r#   rn   	  s   zIOMixin._io_dump_yamlN)r9   
__module__r:   __doc__fdlr/   r   __annotations__rQ   rX   r   strr   r\   r^   classmethodr   r   r_   r   listr}   r+   rn   r"   r"   r"   r#   rM      s   
 '&rM   c                   @   sJ  e Zd ZU dZi Zeeee f e	d< i Z
eeee f e	d< ededefddZedd	ed
ee deee gee f fddZedd	ed
ee deee gee f fddZededefddZed	edeeef defddZddededee defddZe	dd	eeef deeeef  dedefddZdS )ConnectorMixina  
    A mixin class that provides methods to register and retrieve model connectors for importing
    and exporting models. This class supports dynamic registration of connectors based on file
    extensions, which facilitates the customization and extension of model serialization and
    deserialization processes.

    Attributes
    ----------
        _IMPORTERS (Dict[str, Type[ModelConnector]]): A dictionary mapping file extensions to
            model connector classes that handle the import process.
        _EXPORTERS (Dict[str, Type[ModelConnector]]): A dictionary mapping file extensions to
            model connector classes that handle the export process.
    
_IMPORTERS
_EXPORTERSpathr   c                 C   s   |  | }|||_|S )a  
        Creates an instance of a model by using the appropriate importer based on the file
        extension of the provided path.

        Args:
            path (str): The path to the model file to be imported.

        Example:
            from nemo.collections import llm
            model = llm.Mistral7BModel.import_from("hf")

        Returns
        -------
            Self: An instance of the model initialized from the imported data.
        )_get_connectorinitimport_ckpt	ckpt_path)rR   r   rU   r"   r"   r#   import_from3  s   zConnectorMixin.import_fromNextdefault_pathc                    &   dt t dt t f fdd}|S )a  
        A class method decorator to register a model connector as an importer for a specific file
        extension.

        Args:
            ext (str): The file extension to associate with the model connector.
            default_path (Optional[str]): The default path to use if no path is specified during import.

        Returns
        -------
            Callable[[Type[ConnT]], Type[ConnT]]: The decorator that registers the model connector.
        	connectorr   c                        |  j t  < r| _| S r   )r   r   r   r   rR   r   r   r"   r#   	decoratorX     z3ConnectorMixin.register_importer.<locals>.decoratorr
   r   rR   r   r   r   r"   r   r#   register_importerI     "z ConnectorMixin.register_importerc                    r   )a  
        A class method decorator to register a model connector as an exporter for a specific file
        extension.

        Args:
            ext (str): The file extension to associate with the model connector.
            default_path (Optional[str]): The default path to use if no path is specified during export.

        Returns
        -------
            Callable[[Type[ConnT]], Type[ConnT]]: The decorator that registers the model connector.
        r   r   c                    r   r   )r   r   r   r   r   r"   r#   r   o  r   z3ConnectorMixin.register_exporter.<locals>.decoratorr   r   r"   r   r#   register_exporter`  r   z ConnectorMixin.register_exporterc                 C   s   | j |ddS )aJ  
        Retrieves the appropriate model connector for importing based on the extension of the
        provided path.

        Args:
            path (str): The path to the model file to be imported.

        Returns
        -------
            ModelConnector: The model connector instance capable of handling the import.
        Timporterr   )rR   r   r"   r"   r#   r   w  s   zConnectorMixin.importerc                 C   s   | j ||ddS )a  
        Retrieves the appropriate model connector for exporting based on the extension.

        Args:
            ext (str): The file extension associated with the model connector.
            path (Union[str, Path]): The path where the model will be exported.

        Returns
        -------
            ModelConnector: The model connector instance capable of handling the export.
        Fr   r   )rR   r   r   r"   r"   r#   exporter  s   zConnectorMixin.exporterF	overwrite	base_pathc                 K   s8   | j |fi |}|j|d}|||d}||  |S )a  
        Imports a checkpoint from a specified path, potentially overwriting existing files.

        Args:
            path (str): The path to the checkpoint file to be imported.
            overwrite (bool): Flag to determine if existing files should be overwritten (default is False).
            base_path (Optional[Path]): The base path where the checkpoint file is located; used to resolve
                                        relative paths.

        Returns
        -------
            Path: The path to the imported checkpoint.

        Raises
        ------
            FileNotFoundError: If the checkpoint file does not exist at the specified path.
        )r   )r   )r   
local_pathon_import_ckpt)rZ   r   r   r   rT   r   r   r"   r"   r#   r     s
   
zConnectorMixin.import_ckptTr   c                 K   s   d}t |}d|v r|d\}}nt |}|r"| jt | | n	| jt | | }|s8td| d|  |sI|jsFtd| dd| S ||fi |S )a   
        Retrieves the appropriate model connector based on the file extension and path,
        distinguishing between importers and exporters.

        Args:
            ext (Union[str, Path]): The file extension or a URI that may include a protocol specifier.
            path (Optional[Union[str, Path]]): The path where the model file is located or will be saved.
            importer (bool): Flag to determine if the connector is for importing (True) or exporting (False).

        Returns
        -------
            ModelConnector: The model connector instance capable of handling the import or export.

        Raises
        ------
            ValueError: If no connector is found for the specified extension or if no default path is provided
                        when required.
        Nz://z"No connector found for extension 'z' for z)No default path specified for extension 'z'. zPlease provide a path)r   splitr   getr   r5   r   )rR   r   r   r   rT   r|   r   r"   r"   r#   r     s   ,zConnectorMixin._get_connectorr   )FN)NT)r9   r   r:   r   r   r   r   r
   r   r   r   r   r   r   r	   r   r   r   r   r   r   r   r   boolr   r   r"   r"   r"   r#   r   !  s4   
 00  
r   	artifactsc                    sP   fdd  fdd}dd t | tjr|| S t| r$ | S td)a;  
    Adds IO functionality to the target object or eligible classes in the target module
    by wrapping __init__ and registering serialization methods.

    Args:
        target (object or types.ModuleType): The target object or module to modify.

    Returns:
        object or types.ModuleType: The modified target with IO functionality added to eligible classes.

    Examples:
        >>> from nemo.collections.common import tokenizers
        >>> modified_tokenizers = track_io(tokenizers)
        >>> ModifiedWordTokenizer = track_io(tokenizers.WordTokenizer)
    c              
      s\   t | r,t| dr,t| ds,| ttttttt	t
d fv r| S t| } t|   p*g | _| S )N__init__rN   )r6   isclasshasattrr   intfloattupler   dictr   typerO   rV   __io_artifacts__rW   )r   r"   r#   _add_io_to_class  s   
z"track_io.<locals>._add_io_to_classc                    s<   t | D ]\}}t |r|| rt| | | q| S r   )r6   
getmembersr   setattr)moduler3   rI   )r   #_is_defined_in_module_or_submodulesr"   r#   _process_module  s
   z!track_io.<locals>._process_modulec                 S   s    | j |jkp| j |j dS )Nr(   )r   r9   
startswith)rI   r   r"   r"   r#   r     s    z5track_io.<locals>._is_defined_in_module_or_submodulesz"Target must be a module or a class)r.   types
ModuleTyper6   r   	TypeError)rJ   r   r   r"   )r   r   r   r#   track_io  s   

r   c           	      O   s   t |}|j| g|R i |}dd |j D }g }|D ]/}t|| tr/|| j||< t	|| rAt
j|| dd||< || jjdkrN|| q|D ]}||= qQ|S )a  
    Transforms and captures the arguments passed to the `__init__` method, filtering out
    any arguments that are instances of `IOProtocol` or are dataclass fields with default
    factories.

    Args:
        init_fn (Callable): The original `__init__` method of the class.
        *args: Variable length argument list for the `__init__` method.
        **kwargs: Arbitrary keyword arguments for the `__init__` method.

    Returns
    -------
        Dict[str, Any]: A dictionary of the captured and transformed arguments.
    c                 S   s   i | ]\}}|d kr||qS )rZ   r"   ).0kvr"   r"   r#   
<dictcomp>  s    z&_io_transform_args.<locals>.<dictcomp>T)allow_post_init_HAS_DEFAULT_FACTORY_CLASS)r6   	signaturebind_partial	argumentsr-   r.   r   rN   r    r1   fdl_dcconvert_dataclasses_to_configsrH   r9   append)	rZ   r[   rS   rT   sig
bound_argsconfig_kwargsto_delr=   r"   r"   r#   rY     s   

rY   c              
   K   s^   zt jt| fi |W S  ty. } zdt| j dt| d| d}t||d}~ww )a  
    Initializes the configuration object (`__io__`) with the captured arguments.

    Args:
        **kwargs: A dictionary of arguments that were captured during object initialization.

    Returns
    -------
        fdl.Config[Self]: The initialized configuration object.
    zError creating fdl.Config for : z"
Arguments that caused the error: zH
This may be due to unsupported argument types or nested configurations.N)r   r/   r   	Exceptionr9   r   RuntimeError)rZ   rT   e	error_msgr"   r"   r#   r]   &  s   
r]   c                    s<   | j  t| ddr| S t  fdd}|| _ d| _| S )z=Wraps the __init__ method of a class to add IO functionality.__wrapped_init__Fc                    s   t | dr| j g|R i |}nt|  g|R i |}t | dr-| jdi || _n	t| fi || _ | g|R i | d S )Nr\   r^   r"   )r   r\   rY   r^   rN   r]   )rZ   rS   rT   
cfg_kwargsoriginal_initr"   r#   wrapped_initC  s   

z#_io_wrap_init.<locals>.wrapped_initT)r   r   	functoolswrapsr   )rR   r   r"   r   r#   rO   <  s   rO   c                 C   s   t j| tttd d S )N)
flatten_fnunflatten_fnpath_elements_fn)r   register_node_traverser_io_flatten_object_io_unflatten_object_io_path_elements_fnrW   r"   r"   r#   rV   U  s   
rV   c                 C   s   zt | j W nW t jtfy_ } zHttdrttds|ttjt	
   }tj}|| }t|d}tt| d| | W d    n1 sIw   Y  t|fd fW  Y d }~S d }~ww | j S )Nrh   ri   wbrN   )r   rl   rN   UnserializableValueErrorrG   r   rg   r   rh   uuiduuid4ri   rj   r   r   r   __flatten__)instancer   local_artifact_pathri   artifact_pathrv   r"   r"   r#   r   ^  s   
r   c                 C   sz   t tdstj| |S tj}t| dkr6| d }tt|| d}t	|W  d    S 1 s1w   Y  tj| |S )N
output_dir   r   rb)
r   rg   r   r/   __unflatten__r   lenrj   r   pickle_load)valuesmetadatar   pickle_pathrv   r"   r"   r#   r   o  s   
 r   c              	   C   s>   zt | j W n t jtfy   t  f Y S w | j S r   )r   rl   rN   r   rG   IdentityElement__path_elements__)xr"   r"   r#   r   }  s   
r   r(   cfgri   relative_dirc           
   
   C   s&  t |jdg }|D ]G}|jst||js|jsq	t||js*|jr*td|j dt ||j}|d u rA|jr@td|j dq	|| |||}t||j| q	t	|D ];}zt ||}	W n	 t
yg   Y qUw t|	}	zt|	tjtjfrt||tt | |d |	||d W qU ty   Y qUw |S )Nr   z
Artifact 'z' is required but not provided)ri   r  )r   r'   skipr   rz   requiredr5   r   r   dirrG   r   r.   r   r/   r;   rk   )
r   r   ri   r  r   artifactcurrent_valnew_valrz   childr"   r"   r#   rk     sJ   

rk   r   c              	   C   s   t | jdg D ]8}t | |j}t|tjr!t| |jt|j q|jr%qt | |j}|d u r0qt	t
|| }t| |j| qt| D ]}ztt | |tjrYtt | ||d W qD tyc   Y qDw d S )Nr   )r   )r   r'   rz   r.   r   r/   r   buildr  r   r   r  _artifact_transform_loadr5   )r   r   r  r  r  rz   r"   r"   r#   r
    s*   r
  r   c                    s,   ddt jdtf fdd  | d S )a   
    Analyzes config to detect unexpected keyword arguments -- for example, deprecated parameters -- and
    updates the config by dropping them. Returns True if the config gets updated and False otherwise.

    Args:
        config (fdl.Config): The configuration object to analyze.
    Fr   prefixc                    s   t | tjrYt| j tdd  j D }|s= fdd| j	D }|r<dt
d| d|  |D ]}| j	|= q5n	t
d| d	 | j	 D ]\}}||d
 |  qKd S d S )Nc                 s   s    | ]
}|j tjju V  qd S r   )kindr6   	ParameterVAR_KEYWORDr   paramr"   r"   r#   	<genexpr>  s    z:drop_unexpected_params.<locals>.analyze.<locals>.<genexpr>c                    s   g | ]	}| j vr|qS r"   )
parametersr  r   r"   r#   
<listcomp>  s    z;drop_unexpected_params.<locals>.analyze.<locals>.<listcomp>Tz#Deprecated parameters to drop from r   zSkip analyzing z+ as it accepts arbitrary keyword arguments.r(   )r.   r   r/   r6   r   r'   rp   r  r   __arguments__r   warningdebugr-   )r   r  accept_kwargsto_dropr  r=   rC   analyzeupdatedr  r#   r    s    
z'drop_unexpected_params.<locals>.analyzez<root>)r   r/   r   r   r"   r  r#   drop_unexpected_params  s   	
r  Toutput_typesubpathr	  c                    s  t | }|t_t|dr| rt |d }nt|dr%|jr%t |d }| s1td| d r7d   t|}t	
|}W d   n1 sKw   Y  |di  D ]4\}}d	|d
 d |d
 d g}	 rd|v rtt fdd|d rqXtt|	stt|	 qXt|d}t	| }
W d   n1 sw   Y  d}|
di  D ]\}}d|v rň |d v r|} nq r|std  d| d|  |r||
d d< t|
j}t||  t| |s|S t|S )a  
    Loads a configuration from a pickle file and constructs an object of the specified type.

    Args:
        path (Path): The path to the pickle file or directory containing 'io.pkl'.
        output_type (Type[CkptType]): The type of the object to be constructed from the loaded data.
        subpath (Optional[str]): Subpath to selectively load only specific objects inside the output_type.
                                 Defaults to None.

    Returns
    -------
        CkptType: An instance of the specified type constructed from the loaded configuration.

    Raises
    ------
        FileNotFoundError: If the specified file does not exist.

    Example:
        loaded_model = load("/path/to/model", output_type=MyModel)
    is_dirrc   isdirzNo such file: ''z<root>.Nobjectsr(   r   r   r3   pathsc                    s    | vS r   r"   )pr  r"   r#   <lambda>  s    zload.<locals>.<lambda>r   zCould not find z for z in rootr=   )r   rg   r   r   r   r!  is_fileFileNotFoundErrorrj   rx   r   r   r-   joinallmapr   find_node_traverserr   r   loadsreadr   r  Deserializationr<   r
  r  r   r	  )r   r  r  r	  r|   rv   jrI   valclssjson_configroot_keyr   r"   r&  r#   r     sP   


r   )r/   r   )Pr    r   r6   rx   rr   	threadingr   r   r   r   pathlibr   pydocr   typingr   r   r   r   r	   r
   r   r   fiddler   $fiddle._src.experimental.dataclasses_srcexperimentalr   cloudpickler   r   r   fiddle._srcr   r+   r   fiddle._src.experimentalr   typing_extensionsr   nemo.lightning.io.artifact.baser   nemo.lightning.io.capturer   nemo.lightning.io.connectorr   nemo.lightning.io.fdl_torchr   _enable_extnemo.lightning.io.to_configr   
nemo.utilsr   r   r   localrg   r   r$   r/   r   r   r0   rD   rE   rL   rM   r   r   rY   r]   rO   rV   r   r   r   rk   r
  r  r"   r"   r"   r#   <module>   sf   ($
" ! 7,#	 	,2&