o
    wiB.                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZm Z  d dl!m"Z" d dl#m$Z$ e %e&Z'edej(dZ)edej*dZ+dd Z,G dd dZ-G dd dee"Z.dS )    N)Path)AnyCallableDictOptionalTypeVarUnion)HFValidationError)CheckpointIO)get_filesystem)_PATH)nn)override)HF_ADAPTER_CONFIG_FILENAMEHF_ADAPTER_PATHHF_WEIGHTS_PATHWEIGHTS_PATH)IOMixin)ckpt_to_weights_subdirLightningModuleT)boundModuleTc                   C   s    t   pt   pt  dkS )z=Checks whether rank=0 accounting for un-inintialized dist-envr   )distis_availableis_initializedget_rank r   r   Q/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/lightning/io/hf.py	is_rank_0(   s    r   c                   @   s(   e Zd ZdZedd Zedd ZdS )HFAdapterKeyRenamerzDummy class for key renamingc                 C   "   |  ddd ddd dddS )z Converts lora adapter FQNs to HFmodel.model.base_model.model.model.   lora_a.weightlora_A.weightlora_b.weightlora_B.weightreplacexr   r   r   
nemo_to_hf0      

zHFAdapterKeyRenamer.nemo_to_hfc                 C   r    )z"Converts lora adapter FQNs to NeMor'   r&   r#   r%   r$   r"   r!   r(   r*   r   r   r   
hf_to_nemo9   r-   zHFAdapterKeyRenamer.hf_to_nemoN)__name__
__module____qualname____doc__staticmethodr,   r.   r   r   r   r   r   -   s    
r   c                       s   e Zd ZdZd fdd	Zeddeeef de	de
e d	df fd
dZ	ddeeef deeef de
e d	dfddZedeeef d	eeef fddZe			dde	de
e de
d eB d	eeef fddZede	d	dfddZ  ZS )HFCheckpointIOzHFCheckpointIO that utilizes :func:`torch.save` and :func:`torch.load` to save and load
    checkpoints respectively, common for most use cases.

    .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.

    NFc                    s   t    || _|| _dS )a+  Initializes HFCheckpointIO

        Args:
            model (nn.Module, optional): The nn.Module that's used for training.
                This supplies the save_pretrained function.
            adapter_only (bool, optional): If true, will only save LoRA adapter weights. Defaults to False.
        N)super__init__adapter_onlymodel)selfr8   r7   	__class__r   r   r6   K   s   

zHFCheckpointIO.__init__
checkpointpathstorage_optionsreturnc                    s  t  sJ dt|dd}|jd tksJ d|t|j}t|}|j|dd | jrK|t	 }|j
dd | |d|| t||d  d
S tt| jd	d
rt|t }|j
ddd | jj||dd t||d  d
S t ||| tdt| )a  
        Save model/training states to a checkpoint file.

        Note:
            This function assumes it's only written by RANK=0 if executed inside a dist-env.
        Args:
            checkpoint: dict containing model and trainer state
            path: write-target path
            storage_options: not used in ``TorchCheckpointIO.save_checkpoint``

        Raises
        ------
            TypeError:
                If ``storage_options`` arg is passed in

        zExpected to run only on rank=0T)	is_savingzExpected {} to end with {})exist_ok
state_dict
trainer.ptsave_pretrainedN)parentsrB   )rC   zCheckpoint was saved at: )r   r   partsr   formatparentr   makedirsr7   r   mkdir_save_adapter_weights_onlypoptorchsavecallablegetattrr8   r   rE   r5   save_checkpointNotImplementedErrorstr)r9   r<   r=   r>   checkpoint_dirfsadapter_pathhf_weights_pathr:   r   r   rR   W   s(   zHFCheckpointIO.save_checkpointrC   c           	   
   C   sv   ddl m} t| }|D ]}||}||t|< qz
|||d  W dS  ty: } ztd|d}~ww )a  
        Saves only the adapter weights in a safetensors format.

        Args:
            state_dict (Dict[str, Any]): The state dictionary containing model weights.
            path (Union[str, Path]): The directory path where the adapter weights should be saved.
            storage_options (Optional[Any], optional): Additional storage options, if required.

        Raises:
            OSError: If saving the file fails.
        r   )	save_fileadapter_model.safetensorsz"Failed to save adapter weights: {}N)	safetensors.torchrY   listkeysrM   r   r,   OSErrorrH   )	r9   rC   r=   r>   rY   module_namesnameparamer   r   r   rL      s   
z)HFCheckpointIO._save_adapter_weights_onlyc           	   
   C   s  t | }|| std| || std| i }t| d }| s-td|t| t }| s>td|ddlm	} z2||dd	d
}|
 D ]}|||t|< qQW d   n1 shw   Y  W d|iS W d|iS  ty } ztd| d}~ww )a  
        Loads only the adapter weights from a safetensors checkpoint.

        Args:
            path (Union[str, Path]): The directory path where the adapter weights are stored.

        Returns:
            Dict[str, Any]: A dictionary containing the state dictionary of the adapter model.

        Raises:
            FileNotFoundError: If the checkpoint directory does not exist.
            ValueError: If the checkpoint path is not a directory.
            OSError: If loading the weights fails.
        zCheckpoint file not found: {}z,Checkpoints should be a directory. Found: {}rZ   z"Adapter weights file not found: {}z!Adapter config file not found: {}r   )	safe_openptcpu)	frameworkdeviceNz Failed to load adapter weights: rC   )r   existsFileNotFoundErrorisdir
ValueErrorrH   r   r   safetensorsrc   r]   
get_tensorr   r.   r^   )	r=   rV   rC   adapter_fileconfig_filerc   fkrb   r   r   r   _load_adapter_weights_only   s4   


z)HFCheckpointIO._load_adapter_weights_onlymap_locationstrictStrictHandlingc              	   C   s   t |}|jd tksJ d| dt i }|d  s&td| ntj|d dddd	}| j	rA|t
 }|t|O }|S tt| jd
driz| j|t |d< W |S  ttfyh   td| dw td)a  Loads checkpoint using :func:`torch.load`, with additional handling for ``fsspec`` remote loading of files.

        Args:
            path: Path to checkpoint
            map_location: a function, :class:`torch.device`, string or a dict specifying how to remap storage
                locations.

        Returns: The loaded checkpoint.

        Raises
        ------
            FileNotFoundError: If ``path`` is not found by the ``fsspec`` filesystem

        rA   z	Expected z not to end with rD   z<Asked to restore from checkpoint without trainer state at {}re   TF)rs   mmapweights_onlyload_pretrainedNrC   zFailed to load weights from z. If this is a local checkpoint, please make sure the path exists and has the correct format. If this is a model from the HuggingFace Hub, please provide a valid repo_id of a model on the Hub.zCheckpoint load has failed: 'load_pretrained' is not defined for this model and 'adapter_only' is disabled. Please implement 'load_pretrained' or switch to 'adapter_only' mode.)r   rG   r   rh   logginginforH   rN   loadr7   r   r4   rr   rP   rQ   r8   rx   EnvironmentErrorr	   RuntimeError)r9   r=   sharded_state_dictrs   rt   trainer_staterW   r   r   r   load_checkpoint   s6   "
zHFCheckpointIO.load_checkpointc                 C   s8   t |}||r|j|dd td| dS dS )ziRemove checkpoint file from the filesystem.

        Args:
            path: Path to checkpoint

        T)	recursivezRemoved checkpoint: {}N)r   rh   rmlogdebugrH   )r9   r=   rV   r   r   r   remove_checkpoint  s
   
z HFCheckpointIO.remove_checkpoint)NF)N)NNN)r/   r0   r1   r2   r6   r   r   rT   r   r   r   rR   r   r   rL   r3   rr   r   boolr   r   __classcell__r   r   r:   r   r4   C   s@    .C


$*

7r4   )/ry   pathlibr   typingr   r   r   r   r   r   lightning.pytorchpytorchplrN   torch.distributeddistributedr   huggingface_hub.errorsr	   lightning.fabric.pluginsr
   #lightning.fabric.utilities.cloud_ior    lightning.fabric.utilities.typesr   r   typing_extensionsr   nemo.lightning.ckpt_utilsr   r   r   r   nemo.lightning.io.mixinr   nemo.lightning.io.plr   	getLoggerr/   r   LightningModuler   Moduler   r   r   r4   r   r   r   r   <module>   s*    
