o
    }oi                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZm Z  d dl!m"Z" dZ#G dd deZ$G dd de Z%dS )    N)partial)Path)AnyCallableDictOptional)_PATH)	TrainerFn)model_summary)override)base)ADAPTER_META_FILENAME)ckpt_to_dir)MegatronParallel)PEFTWrappedAdapterIO)loggingspeechlm_peft_resumec                       s^   e Zd Zdef fddZdd Z				dd	d
Zdd ZdddZde	j
fddZ  ZS )SpeechToTextLLMPEFTpeftc                    s   t    || _d S )N)super__init__r   )selfr   	__class__ c/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/speechlm/utils/model_transform.pyr   %   s   

zSpeechToTextLLMPEFT.__init__c                 C   s   t t| dS )z}
        This is a helper function to return a partial function that wraps the checkpoint I/O with the PEFT adapter.
        )r   )r   SpeechLMWrappedAdapterIO)r   r   r   r   get_wrappped_io)   s   z#SpeechToTextLLMPEFT.get_wrappped_iomodel/nemo.collections.speechlm.model.SpeechToTextLLMreturnc                 C   s   |   |j}td| j  t|tr&t|dkr&|D ]}| | qn| | t	|dr=|j
jjtjkr=| | tdtj|dd  |S )aR  Apply the PEFT method to the LLM.

        This method freezes the LLM parameters and walks through the model
        structure, applying the transform method to LLM module.

        Args:
            model (nn.Module): The model to be fine-tuned.

        Returns:
            nn.Module: The transformed model with PEFT applied.
        z&Applying PEFT to language model with:    trainer
   )	max_depth)
freeze_llmmoduler   infor   
isinstancer   len_transform_modulehasattrr#   statefnr	   FITTINGfreeze_modelr
   	summarize)r   r   r(   model_chunkr   r   r   __call__/   s   

zSpeechToTextLLMPEFT.__call__c                 C   s2   t |ds|j}t |drtj|j| jdd d S )Nlanguage_modelT)	_skip_map)r-   r(   r/   walkr5   	transform)r   r(   r   r   r   r,   P   s   

z%SpeechToTextLLMPEFT._transform_moduleNc                 C   s   | j j|||dS )N)nameprefix)r   r8   )r   r(   r9   r:   r   r   r   r8   U   s   zSpeechToTextLLMPEFT.transformr#   c                 C   s8   |j }tdd | D | _t| jdkrtddS )z
        Set params that should be saved for PEFT, including some params that don't require gradients,
        such as the running mean and var of batchnorm.
        c                 S   s   g | ]\}}|qS r   r   ).0r9   _r   r   r   
<listcomp>^   s    z:SpeechToTextLLMPEFT.set_params_to_save.<locals>.<listcomp>r   z'No trainable parameters found for PEFT!N)lightning_modulesettrainable_parametersparams_to_saver+   RuntimeError)r   r#   r   r   r   r   set_params_to_saveX   s
   z&SpeechToTextLLMPEFT.set_params_to_save)r   r    r!   r    )NN)__name__
__module____qualname__r   r   r   r4   r,   r8   plTrainerrC   __classcell__r   r   r   r   r   $   s    
!
r   c                   @   sx   e Zd Ze			ddedee ded eB dee	e
f fddZ				ddedee d
eded eB ddf
ddZdS )r   Npathmap_locationstrictStrictHandlingr!   c                 C   s   | j dusJ t|t }d}d}t|ddr/tt|| _| j j|i d}|j }| _	d}n,|
 rXt|d}t|}	W d   n1 sHw   Y  t|	d | _	|| _n|| _	| j|||||d}
|dur||d	 |
d	  t|
v rzd|t< |S |
S )
ad  
        Overwrite the load_checkpoint method to handle PEFT resume for SpeechLM.

        =====================
        Initial PEFT Training
        =====================
        Initial PEFT training requires loading the base model weights. In this case, this function is called by
        trainer.strategy.setup() -> megatron_strategy.restore_model() -> megatron_strategy.load_checkpoint().
        `path = PosixPath(<base_path>)`, and sharded_state_dict contains only base model weights

        ===========
        PEFT Resume
        ===========
        PEFT resume requires loading two set of model weights, 1) base model weights and 2) adapter weights
        Base model weights could be imported from e.g. HF, and is frozen during PEFT training.
        Adapter weights contains the training metadata that will need to be loaded.
        As such, this function will be entered twice during PEFT training resume.

        For the FIRST TIME this function is called by trainer._checkpoint_connector._restore_modules_and_callbacks.
        `path = AdapterPath(<adapter_path>, base_model_path=<base_path>)`, and sharded_state_dict contains only base model weights

        For the SECOND TIME this function is called by PEFT.apply_transform (above, in the same file).
        `path = PosixPath(<adapter_path>)`, and sharded_state_dict contains only adapter weights.
        NFbase_model_path)sharded_state_dictTrmodel_ckpt_path)rL   
state_dict)checkpoint_ior   r   getattrr   stradapter_ckpt_pathload_checkpointrN   rQ   existsopenjsonload_load_checkpointupdateSPEECHLM_PEFT_RESUME)r   rJ   rO   rK   rL   adapter_meta_pathadapter_ckpt	load_basefmetadata
model_ckptr   r   r   rW   e   s.   !z(SpeechLMWrappedAdapterIO.load_checkpointFra   c                 C   s(   |r	dt  tdiS | j||||}|S )NrR   T)dictr^   rS   rW   )r   rJ   rO   rK   ra   rL   rd   r   r   r   r\      s   z)SpeechLMWrappedAdapterIO._load_checkpoint)NNN)NFN)rD   rE   rF   r   r   r   r   boolr   rU   r   rW   r\   r   r   r   r   r   c   s8    

F
r   )&rZ   	functoolsr   pathlibr   typingr   r   r   r   lightning.pytorchpytorchrG    lightning.fabric.utilities.typesr    lightning.pytorch.trainer.statesr	   lightning.pytorch.utilitiesr
   typing_extensionsr   nemo.collections.llm.fnr   r/   nemo.lightning.ckpt_utilsr   nemo.lightning.io.plr    nemo.lightning.megatron_parallelr   %nemo.lightning.pytorch.callbacks.peftr   r   
nemo.utilsr   r^   r   r   r   r   r   r   <module>   s$   ?