o
    }oi                     @   s   d dl mZ d dlmZ d dlZd dlmZ d dlm	Z
 d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ ed\ZZh dZeG dd dZ		ddejdedejdB de
jdB dejf
ddZde
jde ddfddZ!dS )    )	dataclass)partialN)dist_checkpointing)	lightning)llm)ckpt_to_context_subdir)TrainerContextckpt_to_weights_subdir)logging)is_global_rank_zero)safe_importzmodelopt.torch.prune>   
num_layershidden_sizeffn_hidden_sizenum_query_groupsnum_attention_headsc                   @   s~   e Zd ZU dZdZedB ed< dZedB ed< dZedB ed< dZ	edB ed< dZ
edB ed< dZee dB ed< d	d
 ZdS )PruningConfiga\  Pruning parameters. None means no pruning of the corresponding dimension.

    Args:
        target_ffn_hidden_size (int, optional): Target size of MLP FFN hidden dimension.
        target_hidden_size (int, optional): Target size of embedding hidden dimension.
        target_num_attention_heads (int, optional): Target number of attention heads.
            Required if `target_num_query_groups` is provided.
        target_num_query_groups (int, optional): Target number of query groups for grouped-query attention.
            Required if `target_num_attention_heads` is provided.
        target_num_layers (int, optional): Target number of transformer layers using importance metric.
        drop_layers (list[int], optional): List of specific layer indices (1-indexed) to drop from the model.
            Cannot be used with other pruning parameters.
    Ntarget_ffn_hidden_sizetarget_hidden_sizetarget_num_attention_headstarget_num_query_groupstarget_num_layersdrop_layersc                 C   s@   | j r| j| j| j| j| jg}tdd |D rtdd S d S )Nc                 s   s    | ]}|d uV  qd S )N ).0pr   r   ^/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/llm/modelopt/prune/pruner.py	<genexpr>J   s    z.PruningConfig.__post_init__.<locals>.<genexpr>z8drop_layers cannot be used with other pruning parameters)r   r   r   r   r   r   any
ValueError)selfother_paramsr   r   r   __post_init__A   s   zPruningConfig.__post_init__)__name__
__module____qualname____doc__r   int__annotations__r   r   r   r   r   listr"   r   r   r   r   r   *   s   
 r   modelpruning_configdata_moduletrainerreturnc                    s   t sJ d jrtjjj|  jd | S |dusJ d|dus%J d|j|_t	d  fddt
D }tj| d	d
|iddttj||ddid | S )a   Prune a GPT model in-place based on the provided pruning configuration.

    Args:
        model (llm.GPTModel): The model to prune.
        pruning_config (PruningConfig): The pruning configuration.
        data_module (pl.LightningDataModule, optional): The data module to use for pruning.
            Required if not dropping layers.
        trainer (nl.Trainer, optional): The trainer to use for pruning.
            Required if not dropping layers.

    Returns:
        llm.GPTModel: The pruned model.
    z/nvidia-modelopt is required to prune the model.)layers_to_dropNz+data_module is required to prune the model.z'trainer is required to prune the model.zPruning model...c                    s2   i | ]}t  d | dur|t  d | qS )target_N)getattr)r   kr+   r   r   
<dictcomp>k   s
    z#prune_gpt_model.<locals>.<dictcomp>mcore_gpt_minitronexport_configforward_loopr*   )datar-   	tokenizer)modeconstraintsdummy_inputconfig)HAVE_MODELOPTr   mtppluginsmegatrondrop_mcore_gpt_layerstrain_dataloaderval_dataloaderr
   infoSUPPORTED_PRUNING_HPARAMSpruner   r   validate)r*   r+   r,   r-   r6   r   r3   r   prune_gpt_modelN   s&   

rI   	save_pathc                 C   s   t d| d tdhB D ]}t| jjj|t| jj| qt|dd}|j	ddd t
| jj | t rFt| jt|dgd t d	| d
 dS )z"Save pruned model nemo checkpoint.zSaving pruned model to z...kv_channelsT)	is_saving)parentsexist_okr*   )
yaml_attrszPruned model saved to 
N)r
   rE   rF   setattrr*   __io__r=   r1   r	   mkdirr   savestrategymegatron_parallelsharded_state_dictr   r   from_trainerio_dumpr   )r-   rJ   r2   weight_pathr   r   r   save_pruned_model{   s   r[   )NN)"dataclassesr   	functoolsr   pytorch_lightningplmegatron.corer   nemor   nlnemo.collectionsr   nemo.lightning.ckpt_utilsr   nemo.lightning.io.plr   r	   
nemo.utilsr
   nemo.utils.get_rankr   nemo.utils.import_utilsr   r?   r>   rF   r   GPTModelLightningDataModuleTrainerrI   strr[   r   r   r   r   <module>   s:   &
-