o
    wi                     @   s   d dl mZ d dlmZ d dlm  mZ d dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZ d d
lmZ h dZeG dd dZ		ddejdede	jdB dejdB dejf
ddZdejde ddfddZ!dS )    )	dataclass)partialN)dist_checkpointing)	lightning)llm)ckpt_to_context_subdir)TrainerContextckpt_to_weights_subdir)logging)is_global_rank_zero>   
num_layershidden_sizemamba_head_dimffn_hidden_sizemamba_num_headsnum_query_groupsnum_attention_headsc                   @   s   e Zd ZU dZdZedB ed< dZedB ed< dZedB ed< dZ	edB ed< dZ
edB ed< dZedB ed< dZedB ed	< dZee dB ed
< dd ZdS )PruningConfiga  Pruning parameters. None means no pruning of the corresponding dimension.

    Args:
        target_ffn_hidden_size (int, optional): Target size of MLP FFN hidden dimension.
        target_hidden_size (int, optional): Target size of embedding hidden dimension.
        target_num_attention_heads (int, optional): Target number of attention heads.
            Required if `target_num_query_groups` is provided.
        target_num_query_groups (int, optional): Target number of query groups for grouped-query attention.
            Required if `target_num_attention_heads` is provided.
        target_mamba_num_heads (int, optional): Target number of Mamba attention heads.
        target_mamba_head_dim (int, optional): Target dimension of Mamba attention heads.
        target_num_layers (int, optional): Target number of transformer layers using importance metric.
        drop_layers (list[int], optional): List of specific layer indices (1-indexed) to drop from the model.
            Cannot be used with other pruning parameters.
    Ntarget_ffn_hidden_sizetarget_hidden_sizetarget_num_attention_headstarget_num_query_groupstarget_mamba_num_headstarget_mamba_head_dimtarget_num_layersdrop_layersc                 C   sH   | j r | j| j| j| j| j| j| jg}tdd |D r"t	dd S d S )Nc                 s   s    | ]}|d uV  qd S )N ).0pr   r   g/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/llm/modelopt/prune/pruner.py	<genexpr>P   s    z.PruningConfig.__post_init__.<locals>.<genexpr>z8drop_layers cannot be used with other pruning parameters)
r   r   r   r   r   r   r   r   any
ValueError)selfother_paramsr   r   r   __post_init__E   s   	
zPruningConfig.__post_init__)__name__
__module____qualname____doc__r   int__annotations__r   r   r   r   r   r   r   listr%   r   r   r   r   r   *   s   
 r   modelpruning_configdata_moduletrainerreturnc                    s    j rtjj|  j d | S |dusJ d|dusJ d|j|_td  fddtD }tj	| dd	|idd
t
tj||ddid | S )a  Prune a GPT / Mamba (sub-class of GPT) model in-place based on the provided pruning configuration.

    Args:
        model (llm.GPTModel): The model to prune.
        pruning_config (PruningConfig): The pruning configuration.
        data_module (pl.LightningDataModule, optional): The data module to use for pruning.
            Required if not dropping layers.
        trainer (nl.Trainer, optional): The trainer to use for pruning.
            Required if not dropping layers.

    Returns:
        llm.GPTModel: The pruned model.
    )layers_to_dropNz+data_module is required to prune the model.z'trainer is required to prune the model.zPruning model...c                    s2   i | ]}t  d | dur|t  d | qS )target_N)getattr)r   kr.   r   r   
<dictcomp>p   s
    z(prune_language_model.<locals>.<dictcomp>mcore_minitronexport_configforward_loopr-   )datar0   	tokenizer)modeconstraintsdummy_inputconfig)r   mtpplugins drop_mcore_language_model_layerstrain_dataloaderval_dataloaderr
   infoSUPPORTED_PRUNING_HPARAMSpruner   r   validate)r-   r.   r/   r0   r9   r   r6   r   prune_language_modelT   s$   

rJ   	save_pathc                 C   s   t d| d tdhB D ]}t| jjj|t| jj| qt|dd}|j	ddd t
j| jj || jjd t rJt| jt|dgd	 t d
| d dS )z"Save pruned model nemo checkpoint.zSaving pruned model to z...kv_channelsT)	is_saving)parentsexist_ok)content_metadatar-   )
yaml_attrszPruned model saved to 
N)r
   rF   rG   setattrr-   __io__r@   r4   r	   mkdirr   savestrategymegatron_parallelsharded_state_dictsharded_state_dict_metadatar   r   from_trainerio_dumpr   )r0   rK   r5   weight_pathr   r   r   save_pruned_model   s   
r^   )NN)"dataclassesr   	functoolsr   modelopt.torch.prunetorchrH   rA   pytorch_lightningplmegatron.corer   nemor   nlnemo.collectionsr   nemo.lightning.ckpt_utilsr   nemo.lightning.io.plr   r	   
nemo.utilsr
   nemo.utils.get_rankr   rG   r   GPTModelLightningDataModuleTrainerrJ   strr^   r   r   r   r   <module>   s8   ,
,