o
    wi                      @   s  d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZ d dlmZ d d	lmZ ejjd
dd										ddedededededB dedB dedB dedB de
ee eje f dedB dededefdd
ZdS )    )Path)CallableOptionalN)Console)	Annotated)ExportConfigQuantizationConfig	Quantizer)HFAutoModelForImageTextToText)2setup_trainer_and_restore_model_with_modelopt_spec)is_global_rank_zeroptqvlm)name	namespace   F
model_pathexport_configcalibration_tpcalibration_pp"num_layers_in_first_pipeline_stage!num_layers_in_last_pipeline_stagedevices	num_nodesquantization_configforward_looplegacy_ckpttrust_remote_codereturnc                 C   s
  |st  }|du r|}|du r|}t||}t|  s$J d|  dt| d  }d}|rE|jdks9J dt| |dd}|  n |jd	ksNJ d
t| ||||||d|
dddi ddid\}}|||	}|	|| | t
 rt }|d|j d |jS )aO  
    Applies Post-Training Quantization (PTQ) for a vision-language model using the
    specified quantization and export configs.
    It runs calibration for a small dataset to collect scaling factors low-precision
    GEMMs used by desired quantization method.
    By default, this function produces TensorRT-LLM checkpoint ready for deployment using nemo.export and nemo.deploy
    modules or directly using TensorRT-LLM library.

    Args:
        model_path (str): The path to model to be quantized.
        export_config (ExportConfig): Export configuration for output checkpoint.
        calibration_tp (int): Calibration tensor parallelism.
        calibration_pp (int): Calibration pipeline parallelism.
        num_layers_in_first_pipeline_stage (int): Number of layers in the first pipeline stage.
        num_layers_in_last_pipeline_stage (int): Number of layers in the last pipeline stage.
        devices (int): Number of devices to use for calibration. Default: calibration_tp.
        num_nodes (int): Number of nodes to use for calibration. Default: calibration_pp.
        quantization_config (QuantizationConfig): Configuration for quantization algorithm.
        forward_loop (Callable): Forward loop to use for calibration.
            If not provided, a forward loop will be created using the calibration dataset.
        legacy_ckpt (bool): If True, allow loading ckpt saved with older version of TE.
        trust_remote_code (bool): Trust remote code when loading HuggingFace models.

    Returns:
        Path: The path where the quantized checkpoint has been saved after calibration.
    NzPath z does not existzconfig.jsonnemoz1Automodel PTQ does not support export format nemoauto)
model_namer   
device_maphfz/Automodel PTQ does not support export format hfTF)sequence_parallel	lazy_initr$   )r   tensor_model_parallel_sizepipeline_model_parallel_sizer   r   r   r   inference_onlyr   strategy_kwargstrainer_kwargsmodel_config_overridesu;   [green]✓ PTQ succeeded, quantized checkpoint exported to z[/green])r   r	   r   existsexport_formatr
   configure_modelr   quantizeexportr   r   printpath)r   r   r   r   r   r   r   r   r   r   r   r   	quantizeris_automodeltrainermodelconsole r8   U/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/vlm/api.pyr      sJ   )


)
r   r   NNNNNNFF)pathlibr   typingr   r   nemo_runrunrich.consoler   typing_extensionsr   nemo.collections.llm.modeloptr   r   r	   nemo.collections.vlmr
   nemo.collections.vlm.modeloptr   nemo.utils.get_rankr   cli
entrypointstrintConfigboolr   r8   r8   r8   r9   <module>   s^   	
