import nemo_run as run
import torch

from nemo.lightning.pytorch.plugins.mixed_precision import MegatronMixedPrecision


@run.cli.factory
def bf16_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using BF16.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for BF16 mixed precision training
    """
    return run.Config(
        MegatronMixedPrecision,
        precision="bf16-mixed",
        params_dtype=torch.bfloat16,
        pipeline_dtype=torch.bfloat16,
        autocast_enabled=False,
        grad_reduce_in_fp32=True,
    )


@run.cli.factory
def fp16_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using FP16.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for FP16 mixed precision training
    """
    return run.Config(
        MegatronMixedPrecision,
        precision="16-mixed",
        params_dtype=torch.half,
        pipeline_dtype=torch.half,
        autocast_enabled=False,
        grad_reduce_in_fp32=False,
    )


def bf16_with_fp8_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using BF16 with FP8.

    Note: FP8 recipes are experimental and have not been tested for training convergence.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for BF16 with FP8 mixed precision training
    """
    cfg = bf16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "delayed"
    cfg.fp8_margin = 0
    cfg.fp8_amax_history_len = 1024
    cfg.fp8_amax_compute_algo = "max"
    cfg.fp8_param_gather = True
    return cfg


def fp16_with_fp8_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using FP16 with FP8.

    Note: FP8 recipes are experimental and have not been tested for training convergence.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for FP16 with FP8 mixed precision training
    """
    cfg = fp16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "delayed"
    cfg.fp8_margin = 0
    cfg.fp8_amax_history_len = 1024
    cfg.fp8_amax_compute_algo = "max"
    cfg.fp8_param_gather = True
    return cfg


def bf16_with_mxfp8_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using BF16 with MXFP8.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for BF16 with MXFP8 mixed precision training
    """
    cfg = bf16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "mxfp8"
    cfg.fp8_param_gather = True
    return cfg


def fp16_with_mxfp8_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using FP16 with MXFP8.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for FP16 with MXFP8 mixed precision training
    """
    cfg = fp16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "mxfp8"
    cfg.fp8_param_gather = True
    return cfg


def bf16_with_fp8_current_scaling_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using BF16 with FP8
    per-tensor current scaling.

    Note: The baseline current scaling recipe uses BF16 in the first and last Transformer layers. The user
    can choose to disable the BF16 layers or apply BF16 to more Transformer layers.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for BF16 with FP8 per-tensor current scaling mixed
        precision training
    """
    cfg = bf16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "tensorwise"
    cfg.first_last_layers_bf16 = True
    cfg.num_layers_at_start_in_bf16 = 1
    cfg.num_layers_at_end_in_bf16 = 1
    cfg.fp8_param_gather = True
    return cfg


def nemotron_h_bf16_with_fp8_current_scaling_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using BF16 with FP8
    per-tensor current scaling.

    Note: The baseline current scaling recipe uses BF16 in the first and last Transformer layers. The user
    can choose to disable the BF16 layers or apply BF16 to more Transformer layers.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for BF16 with FP8 per-tensor current scaling mixed
        precision training
    """
    cfg = bf16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "tensorwise"
    cfg.first_last_layers_bf16 = True
    cfg.num_layers_at_start_in_bf16 = 2
    cfg.num_layers_at_end_in_bf16 = 2
    cfg.fp8_param_gather = True
    return cfg


def fp16_with_fp8_current_scaling_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using FP16 with FP8
    per-tensor current scaling.

    Note: The baseline current scaling recipe uses FP16 in the first and last Transformer layers. The user
    can choose to disable the FP16 layers or apply FP16 to more Transformer layers.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for FP16 with FP8 per-tensor current scaling mixed
        precision training
    """
    cfg = fp16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "tensorwise"
    cfg.first_last_layers_bf16 = True
    cfg.num_layers_at_start_in_bf16 = 1
    cfg.num_layers_at_end_in_bf16 = 1
    cfg.fp8_param_gather = True
    return cfg


def bf16_with_fp8_subchannel_scaling_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using BF16 with FP8
    NV Subchannel scaling. This recipe uses 128x128 blockwise quantization for weight and 1x128 blockwise
    quantization for activation.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for BF16 with FP8 subchannel scaling mixed precision training
    """
    cfg = bf16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "blockwise"
    cfg.fp8_param_gather = False
    return cfg


def fp16_with_fp8_subchannel_scaling_mixed() -> run.Config[MegatronMixedPrecision]:
    """Create a MegatronMixedPrecision plugin configuration for mixed precision training using FP16 with FP8
    NV Subchannel scaling. This recipe uses 128x128 blockwise quantization for weight and 1x128 blockwise
    quantization for activation.

    Returns:
        run.Config[MegatronMixedPrecision]: Configuration for FP16 with FP8 subchannel scaling mixed precision training
    """
    cfg = fp16_mixed()
    cfg.fp8 = "hybrid"
    cfg.fp8_recipe = "blockwise"
    cfg.fp8_param_gather = False
    return cfg
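

# ---------------------------------------------------------------------------
# Usage sketch (an illustrative addition, not recovered from the compiled
# module): each factory returns a `run.Config` that NeMo recipes attach to the
# trainer as its precision plugin. The `llama3_8b` recipe module and the exact
# `pretrain_recipe` arguments below are assumptions for illustration; any
# recipe that exposes `trainer.plugins` is configured the same way, and fields
# on the returned config may still be overridden before launch.
#
#     from nemo.collections.llm.recipes import llama3_8b
#     from nemo.collections.llm.recipes.precision.mixed_precision import (
#         bf16_with_fp8_current_scaling_mixed,
#     )
#
#     recipe = llama3_8b.pretrain_recipe(name="llama3_8b_fp8", num_nodes=1)
#     precision = bf16_with_fp8_current_scaling_mixed()
#     precision.num_layers_at_start_in_bf16 = 2  # keep two leading layers in BF16
#     recipe.trainer.plugins = precision
# ---------------------------------------------------------------------------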