o
    }oi                  
   @   s   d dl mZ d dlmZ d dlmZ d dlZd dl	Z	d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZmZ d d	lmZmZ d
ZeG dd deZejjeddejej fddZejjej ed				dde!dee! de"de"fddZ#dS )    )	dataclass)OptionalN)DistributedDataParallelConfig)OptimizerConfig)	lightning)llm)FluxModelParamsMegatronFluxModel)default_resumetensorboard_loggerz	flux-535mc                   @   s   e Zd ZdZdd ZdS )DummyModelParamszF
    Initialize a toy model that only has one layer of each type.
    c                 C   s&   d | _ d | _d | _d| j_d| j_d S )N   )	t5_paramsclip_params
vae_configflux_confignum_single_layersnum_joint_layers)self r   `/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/recipes/flux_535m.py__post_init__'   s
   zDummyModelParams.__post_init__N)__name__
__module____qualname____doc__r   r   r   r   r   r   !   s    r   namereturnc                   C   s   t jtt tdS )a  
    Factory function to create a Flux sample model configuration with only 1 transformer layers.

    Returns:
        run.Config[pl.LightningModule]: Configuration for the Flux sample (535 million) model.

    Examples:
        CLI usage:
            $ nemo llm pretrain model=bert_110m ...

        Python API usage:
            >>> model_config = model(flux_params)
            >>> print(model_config)
    )flux_params)runConfigr	   r   r   r   r   r   model/   s   r"   )targetr   defaultr      r   dir	num_nodesnum_gpus_per_nodec                 C   s   t jtjt t jtj||dt jtjddddt	j
t jtddddt jtjddd	d
dd	t jtjd| t| d|dt jtjt jtdddd	ddt dS )zC
    Flux ci test recipe with default trainer, no parallelism.
    gpur   FT)check_for_nan_in_gradgrad_reduce_in_fp32)tensor_model_parallel_sizepipeline_model_parallel_sizecontext_parallel_sizesequence_parallelpipeline_dtypeddpz
bf16-mixed)	precisionr   
   )devicesr'   acceleratorstrategypluginsnum_sanity_val_steps	max_stepslog_every_n_stepsNr   )ckptr   tensorboardlog_dirg-C6?)lrbf16use_distributed_optimizerweight_decay)config)r"   trainerlogoptimresume)r    Partialr   trainr"   r!   nlTrainerMegatronStrategytorchbfloat16r   MegatronMixedPrecision
NeMoLoggerr   MegatronOptimizerModuler   r
   )r   r&   r'   r(   r   r   r   unit_test_recipeH   sZ   

rQ   )r$   Nr   r%   )$dataclassesr   typingr   lightning.pytorchpytorchplnemo_runr    rL   megatron.core.distributedr   megatron.core.optimizerr   nemor   rI   nemo.collectionsr   ,nemo.collections.diffusion.models.flux.modelr   r	   (nemo.collections.llm.recipes.log.defaultr
   r   NAMEr   clifactoryr!   LightningModuler"   rH   strintrQ   r   r   r   r   <module>   s<   