o
    }oim0                     @   s  d dl mZmZmZmZmZmZ d dlZd dl	Z	d dl
Z	d dlZ	d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dl	mZm Z  d dl!m"Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0m1Z1 d dl2m3Z3 G dd de0Z4G dd deZ5G dd de+Z6d%ddZ7G dd dee-Z8ej9j:e%j;dd ej<fd!d"Z=e>d#krej9j?e%j;e=d$ dS dS )&    )AnyCallableDictOptionalSequenceTupleN)VAEGenerator)LPIPSWithDiscriminator)AutoencoderKL)parallel_state)	ModelType)MegatronModule)TransformerConfig)DefaultTaskEncoderImageSample)Tensornn)	lightning)llm)DiffusionDataModule)pretrain)GPTModel)IOMixin)DataTMegatronLossReduction
ReductionT)OptimizerModulec                   @   sD   e Zd ZdZdededeeef fddZde	e defdd	Z
d
S )AvgLossReductionz5Performs average loss reduction across micro-batches.batchforward_outreturnc                 C   s   |  }|d|ifS )z
        Forward pass for loss reduction.

        Args:
            batch: The batch of data.
            forward_out: The output tensor from forward computation.

        Returns:
            A tuple of (loss, reduction dictionary).
        avg)mean)selfr   r   loss r%   \/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/vae/train_vae.pyforward/   s   zAvgLossReduction.forwardlosses_reduced_per_micro_batchc                 C   s   t dd |D }| S )z
        Reduce losses across multiple micro-batches by averaging them.

        Args:
            losses_reduced_per_micro_batch: A sequence of loss dictionaries.

        Returns:
            The averaged loss tensor.
        c                 S   s   g | ]}|d  qS )r!   r%   ).0r$   r%   r%   r&   
<listcomp>G   s    z+AvgLossReduction.reduce.<locals>.<listcomp>)torchstackr"   )r#   r(   lossesr%   r%   r&   reduce=   s   
zAvgLossReduction.reduceN)__name__
__module____qualname____doc__r   r   r   r   r'   r   r.   r%   r%   r%   r&   r   ,   s    r   c                       s<   e Zd ZdZd fdd	Zdd Zdedd	fd
dZ  ZS )VAEz%Variational Autoencoder (VAE) module.Fc                    s   t  | |rtddd| _n	tj|tjd| _tjddtjd}|	 }| j	   fdd	|
 D }| jj|d
d ~tdddddddddd
d
dd| _dS )a  
        Initialize the VAE model.

        Args:
            config: Transformer configuration.
            pretrained_model_name_or_path: Path or name of the pretrained model.
            search_vae: Flag to indicate whether to search for a target VAE using AutoVAE.
        i      )input_resolutioncompression_ratio)weight_dtypez(stabilityai/stable-diffusion-xl-base-1.0vae)	subfolderr7   c                    s2   i | ]\}}| v r |   |  kr||qS r%   )numel)r)   kvvae_dictr%   r&   
<dictcomp>e   s   2 z VAE.__init__.<locals>.<dictcomp>F)strictiQ  g        gư>g      ?         ?hinge)
disc_startlogvar_init	kl_weightpixelloss_weightdisc_num_layersdisc_in_channelsdisc_factordisc_weightperceptual_weightuse_actnormdisc_conditional	disc_lossN)super__init__r   r8   r
   from_configr+   bfloat16from_pretrained
state_dictitemsload_state_dictr	   vae_loss)r#   configpretrained_model_name_or_path
search_vaesdxl_vaesd_dictpre_dict	__class__r=   r&   rQ   N   s4   	
zVAE.__init__c                 C   sP   | j |j}| }| j |j}| j|||d|| j jjjd\}}|||fS )a  
        Forward pass through the VAE.

        Args:
            target: Target images.
            global_step: Current global step.

        Returns:
            A tuple (aeloss, log_dict_ae, pred) containing the loss, log dictionary, and predictions.
        r   )inputsreconstructions
posteriorsoptimizer_idxglobal_step
last_layer)	r8   encodelatent_distsampledecoderX   decoderconv_outweight)r#   targetre   	posteriorzpredaelosslog_dict_aer%   r%   r&   r'   x   s   


zVAE.forwardinput_tensorr    Nc                 C   s   dS )zk
        Set input tensor.

        Args:
            input_tensor: The input tensor to the model.
        Nr%   )r#   rt   r%   r%   r&   set_input_tensor   s   zVAE.set_input_tensor)F)	r/   r0   r1   r2   rQ   r'   r   ru   __classcell__r%   r%   r_   r&   r3   K   s
    *r3   c                
       s   e Zd ZdZ		ddedee deeej	gej	f  f fddZ
dd	d
Zdeeef fddZdd ZddejfddZddejfddZedefddZedefddZd fddZ  ZS )VAEModelzA GPTModel wrapper for the VAE.NrZ   optimmodel_transformc                    s2   || _ tdddd}tj| _t j|||d dS )z
        Initialize the VAEModel.

        Args:
            pretrained_model_name_or_path: Path or name of the pretrained model.
            optim: Optional optimizer module.
            model_transform: Optional function to transform the model.
           )
num_layershidden_sizenum_attention_heads)rx   ry   N)rZ   r   r   encoder_or_decoder
model_typerP   rQ   )r#   rZ   rx   ry   rY   r_   r%   r&   rQ      s   zVAEModel.__init__r    c                 C   s"   t | dst| j| j| _dS dS )z/Configure the model by initializing the module.moduleN)hasattrr3   rY   rZ   r   r#   r%   r%   r&   configure_model   s   
zVAEModel.configure_modelc                 C   s$   t |d }d|jjdtjddiS )z
        Perform a single data step to fetch a batch from the iterator.

        Args:
            dataloader_iter: The dataloader iterator.

        Returns:
            A dictionary with 'pixel_values' ready for the model.
        r   pixel_valuescudaT)devicedtypenon_blocking)nextimagetor+   rS   )r#   dataloader_iterr   r%   r%   r&   	data_step   s   
zVAEModel.data_stepc                 O   s   | j |i |S )z
        Forward pass through the underlying module.

        Args:
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            The result of forward pass of self.module.
        )r   )r#   argskwargsr%   r%   r&   r'      s   zVAEModel.forwardc                 C   s2   | |d | j \}}}tj dkr| | |S )z
        Perform a single training step.

        Args:
            batch: The input batch.
            batch_idx: Batch index.

        Returns:
            The loss tensor.
        r   r   )re   r+   distributedget_ranklog_dict)r#   r   	batch_idxr$   rs   rq   r%   r%   r&   training_step   s   
zVAEModel.training_stepc           	      C   s   | |d | j \}}}tj|d  | gdd}|d dd}d}t |krbtj |kr=dd t	t
 D }nd}tjj|||t d	 |durb| | td
dd t|D i |S )z
        Perform a single validation step.

        Args:
            batch: The input batch.
            batch_idx: Batch index.

        Returns:
            The loss tensor.
        r   r   )axisrB   rz   c                 S   s   g | ]}d qS Nr%   )r)   _r%   r%   r&   r*      s    z,VAEModel.validation_step.<locals>.<listcomp>N)groupz'Original (left), Reconstruction (right)c                 S   s"   g | ]\}}t tj|qS r%   )wandbImagetorchvisionutils	make_grid)r)   r   r   r%   r%   r&   r*      s    )re   r+   catcpuclampr   get_data_parallel_src_rankr   r   rangeget_data_parallel_world_sizegather_objectget_data_parallel_groupr   r   log	enumerate)	r#   r   r   r$   rs   rq   r   
wandb_rankgather_listr%   r%   r&   validation_step   s(   
zVAEModel.validation_stepc                 C      | j st | _ | j S )z/Returns the loss reduction method for training.)_training_loss_reductionr   r   r%   r%   r&   training_loss_reduction     z VAEModel.training_loss_reductionc                 C   r   )z1Returns the loss reduction method for validation.)_validation_loss_reductionr   r   r%   r%   r&   validation_loss_reduction  r   z"VAEModel.validation_loss_reductionc                    s<   t    | jjdurt| ddrd| j_d| _dS dS dS )zz
        Hook to handle zero grad on validation model step.
        Used here to skip first validation on resume.
        N_restarting_skip_val_flagTF)rP   on_validation_model_zero_gradtrainer	ckpt_pathgetattrsanity_checkingr   r   r_   r%   r&   r     s
   

z&VAEModel.on_validation_model_zero_grad)NN)r    Nr   )r/   r0   r1   r2   strr   r   r   r   ModulerQ   r   r   r   r   r'   r+   r   r   r   propertyr   r   r   r   rv   r%   r%   r_   r&   rw      s*    
'rw   r4   c                 C   sh   | j d | j d }}|| }|| }|d }|| }|d }|| }	| d||| |||	 f }
|
S )z
    Crop the image so that both dimensions are divisible by the given divisor.

    Args:
        img: Image tensor.
        divisor: The divisor to use for cropping.

    Returns:
        The cropped image tensor.
       .)shape)imgdivisorhwdelta_hdelta_wdelta_h_topdelta_h_bottomdelta_w_leftdelta_w_rightimg_croppedr%   r%   r&   
crop_image!  s   r   c                       s*   e Zd ZdZdedef fddZ  ZS )ImageTaskEncoderz7Image task encoder that crops and normalizes the image.ri   r    c                    s,   t  |}t|jd|_| jd8  _|S )z
        Encode a single image sample by cropping and shifting its values.

        Args:
            sample: An image sample.

        Returns:
            The transformed image sample.
        r4   rB   )rP   encode_sampler   r   )r#   ri   r_   r%   r&   r   ?  s   
zImageTaskEncoder.encode_sample)r/   r0   r1   r2   r   r   rv   r%   r%   r_   r&   r   <  s    r   )rn   r    c                  C   s~   t  } tjtdd| _tjtttddd| _tjtj	j
ddd| j_	d	| jj_d
| jj_d| j_d| j_d| jjd _| S )ze
    Training factory function for VAE.

    Returns:
        A run.Partial recipe for training.
    z1nemo/collections/diffusion/vae/vae16x/config.json)rZ      
   )task_encoderglobal_batch_sizenum_workersd   g    eA)warmup_steps
hold_stepsgh㈵>g{Gz?znemo_experiments/train_vaei  r   )r   runConfigrw   modelr   r   datanllr_schedulerWarmupHoldPolicySchedulerrx   rY   lrweight_decayr   log_dirr   val_check_interval	callbacksevery_n_train_steps)reciper%   r%   r&   	train_vaeO  s$   

r   __main__)default_factory)r4   )@typingr   r   r   r   r   r   nemo_runr   r+   torch.distributedtorch.utils.checkpointr   r   autovaer   contperceptual_lossr	   	diffusersr
   megatron.corer   megatron.core.transformer.enumsr    megatron.core.transformer.moduler   ,megatron.core.transformer.transformer_configr   megatron.energonr   r   r   r   nemor   r   nemo.collectionsr   <nemo.collections.diffusion.data.diffusion_energon_datamoduler    nemo.collections.diffusion.trainr   #nemo.collections.llm.gpt.model.baser   nemo.lightning.io.mixinr    nemo.lightning.megatron_parallelr   r   r   nemo.lightning.pytorch.optimr   r   r3   rw   r   r   clifactorytrainPartialr   r/   mainr%   r%   r%   r&   <module>   sF    O 
