o
    }oi{i                     @   s4  d dl mZ d dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZ d dlmZmZmZ d dlmZmZmZ d dl m!Z! d dl"m#Z# d dl$m%Z% dd Z&dd Z'eG dd dee#j(Z)G dd deZ*G dd deZ+G dd deZ,dS )    )nullcontext)	dataclass)CallableN)parallel_state)VisionModule)ColumnParallelLinear)TransformerConfig)
functional)FluxSingleTransformerBlock
MMDiTLayer'get_flux_double_transformer_engine_spec'get_flux_single_transformer_engine_spec)EmbedNDMLPEmbedderTimeStepEmbedder)
FluxConfigFluxModelParamsMegatronFluxModel)ControlNetConditioningEmbedding)io)loggingc                 C   s   |   D ]}tj| q| S )z
    Initializes all parameters of the given module to zero.

    Args:
        module (nn.Module): The module whose parameters will be initialized to zero.

    Returns:
        nn.Module: The same module with zero-initialized parameters.
    )
parametersnninitzeros_)modulep r   k/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/models/flux_controlnet/model.pyzero_module(   s   
r   c                 C   sH   t | }t|trt|dkr|d }n|}tdgjdd|d< |S )a#  
    Processes a single step of data from a dataloader iterator for the Flux ControlNet.

    Args:
        dataloader_iter (Iterator): An iterator over the dataloader that provides batches of data.

    Returns:
        dict: A processed batch dictionary with an added 'loss_mask' key.
       r         ?Tnon_blocking	loss_mask)next
isinstancetuplelentorchTensorcuda)dataloader_iterbatch_batchr   r   r   flux_controlnet_data_step7   s   

r/   c                   @   s  e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed< d	Z
eed
< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed < dZeed!< dZeed"< d#Zeed$< eZeed%< dS )&FluxControlNetConfigz<
    Flux config inherits from TransformerConfig class.
       
num_layers
patch_size@   in_channels   num_joint_layers
   num_single_layersi   hidden_size   num_attention_headsi   
vec_in_dimi   context_dimTguidance_embedNnum_mode   model_channelsconditioning_embedding_channelsrotary_interleavedgư>layernorm_epsilonr   hidden_dropoutattention_dropoutadd_qkv_biasuse_cpu_initializationload_from_flux_transformerg      @guidance_scaledata_step_fn) __name__
__module____qualname____doc__r2   int__annotations__r3   r5   r7   r9   r:   r<   r=   r>   r?   boolr@   rB   rC   rD   rE   floatrF   rG   rH   rI   rJ   rK   r/   rL   r   r   r   r   r   r0   K   s0   
 r0   c                       s   e Zd ZdZdef fddZdd Zdd Z													
ddej	dej	dej	dej	dej
dej	dej	dej	defddZ  ZS )FluxControlNeta  
    A VisionModule-based neural network designed for Flux ControlNet tasks.


    Args:
        config (FluxControlNetConfig):
        Configuration object containing model parameters such as input channels, hidden size, patch size,
            and number of transformer layers.
    configc                    s  t     j| _ j| _ j| _t| jdg dd| _t	 j| j| _
t	 j| j| _t j| j| _t j| jd| _ jrS jrNt j| jdnt | _t fddt jD | _t fddt jD | _t | _t jD ]}| jtt| j| j tj j!dd	 q}t | _"t jD ]}| j"tt| j| j tj j!dd	 q j#d
urt$ j#dd| _%t&j	 j| j| _'d
S d
| _%tt&j	 j| j| _'d
S )z
        Initializes the FluxControlNet model with embeddings, transformer layers, and optional conditioning blocks.

        Args:
            config (FluxControlNetConfig): Configuration object with model parameters.
        i'  )   8   rX   )dimthetaaxes_dim)in_dim
hidden_dimc                    s    g | ]}t  t j|d dqS )F)rV   
submoduleslayer_numbercontext_pre_only)r   r   r^   .0irV   r   r   
<listcomp>   s    z+FluxControlNet.__init__.<locals>.<listcomp>c                    s   g | ]}t  t j|d qS ))rV   r^   r_   )r
   r   r^   ra   rd   r   r   re      s    T)rV   init_methodgather_outputN)rW   rW   rW   rW   )rC   block_out_channels)(super__init__r5   out_channelsr:   r3   r   	pos_embedr   Linear	img_embedr>   	txt_embedr   rB   timestep_embeddingr   r=   vector_embeddingr?   Identityguidance_embedding
ModuleListranger7   double_blocksr9   single_blockscontrolnet_double_blocksappendr   r   r   normal_controlnet_single_blocksrC   r   input_hint_blockr)   controlnet_x_embedder)selfrV   _	__class__rd   r   rj   v   st   




zFluxControlNet.__init__c                 C   s   t d | j|j  | j|j  | j|j  | j|j  | j|j  | j	j|j	 dd | j
j|j
 dd dS )z
        Loads pre-trained weights from a Flux Transformer model into the FluxControlNet.

        Args:
            flux (FluxTransformer): A pre-trained Flux Transformer model.
        z-Loading ControlNet layer weights from Flux...F)strictN)r   inforl   load_state_dict
state_dictrn   ro   rp   rq   rv   rw   )r~   fluxr   r   r   rJ      s   
z)FluxControlNet.load_from_flux_transformerc              	   C   s   | j js	t }|S ddl}| j jdkr|jjjj}n| j jdkr'|jjjj}nt	d|jjj
| j j| j j|| j j| j jdd| j j fd}d}t rRtjdd	}|jjd||d
}|S )zcontext manager for fp8 reciper   Ne4m3hybridz3E4M3 and HYBRID are the only supported FP8 formats.F)margininterval
fp8_formatamax_compute_algoamax_history_lenoverride_linear_precisionT)with_context_parallel)enabled
fp8_recipe	fp8_group)rV   fp8r   transformer_enginecommonrecipeFormatE4M3HYBRID
ValueErrorDelayedScaling
fp8_marginfp8_intervalfp8_amax_compute_algofp8_amax_history_len	fp8_wgradr   model_parallel_is_initializedget_amax_reduction_grouppytorchfp8_autocast)r~   fp8_contextr   r   r   r   r   r   r   get_fp8_context   s0   zFluxControlNet.get_fp8_contextNr!   imgcontrolnet_condtxty	timestepsimg_idstxt_idsguidanceconditioning_scalec
               	      s  |  |}
| |}| jdurF| |}|j\}}}}|| jj }|| jj }||||| j|| j}|dddddd}|||| d}|
| | }
|	|j
d	 }| |}|durk|| | j|d	  }|| | }tj||fdd
}| |}d}t| jD ]'\}}|   ||
|||d\}
}||
f }W d   n1 sw   Y  qtj||
gdd
}
d}t| jD ]/\}}|   ||
||d\}
}||
|jd ddf f }W d   n1 sw   Y  qd}t|| jD ]\}}||\}}|r|| n|}||f7 }qd}t|| jD ]\}}||\}}|r)|| n|}||f7 }q fdd|D } fdd|D }t|dkrMdn|}t|dkrXdn|}||fS )a]  
        Forward pass for the FluxControlNet model.

        Args:
            img (torch.Tensor): Input image tensor.
            controlnet_cond (torch.Tensor): Conditioning tensor for ControlNet.
            txt (torch.Tensor, optional): Text embedding tensor. Default is None.
            y (torch.Tensor, optional): Vector embedding tensor. Default is None.
            timesteps (torch.LongTensor, optional): Time step tensor. Default is None.
            img_ids (torch.Tensor, optional): Image IDs. Default is None.
            txt_ids (torch.Tensor, optional): Text IDs. Default is None.
            guidance (torch.Tensor, optional): Guidance tensor. Default is None.
            conditioning_scale (float, optional): Scaling factor for conditioning. Default is 1.0.

        Returns:
            torch.Tensor: The output of the forward pass.
        Nr      r6   r1   r         )rY   r   )hidden_statesencoder_hidden_statesrotary_pos_embemb)r   r   r   .c                       g | ]}|  qS r   r   rb   sampler   r   r   re   R      z*FluxControlNet.forward.<locals>.<listcomp>c                    r   r   r   r   r   r   r   re   S  r   )rn   ro   r|   shaperV   r3   reshapepermuter}   todtyperp   rs   	time_projrq   r)   catrl   	enumeraterv   r   rw   ziprx   r{   r(   ) r~   r   r   r   r   r   r   r   r   r   r   r   
batch_sizechannels	height_pwwidth_pwheightwidthvec_embidsr   double_block_samplesid_blockblocksingle_block_samplesr   controlnet_double_block_samplesdouble_block_samplecontrol_blockbiascontrolnet_single_block_samplessingle_block_sampler   r   r   forward   sz   







	

zFluxControlNet.forward)NNNNNNr!   )rM   rN   rO   rP   r0   rj   rJ   r   r)   r*   
LongTensorrT   r   __classcell__r   r   r   r   rU   k   s>    
U#	
rU   c                       s2   e Zd ZdZdedef fddZdd Z  ZS )FluxControlnetForwardWrapperz]
    A wrapper combines flux and flux controlnet forward pass for easier initialization.
    flux_configflux_controlnet_configc                    sT   t  | | j | _| j D ]}d|_qt|| _|j	r(| j	| j dS dS )zL
        Create flux and flux controlnet instances by their config.
        FN)
ri   rj   rV   configure_modelr   r   requires_gradrU   flux_controlnetrJ   )r~   r   r   paramr   r   r   rj   d  s   
z%FluxControlnetForwardWrapper.__init__c	                 C   sF   | j |||||d |||d\}	}
| j||||d ||||	|
d	}|S )zJ
        Forward pass for the FluxControlnetForwardWrapper model.
        r   )r   r   r   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r   )r   r   )r~   packed_noisy_model_inputcontrol_imageprompt_embedspooled_prompt_embedsr   latent_image_idstext_idsguidance_vecr   r   
noise_predr   r   r   r   r  s,   

z$FluxControlnetForwardWrapper.forward)	rM   rN   rO   rP   r   r0   rj   r   r   r   r   r   r   r   _  s    r   c                       sn   e Zd ZdZdedef fddZdd Zdd	 Zd
d Z	dde
jfddZde
jfddZdddZ  ZS )MegatronFluxControlNetModela  
    Megatron wrapper for flux controlnet model.

    Args:
        flux_params (FluxModelParams): Parameters to configure the Flux model.
        flux_controlnet_config (FluxControlNetConfig): Configuration specific to the FluxControlNet.

    Methods:
        configure_model:
            Configures the model by wrapping the FluxControlNet with the appropriate layers and settings,
            configuring the VAE, scheduler, and text encoders.
        data_step:
            A wrapper around the data-step function specific to FluxControlNet, controlling how data is processed.
        forward:
            Executes a forward pass through FluxControlNet.
        training_step:
            A wrapper step method that calls forward_step with a data batch from data loader.
        forward_step:
            Handles the forward pass specific to training, computing the model's output.
        validation_step:
            Calls inference pipeline with current model weights and save inference result together with the control
            image.
    flux_paramsr   c                    s"   t  | || _| j|  d S N)ri   rj   r   optimconnect)r~   r   r   r   r   r   rj     s   z$MegatronFluxControlNetModel.__init__c                 C   s   t | dsEt| j| j| _| | j |   | | j	| j
 | j D ]"\}}| jjdkr9d|v s6d|v r9d|_d|v rDd|v rDd|_q$dS dS )	zo
        Initialize flux and controlnet modules, vae, scheduler, and text encoders with given configs.
        r   r   contextaddedFrw   zself_attention.linear_proj.biasN)hasattrr   rV   r   r   configure_vae
vae_configconfigure_schedulerconfigure_text_encodersclip_params	t5_paramsnamed_parametersr9   r   )r~   namer   r   r   r   r     s   
z+MegatronFluxControlNetModel.configure_modelc                 C   s   | j |S )zz
        Retrive data batch from dataloader iterator and do necessary processing before feeding into train steps.
        )r   rL   )r~   r,   r   r   r   	data_step  s   z%MegatronFluxControlNetModel.data_stepc                 O   s   | j j j }||i |S )z6
        Calling the controlnet forward pass.
        )r   )r~   argskwargsforward_wrapperr   r   r   r     s   
z#MegatronFluxControlNetModel.forwardNreturnc                 C   s
   |  |S )z\
        A wrapper method takes data batch and returns the results of forward_step.
        )forward_step)r~   r-   	batch_idxr   r   r   training_step  s   
z)MegatronFluxControlNetModel.training_stepc                    s@  | j jjr
tj| _n| j jjrtj| _ntj| _| j	r,|d j
dd}|d j
dd}n&|d j
dd}| j|j| jd}|d j
dd}| j|j| jd}| j|jd |jd	 |jd
 |j| jd}| j||jd |jd |jd	 |jd
 d}| j||jd |jd |jd	 |jd
 ddd}|jd }tj||j|jd}	| d|}
|
| jj  }| jj| j|jd}| jjj|j|jd}| jjj|jd  fdd|D }|j|jd}||  }t|j|jk r|d}t|j|jk sd| | ||	  }|dd}| jjr%tj |jd f| jj!|j|jd}nd}| j"rH|d j
dddd}|d j
dd}|d j
dd}n|d }| j#||j|jd\}}}tj
j$j%| jtj&tjfv | jd+ | j'||||||||d}|	| dd}t(j)| | dd}|W  d   S 1 sw   Y  dS )z1
        The main forward step function.
        latentsTr"   control_latentsimages)r   hintr   r   r    )r   r   r   devicer   r1   )r   num_channels_latentsr   r   )r  r   logit_normalr  c                    s   g | ]
} |k   qS r   )nonzeroitem)rb   tschduler_timestepsr   r   re     s    z<MegatronFluxControlNetModel.forward_step.<locals>.<listcomp>r   r!   Nr   r   r   r   )r   r   r   r   r   r   r   r   mean)	reduction)*r   rV   bf16r)   bfloat16autocast_dtypefp16rT   float32image_precachedr+   vaeencoder   _prepare_latent_image_idsr   r  _pack_latents	transpose
randn_liker   %compute_density_for_timestep_sampling	schedulernum_train_timestepslongr   sigmasflattenr(   ndim	unsqueezer?   fullrK   text_precachedencode_promptampautocasthalfr   Fmse_loss)r~   r-   r  r  r   r  r   r   r   noiseuindicesr   r!  step_indicessigmar   r   r   r   r   r   r   targetlossr   r  r   r     s   








&z(MegatronFluxControlNetModel.forward_stepc                 C   s  t d ddlm} || j| j| jjjj| j| j	| j
| jj| jjjjd}| jr| jr|d jdd}|d jdd}|d	 jdddd
}|d jdd}||||||jd | j |jd | j dd
d| jdd}	|	d | jj d| j d| j d n]|d jdd}
|d jdd}|d }|||dd
|
jd |
jd d| jdd	}	|	d | jj d| j d| j d| d ||}||}|d | jj d| j d| j d tjdgtj dS )z
        Initialize flux controlnet pipeline with current model components.

        Saves the inference results together with the hint image to log folder.
        zStart validation stepr   )FluxControlNetInferencePipeline)paramscontorlnet_configr   r  t5clipscheduler_stepsr   r  Tr"   r  r   r1   r   r   r       g      @F)r  r   r   r   r   r   num_inference_stepsnum_images_per_promptrK   r   save_to_diskz/step=_rankz.pngr  r  r   )r   r;  r<  r   r   rK   r   r=  r   z_control.pngg        r	  )r   r   /nemo.collections.diffusion.models.flux.pipeliner4  r5  r   r   r   r  r7  r8  r9  r   r  r&  r+   r  r   vae_scale_factorr  saveloggerlog_dirglobal_step
local_ranktorch_to_numpynumpy_to_pilr)   tensorcurrent_device)r~   r-   r   r4  piper  r  r   r   
log_imagesr   r  textr   r   r   validation_stepJ  sd   


*.

(z+MegatronFluxControlNetModel.validation_stepr   )rM   rN   rO   rP   r   r0   rj   r   r   r   r)   r*   r  r   rM  r   r   r   r   r   r     s    er   )-
contextlibr   dataclassesr   typingr   r)   torch.nnr   megatron.corer   7megatron.core.models.common.vision_module.vision_moduler   $megatron.core.tensor_parallel.layersr   ,megatron.core.transformer.transformer_configr   r	   r+  4nemo.collections.diffusion.models.dit.dit_layer_specr
   r   r   r   -nemo.collections.diffusion.models.flux.layersr   r   r   ,nemo.collections.diffusion.models.flux.modelr   r   r   8nemo.collections.diffusion.models.flux_controlnet.layersr   nemo.lightningr   
nemo.utilsr   r   r/   IOMixinr0   rU   r   r   r   r   r   r   <module>   s0    u>