o
    }o™i$  ã                   @   s2   d dl Z d dlmZ d dlT G dd„ dejƒZdS )é    N)Ú*c                       sN   e Zd ZdZ											d‡ fdd„	Zdd
d„Z		ddd„Z‡  ZS )ÚLPIPSWithDiscriminatora7  
    A perceptual loss module that combines LPIPS with an adversarial discriminator
    for improved reconstruction quality in variational autoencoders. This class
    calculates a combination of pixel-level, perceptual (LPIPS), KL, and adversarial
    losses for training a VAE model with a discriminator.
    ç        ç      ð?é   FÚhingec                    sŽ   t ƒ  ¡  |dv sJ ‚|| _|| _tƒ  ¡ | _|	| _t 	t
 d¡| ¡| _t|||
d t¡| _|| _|dkr9tnt| _|| _|| _|| _dS )a  
        Initializes the LPIPSWithDiscriminator module.

        Args:
            disc_start (int): Iteration at which to start discriminator updates.
            logvar_init (float): Initial value for the log variance parameter.
            kl_weight (float): Weight for the KL divergence term.
            pixelloss_weight (float): Weight for the pixel-level reconstruction loss.
            disc_num_layers (int): Number of layers in the discriminator.
            disc_in_channels (int): Number of input channels for the discriminator.
            disc_factor (float): Scaling factor for the discriminator loss.
            disc_weight (float): Weight applied to the discriminator gradient balancing.
            perceptual_weight (float): Weight for the LPIPS perceptual loss.
            use_actnorm (bool): Whether to use actnorm in the discriminator.
            disc_conditional (bool): Whether the discriminator is conditional on an additional input.
            disc_loss (str): Type of GAN loss to use ("hinge" or "vanilla").
        )r   Úvanillaé   )Úinput_ncÚn_layersÚuse_actnormr   N)ÚsuperÚ__init__Ú	kl_weightÚpixel_weightÚLPIPSÚevalÚperceptual_lossÚperceptual_weightÚnnÚ	ParameterÚtorchÚonesÚlogvarÚNLayerDiscriminatorÚapplyÚweights_initÚdiscriminatorÚdiscriminator_iter_startÚhinge_d_lossÚvanilla_d_lossÚ	disc_lossÚdisc_factorÚdiscriminator_weightÚdisc_conditional)ÚselfÚ
disc_startÚlogvar_initr   Úpixelloss_weightÚdisc_num_layersÚdisc_in_channelsr"   Údisc_weightr   r   r$   r!   ©Ú	__class__© úf/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/diffusion/vae/contperceptual_loss.pyr      s"   
 ÿþ
zLPIPSWithDiscriminator.__init__Nc                 C   s¦   |durt jj||ddd }t jj||ddd }nt jj|| jd ddd }t jj|| jd ddd }t  |¡t  |¡d  }t  |dd¡ ¡ }|| j }|S )u¡  
        Computes an adaptive weight that balances the reconstruction (NLL) and the
        adversarial (GAN) losses. This ensures stable training by adjusting the
        impact of the discriminatorâ€™s gradient on the generator.

        Args:
            nll_loss (torch.Tensor): The negative log-likelihood loss.
            g_loss (torch.Tensor): The generator (adversarial) loss.
            last_layer (torch.nn.Parameter, optional): Last layer parameters of the model
                for gradient-based calculations. If None, uses self.last_layer[0].

        Returns:
            torch.Tensor: The computed adaptive weight for balancing the discriminator.
        NT)Úretain_graphr   g-Cëâ6?r   g     ˆÃ@)r   ÚautogradÚgradÚ
last_layerÚnormÚclampÚdetachr#   )r%   Únll_lossÚg_lossr3   Ú	nll_gradsÚg_gradsÚd_weightr.   r.   r/   Úcalculate_adaptive_weightO   s   
z0LPIPSWithDiscriminator.calculate_adaptive_weightc	              	   C   s¶  t  | ¡ | ¡  ¡}	| jdkr!|  | ¡ | ¡ ¡}
|	| j|
  }	|	t  | j¡ | j }|}|dur6|| }t  |¡|jd  }t  |¡|jd  }| 	¡ }t  |¡|jd  }|dkró|du rm| j
reJ ‚|  | ¡ ¡}n| j
srJ ‚|  t j| ¡ |fdd¡}t  |¡ }| jdkr©z
| j|||d}W n ty¨   | jr¡J ‚t  d¡}Y nw t  d¡}t| j|| jd}|| j|  || |  }| ¡  ¡  ¡ | j ¡  ¡ | ¡  ¡ | ¡  ¡ |	 ¡  ¡ | ¡ t  |¡| ¡  ¡ dœ}||fS |dkrY|du r|  | ¡  ¡ ¡}|  | ¡  ¡ ¡}n |  t j| ¡  ¡ |fdd¡}|  t j| ¡  ¡ |fdd¡}t| j|| jd}||  ||¡ }| ¡  ¡  ¡ | ¡  ¡ | ¡  ¡ d	œ}||fS dS )
aÅ  
        Forward pass for computing the combined loss. Depending on the optimizer index,
        this either computes the generator loss (including pixel, perceptual, KL, and
        adversarial terms) or the discriminator loss.

        Args:
            inputs (torch.Tensor): Original inputs to reconstruct.
            reconstructions (torch.Tensor): Reconstructed outputs from the model.
            posteriors (object): Posteriors from the VAE model for KL computation.
            optimizer_idx (int): Indicates which optimizer is being updated
                (0 for generator, 1 for discriminator).
            global_step (int): Current training iteration step.
            last_layer (torch.nn.Parameter, optional): The last layer's parameters for
                adaptive weight calculation.
            cond (torch.Tensor, optional): Conditional input for the discriminator.
            weights (torch.Tensor, optional): Sample-wise weighting for the losses.

        Returns:
            (torch.Tensor, dict): A tuple of (loss, log_dict) where loss is the computed loss
            for the current optimizer and log_dict is a dictionary of intermediate values
            for logging and debugging.
        r   Nr	   )Údimr   )r3   )Ú	threshold)Ú
total_lossr   Úkl_lossr7   Úrec_lossr;   r"   r8   )r!   Úlogits_realÚlogits_fake)r   ÚabsÚ
contiguousr   r   Úexpr   ÚsumÚshapeÚklr$   r   ÚcatÚmeanr"   r<   ÚRuntimeErrorÚtrainingÚtensorÚadopt_weightr   r   Úcloner6   Úitemr!   )r%   ÚinputsÚreconstructionsÚ
posteriorsÚoptimizer_idxÚglobal_stepr3   ÚcondÚweightsrA   Úp_lossr7   Úweighted_nll_lossr@   rC   r8   r;   r"   ÚlossÚlogrB   Úd_lossr.   r.   r/   Úforwardj   sj   




þ




ø


  

ýïzLPIPSWithDiscriminator.forward)r   r   r   r   r   r   r   r   FFr   )N)NNN)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r<   r^   Ú__classcell__r.   r.   r,   r/   r      s"    
ó
2ÿr   )r   Útorch.nnr   Ú"taming.modules.losses.vqperceptualÚModuler   r.   r.   r.   r/   Ú<module>   s   