o
    }oipt                     @   s  d dl mZmZ d dlmZmZmZ d dlZd dl	Z	d dl
mZ d dlmZmZ d dlmZmZmZmZmZ d dlmZ G dd	 d	eeZG d
d deZG dd deZG dd deZG dd de	jjeZG dd deZG dd deeZG dd deZ dS )    )ABCabstractmethod)OptionalTupleTypeN)mask_sequence_tensor)NeuralModule	typecheck)	FloatTypeLengthsType
NeuralTypeSpectrogramTypeVoidType)loggingc                       s  e Zd ZdZdededef fddZedefdd	Zedefd
dZ	dede
jde
jfddZede
jde
jdee
je
jf fddZedede idede idede
jde
jfddZddde
jde
jdee
j dee
je
jf fddZedd  Zd!d" Z  ZS )#StochasticDifferentialEquationz1Base class for stochastic differential equations.time_mintime_max	num_stepsc                    sh   t    |dkrtd| ||krtd| d| || _|| _|dkr/td| || _d S )Nr   z+time_min should be positive, current value z5time_max should be larger than time_min, current max 	 and min z.num_steps needs to be positive: current value )super__init__
ValueErrorr   r   r   )selfr   r   r   	__class__ e/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/audio/parts/submodules/diffusion.pyr      s   

z'StochasticDifferentialEquation.__init__returnc                 C   s   | j | j S )zTime step for this SDE.
        This denotes the step size between `0` and `self.time_max` when using `self.num_steps`.
        )r   r   r   r   r   r   dt1   s   z!StochasticDifferentialEquation.dtc                 C   s   | j | j S )zTime range for this SDE.)r   r   r   r   r   r   
time_delta8   s   z)StochasticDifferentialEquation.time_deltasizedevicec                 C   s   t j||d| j | j }|S )a  Generate random time steps in the valid range.

        Time steps are generated between `self.time_min` and `self.time_max`.

        Args:
            size: number of samples
            device: device to use

        Returns:
            A tensor of floats with shape (size,)
        r"   )torchrandr    r   )r   r!   r"   timer   r   r   generate_time=   s   z,StochasticDifferentialEquation.generate_timestater&   c                 K      dS )z
        Args:
            state: tensor of shape (B, C, D, T)
            time: tensor of shape (B,)

        Returns:
            Tuple with drift and diffusion coefficients.
        Nr   )r   r(   r&   kwargsr   r   r   coefficientsL   s   
z+StochasticDifferentialEquation.coefficients
prior_meanBCDTsampleinput_typesoutput_typesc                 C   r)   )zGenerate a sample from the prior distribution p_T.

        Args:
            prior_mean: Mean of the prior distribution

        Returns:
            A sample from the prior distribution.
        Nr   )r   r,   r   r   r   prior_samplingX   s   z-StochasticDifferentialEquation.prior_samplingN)state_lengthr7   c          	      K   s>   | j d|||d|\}}|| j }|t| j }||fS )a  Assume we have the following SDE:

            dx = drift(x, t) * dt + diffusion(x, t) * dwt

        where `wt` is the standard Wiener process.

        We assume the following discretization:

            new_state = current_state + total_drift + total_diffusion * z_norm

        where `z_norm` is sampled from normal distribution with zero mean and unit variance.

        Args:
            state: current state of the process, shape (B, C, D, T)
            time: current time of the process, shape (B,)
            state_length: length of the valid time steps for each example in the batch, shape (B,)
            **kwargs: other parameters

        Returns:
            Drift and diffusion.
        )r(   r&   r7   Nr   )r+   r   npsqrt)	r   r(   r&   r7   r*   drift_coefficientdiffusion_coefficientdrift	diffusionr   r   r   
discretizel   s   

z)StochasticDifferentialEquation.discretizec                 C   r)   )zCreate a copy of this SDE.Nr   r   r   r   r   copy   s   z#StochasticDifferentialEquation.copyc                 C   sJ   | j j d| j d| j d| j d}|d| j 7 }|d| j 7 }|S )Nz
(time_min=, time_max=, num_steps=)
	dt:         
	time_delta: )r   __name__r   r   r   r   r    r   descr   r   r   __repr__   s   &z'StochasticDifferentialEquation.__repr__)rE   
__module____qualname____doc__floatintr   propertyr   r    r$   r"   Tensorr'   r   r   r+   r	   r   r   r6   r   r>   r?   rH   __classcell__r   r   r   r   r      s>    (
'
r   c                       s,  e Zd ZdZ				d2dededed	ed
ededef fddZedefddZedefddZ	e
ede ede eede ddede iddejdejdejdejfddZe
deede ideede iddejdejfdd Ze
ede ede eede dede ede d!ddejdejdejdejfd"d#Ze
ede eede ede eede d$d%d&ede ede d'd	(d3dejdejdejd)eej deejejf f
d*d+Zdejdejfd,d-Zd.d/ Zd0d1 Z  ZS )4%OrnsteinUhlenbeckVarianceExplodingSDEa  This class implements the Ornstein-Uhlenbeck SDE with variance exploding noise schedule.

    The SDE is given by:

        dx = theta * (y - x) dt + g(t) dw

    where `theta` is the stiffness parameter and `g(t)` is the diffusion coefficient:

        g(t) = std_min * (std_max/std_min)^t * sqrt(2 * log(std_max/std_min))

    References:
        Richter et al., Speech Enhancement and Dereverberation with Diffusion-based Generative Models, Tr. ASLP 2023
    d   Q?      ?:0yE>	stiffnessstd_minstd_maxr   r   r   epsc                    s   t  j|||d |dkrtd| || _|| _|dkr%td| ||kr3td| d| || _|| _td| j	j
 td| j td	| j td
| j td| j td| j td| j td| j d S )Nr   r   r   r   z&eps should be positive, current value z*std_min should be positive, current value z3std_max should be larger than std_min, current max r   Initialized %s withz	stiffness:     %sz	std_min:       %sz	std_max:       %sz	num_steps:     %sz	time_min:      %sz	time_max:      %sz	eps:           %s)r   r   r   rY   rV   rW   rX   r   debugr   rE   r   r   r   )r   rV   rW   rX   r   r   r   rY   r   r   r   r      s&   
z.OrnsteinUhlenbeckVarianceExplodingSDE.__init__r   c                 C   s   | j | j| j  S N)rX   rW   rY   r   r   r   r   	std_ratio      z/OrnsteinUhlenbeckVarianceExplodingSDE.std_ratioc                 C   s   t | j| j S r]   )r8   logr^   rY   r   r   r   r   log_std_ratio   r_   z3OrnsteinUhlenbeckVarianceExplodingSDE.log_std_ratior-   r.   r(   r,   r&   meanr3   r(   r,   r&   c                 C   s:   t | j | }|dddd}|| d| |  }|S )aB  Return the mean of the perturbation kernel for this SDE.

        Args:
            state: current state of the process, shape (B, C, D, T)
            prior_mean: mean of the prior distribution
            time: current time of the process, shape (B,)

        Returns:
            A tensor of shape (B, C, D, T)
           )r$   exprV   view)r   r(   r,   r&   weightrc   r   r   r   perturb_kernel_mean   s   z9OrnsteinUhlenbeckVarianceExplodingSDE.perturb_kernel_meanstdc                 C   sX   | j d | j }|t| jd| td| j |  9 }|| j| j  }t|}|S )a|  Return the standard deviation of the perturbation kernel for this SDE.

        Note that the standard deviation depends on the time and the noise schedule,
        which is parametrized using `self.stiffness`, `self.std_min` and `self.std_max`.

        Args:
            time: current time of the process, shape (B,)

        Returns:
            A tensor of shape (B,)
           )rW   ra   r$   powr^   rf   rV   r9   )r   r&   varrj   r   r   r   perturb_kernel_std   s
   *
z8OrnsteinUhlenbeckVarianceExplodingSDE.perturb_kernel_std)rc   rj   c                 C   s\   t || jks
J t || jksJ | j|||d}| j|d}|dddd}||fS )a  Return the mean and standard deviation of the perturbation kernel for this SDE.

        Args:
            state: current state of the process, shape (B, C, D, T)
            prior_mean: mean of the prior distribution
            time: current time of the process, shape (B,)
        rb   r&   rd   re   )r$   allr   r   ri   ro   rg   )r   r(   r,   r&   rc   rj   r   r   r   perturb_kernel_params  s   z;OrnsteinUhlenbeckVarianceExplodingSDE.perturb_kernel_paramsToptional)r(   r&   r,   r7   )r:   r;   Nr7   c                 C   sv   | j ||  }| jt| j| td| j  }|jdgdg|	 d  R  }|dur7t
||}t
||}||fS )a  Compute drift and diffusion coefficients for this SDE.

        Args:
            state: current state of the process, shape (B, C, D, T)
            time: current time of the process, shape (B,)
            prior_mean: mean of the prior distribution
            state_length: length of the valid time steps for each example in the batch

        Returns:
            Drift and diffusion coefficients.
        rk   rd   re   N)rV   rW   r$   rm   r^   r8   r9   ra   rg   dimr   )r   r(   r&   r,   r7   r:   r;   r   r   r   r+   7  s   $ 

z2OrnsteinUhlenbeckVarianceExplodingSDE.coefficientsc                 C   sN   | j tj|jd |jd }| j|d}|dddd}|t||  }|S )zGenerate a sample from the prior distribution p_T.

        Args:
            prior_mean: Mean of the prior distribution
        r   r#   rp   rd   re   )r   r$   onesshaper"   ro   rg   
randn_like)r   r,   r&   rj   r2   r   r   r   r6   c  s
   z4OrnsteinUhlenbeckVarianceExplodingSDE.prior_samplingc              	   C   s$   t | j| j| j| j| j| j| jdS )N)rV   rW   rX   r   r   r   rY   )rQ   rV   rW   rX   r   r   r   rY   r   r   r   r   r?   w  s   z*OrnsteinUhlenbeckVarianceExplodingSDE.copyc                 C   s   | j j d| j d| j d| j d| j d| j d| j d| j d}|d	| j	 7 }|d
| j
 7 }|d| j 7 }|d| j 7 }|S )Nz(stiffness=z
, std_min=z
, std_max=rA   z, time_min=r@   z, eps=rB   rC   rD   z
	std_ratio:  z
	log_std_ratio:  )r   rE   rV   rW   rX   r   r   r   rY   r   r    r^   ra   rF   r   r   r   rH     s   Fz.OrnsteinUhlenbeckVarianceExplodingSDE.__repr__)rR   rS   rT   rU   r]   )rE   rI   rJ   rK   rL   rM   r   rN   r^   ra   r	   r   r   tupler
   r$   rO   ri   ro   rr   r   r   r   r+   r6   r?   rH   rP   r   r   r   r   rQ      s    '

$




$



 rQ   c                       s   e Zd Zdee dee f fddZ		ddejdejde	ej d	e	ej d
e
ejejf f
ddZdejdejd
ejfddZddddejdejde	ej d	e	ej d
e
ejejf f
ddZdd Zdd Z  ZS )%ReverseStochasticDifferentialEquationsdescore_estimatorc                   s8   t  j|j|j|jd || _|| _td| j	j
 dS )zUse the forward SDE and a score estimator to define the reverse SDE.

        Args:
            sde: forward SDE
            score_estimator: neural score estimator
        rZ   zInitialized %sN)r   r   r   r   r   r|   forward_sder   r\   r   rE   r   r{   r|   r   r   r   r     s   z.ReverseStochasticDifferentialEquation.__init__Nr(   r&   score_conditionr7   r   c                 K      t d)zCompute drift and diffusion coefficients for the reverse SDE.

        Args:
            state: current state of the process, shape (B, C, D, T)
            time: current time of the process, shape (B,)
        z/Coefficients not necessary for the reverse SDE.NotImplementedErrorr   r(   r&   r   r7   r*   r   r   r   r+     s   z2ReverseStochasticDifferentialEquation.coefficientsrw   r"   c                 C   r   )z4Prior sampling is not necessary for the reverse SDE.z1Prior sampling not necessary for the reverse SDE.r   )r   rw   r"   r   r   r   r6     s   z4ReverseStochasticDifferentialEquation.prior_samplingr   r7   c                K   s   | j jd||d|\}}|du r|ntj||gdd}| j|||d\}	}
||d|	  }|}|dur@t||}t||}||fS )a  Discretize the reverse SDE.

        Args:
            state: current state of the process, shape (B, C, D, T)
            time: current time of the process, shape (B,)
            score_condition: condition for the score estimator
            state_length: length of the valid time steps for each example in the batch
            **kwargs: other parameters for discretization of the forward SDE
        )r(   r&   Nre   ru   inputinput_length	conditionrk   r   )r}   r>   r$   catr|   rm   r   )r   r(   r&   r   r7   r*   forward_driftforward_diffusionscore_inputscore_r<   r=   r   r   r   r>     s   

z0ReverseStochasticDifferentialEquation.discretizec                 C   s   t | j | jdS Nr{   r|   )rz   r}   r?   r|   r   r   r   r   r?        z*ReverseStochasticDifferentialEquation.copyc                 C   s"   | j j d| j d| j d}|S )Nz(sde=z, score_estimator=rB   )r   rE   r}   r|   rF   r   r   r   rH     s   z.ReverseStochasticDifferentialEquation.__repr__NN)rE   rI   rJ   r   r   r   r   r$   rO   r   r   r+   Sizer"   r6   r>   r?   rH   rP   r   r   r   r   rz     s>    
	
)rz   c                       s   e Zd ZdZ								d d	ed
edededee dee dedef fddZe	e
de e
de dde
ede ddde
de e
ede dddde 	d!dejdejdeej dejfddZ  ZS )"PredictorCorrectorSamplera  Predictor-Corrector sampler for the reverse SDE.

    Args:
        sde: forward SDE
        score_estimator: neural score estimator
        predictor: predictor for the reverse process
        corrector: corrector for the reverse process
        num_steps: number of time steps for the reverse process
        num_corrector_steps: number of corrector steps
        time_max: maximum time
        time_min: minimum time
        snr: SNR for Annealed Langevin Dynamics
        output_type: type of the output ('state' for the final state, or 'mean' for the mean of the final state)

    References:
        - Song et al., Score-based generative modeling through stochastic differential equations, 2021
    reverse_diffusionannealed_langevin_dynamics2   re   N      ?rc   	predictor	correctorr   num_corrector_stepsr   r   snroutput_typec                    sv  t    | | _|d ur|| j_td| jj |d ur*|| j_td| jj || j_td| jj | jj| _| jj| _| jj| _|dkrRt	| j|d| _
ntd| |dkrht| j||	|d| _ntd	| |
d
vrztd|
 |
| _td| jj td| td| td| j td| j td| j td| td|	 td| j d S )Nzsde.time_max set to: %szsde.time_min set to: %szsde.num_steps set to: %sr   r   zUnexpected predictor: r   )r{   r|   r   r   zUnexpected corrector: )rc   r(   Unexpected output type: r[   z	predictor:           %sz	corrector:           %sz	num_steps:           %sz	time_min:            %sz	time_max:            %sz	num_corrector_steps: %sz	snr:                 %sz	output_type:         %s)r   r   r?   r{   r   r   infor   r   ReverseDiffusionPredictorr   RuntimeErrorAnnealedLangevinDynamicsr   r   r   r\   r   rE   )r   r{   r|   r   r   r   r   r   r   r   r   r   r   r   r     sB   






z"PredictorCorrectorSampler.__init__r-   Trs   r.   )r,   r   r7   )r2   r7   r3   r,   r   r7   r   c                 C   s   | j j|d}|durt||}tj| j| j| j|jd}|D ]&}|tj	|j
d |jd }| j||||d\}}| j|||||d\}}	q| jdkrN|}
n| jdkrV|	}
ntd	| j |durgt|
|}
|
|fS )
a  Takes prior (noisy) mean and generates a sample by solving the reverse SDE.

        Args:
            prior_mean: mean for the prior distribution, e.g., noisy observation
            score_condition: conditioning for the score estimator
            state_length: length of the valid time steps for each example in the batch

        Returns:
            Generated `sample` and the corresponding `sample_length`.
        )r,   Nr#   r   r(   r&   r   r7   )r(   r&   r   r,   r7   r(   rc   r   )r{   r6   r   r$   linspacer   r   r   r"   rv   rw   r   r   r   r   )r   r,   r   r7   r(   
time_stepstr&   r   
state_meanr2   r   r   r   forward0  s0   


	

z!PredictorCorrectorSampler.forward)r   r   r   re   NNr   rc   r]   )rE   rI   rJ   rK   strrM   r   rL   r   r	   r   r   ry   r   r$   inference_moderO   r   rP   r   r   r   r   r     s^    	
>

r   c                       s^   e Zd ZdZ fddZee ddddejdejde	ej d	e	ej fd
dZ
  ZS )	Predictorz{Predictor for the reverse process.

    Args:
        sde: forward SDE
        score_estimator: neural score estimator
    c                    s   t    t||d| _d S r   )r   r   rz   reverse_sder~   r   r   r   r   {  s   
zPredictor.__init__Nr   r(   r&   r   r7   c                K   r)   )a  Predict the next state of the reverse process.

        Args:
            state: current state of the process, shape (B, C, D, T)
            time: current time of the process, shape (B,)
            score_condition: conditioning for the score estimator
            state_length: length of the valid time steps for each example in the batch

        Returns:
            New state and mean.
        Nr   r   r   r   r   r     s   zPredictor.forward)rE   rI   rJ   rK   r   r   r$   r   rO   r   r   rP   r   r   r   r   r   s  s     r   c                       s8   e Zd ZdZ fddZe dddddZ  ZS )r   zPredict the next state of the reverse process using the reverse diffusion process.

    Args:
        sde: forward SDE
        score_estimator: neural score estimator
    c                    s   t  j||d d S r   )r   r   r~   r   r   r   r     r   z"ReverseDiffusionPredictor.__init__Nr   c                K   sb   | j jd||||d|\}}t|}|| }	|	||  }
|dur-t|
|}
t|	|}	|
|	fS )a  Predict the next state of the reverse process using the reverse diffusion process.

        Args:
            state: current state of the process, shape (B, C, D, T)
            time: current time of the process, shape (B,)
            score_condition: conditioning for the score estimator
            state_length: length of the valid time steps for each example in the batch

        Returns:
            New state and mean of the diffusion process.
        r   Nr   )r   r>   r$   rx   r   )r   r(   r&   r   r7   r*   r<   r=   z_normrc   	new_stater   r   r   r     s   



z!ReverseDiffusionPredictor.forward	rE   rI   rJ   rK   r   r$   r   r   rP   r   r   r   r   r     s
    r   c                
       s   e Zd ZdZdee dee dedef fddZ	e
eede eed	e ede d
deed	e d
dddede ide dddZ  ZS )	CorrectorzCorrector for the reverse process.

    Args:
        sde: forward SDE
        score_estimator: neural score estimator
        snr: SNR for Annealed Langevin Dynamics
        num_steps: number of steps for the corrector
    r{   r|   r   r   c                    sN   t    || _|| _|| _|| _td| jj	 td| td| d S )Nr[   z	snr:             %sz	num_steps:       %s)
r   r   r{   r|   r   r   r   r\   r   rE   )r   r{   r|   r   r   r   r   r   r     s   
zCorrector.__init__r-   r.   Trs   r   r(   r3   Nc                 C   r)   )a`  
        Args:
            state: current state of the process, shape (B, C, D, T)
            time: current time of the process, shape (B,)
            score_condition: conditioning for the score estimator
            state_length: length of the valid time steps for each example in the batch

        Returns:
            New state and mean.
        Nr   )r   r(   r&   r   r7   r   r   r   r     s   zCorrector.forwardr   )rE   rI   rJ   rK   r   r   r   rL   rM   r   r   r	   r   r   ry   r
   r   r$   r   r   rP   r   r   r   r   r     s.    	
r   c                       s2   e Zd ZdZ fddZe dddZ  ZS )r   zAnnealed Langevin Dynamics for the reverse process.

    References:
        - Song et al., Score-based generative modeling through stochastic differential equations, 2021
    c                    s6   t |tstdt| t jdd|i| d S )NzCExpected an instance of OrnsteinUhlenbeckVarianceExplodingSDE, got r{   r   )
isinstancerQ   r   typer   r   )r   r{   r*   r   r   r   r     s   
z!AnnealedLangevinDynamics.__init__Nc                 C   s   | j j|d}|jdgdg| d  R  }t| jD ];}|du r$|ntj||gdd}| j|||d\}}	t	|}
d| j
| d }|||  }||
t|d   }q|durft||}t||}||fS )a  Correct the state using Annealed Langevin Dynamics.

        Args:
            state: current state of the process, shape (B, C, D, T)
            time: current time of the process, shape (B,)
            score_condition: conditioning for the score estimator
            state_length: length of the valid time steps for each example in the batch

        Returns:
            New state and mean of the diffusion process.

        References:
            Alg. 4 in http://arxiv.org/abs/2011.13456
        rp   rd   re   Nr   r   rk   )r{   ro   rg   ru   ranger   r$   r   r|   rx   r   rm   r9   r   )r   r(   r&   r   r7   rj   ir   r   r   r   	step_sizerc   r   r   r   r     s    


z AnnealedLangevinDynamics.forwardr   r   r   r   r   r   r     s
    r   )!abcr   r   typingr   r   r   numpyr8   r$   #nemo.collections.common.parts.utilsr   nemo.core.classesr   r	   nemo.core.neural_typesr
   r   r   r   r   
nemo.utilsr   r   rQ   rz   r   nnModuler   r   r   r   r   r   r   r   <module>   s&     nS %,6