o
    }oiT                     @   s   d dl Z d dlmZmZ d dlmZ d dlZd dlmZ d dl	m
Z
mZ d dlmZmZmZ d dlmZ G dd	 d	e
eZG d
d deZG dd deZG dd de
ZdS )    N)ABCabstractmethod)Optional)mask_sequence_tensor)NeuralModule	typecheck)LengthsType
NeuralTypeSpectrogramType)loggingc                	       s  e Zd ZdZ				d0dededed	ef fd
dZedefddZedefddZ	dede
jde
jfddZedd Zedd Zede
jde
jfddZede
jde
jfddZede
jde
jfddZd e
jde
je
jffd!d"Zde
jde
je
je
jffd#d$Zede
jde
jfd%d&Zd'e
jde
je
jffd(d)Zde
jde
je
je
jffd*d+Zed,d- Zd.d/ Z  ZS )1SBNoiseScheduleu^  Noise schedule for the Schrödinger Bridge

    Args:
        time_min: minimum time for the process
        time_max: maximum time for the process
        num_steps: number of steps for the process
        eps: small regularization

    References:
        Schrödinger Bridge for Generative Speech Enhancement, https://arxiv.org/abs/2407.16074
                  ?d   :0yE>time_mintime_max	num_stepsepsc                    s   t    |dk rtd| ||krtd| d| || _|| _|dkr/td| || _|dkr=td| || _td| j	j
 td| j td	| j td
| j td| j d S )Nr   z/time_min should be non-negative, current value z5time_max should be larger than time_min, current max z	 and min zExpected num_steps > 0, got Expected eps > 0, got Initialized %s with	time_min:  %s	time_max:  %s	num_steps: %s	eps:       %s)super__init__
ValueErrorr   r   r   r   r   debug	__class____name__)selfr   r   r   r   r    o/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/audio/parts/submodules/schroedinger_bridge.pyr   '   s$   
zSBNoiseSchedule.__init__returnc                 C   s   | j | j S )zTime step for the process.)r   r   r!   r#   r#   r$   dtJ      zSBNoiseSchedule.dtc                 C   s   | j | j S )zTime range for the process.)r   r   r&   r#   r#   r$   
time_deltaO   r(   zSBNoiseSchedule.time_deltasizedevicec                 C   s   t j||d| j | j }|S )z.Generate random time steps in the valid range.r+   )torchrandr)   r   )r!   r*   r+   timer#   r#   r$   generate_timeT   s   zSBNoiseSchedule.generate_timec                 C   s   t j| jgtjd}| |S )zReturn alpha_t at t_max.r,   )r-   tensorr   alphar+   r!   t_maxr#   r#   r$   alpha_t_maxY      
zSBNoiseSchedule.alpha_t_maxc                 C   s   t j| jgtjd}| |S )zReturn sigma_t at t_max.r,   )r-   r1   r   r2   r+   sigmar3   r#   r#   r$   sigma_t_max_   r6   zSBNoiseSchedule.sigma_t_maxr/   c                 C      dS )zDrift scaling f(t).

        Args:
            time: tensor with time steps

        Returns:
            Tensor the same size as time, representing drift scaling.
        Nr#   r!   r/   r#   r#   r$   fe      
zSBNoiseSchedule.fc                 C   r9   )zDiffusion scaling g(t).

        Args:
            time: tensor with time steps

        Returns:
            Tensor the same size as time, representing diffusion scaling.
        Nr#   r:   r#   r#   r$   gq   r<   zSBNoiseSchedule.gc                 C   r9   )zReturn alpha for SB noise schedule.

            alpha_t = exp( int_0^s f(s) ds  )

        Args:
            time: tensor with time steps

        Returns:
            Tensor the same size as time, representing alpha for each time.
        Nr#   r:   r#   r#   r$   r2   }      zSBNoiseSchedule.alphar2   c                 C   s0   |  tj| jg|jd}||| j  }||fS )zReturn alpha_bar for SB.

            alpha_bar = alpha_t / alpha_t_max

        Args:
            alpha: tensor with alpha values

        Returns:
            Tensors the same size as alpha, representing alpha_bar and alpha_t_max.
        r,   )r2   r-   r1   r   r+   r   )r!   r2   r5   	alpha_barr#   r#   r$   alpha_bar_from_alpha   s   z$SBNoiseSchedule.alpha_bar_from_alphac                 C   "   |  |}| |\}}|||fS )zReturn alpha, alpha_bar and alpha_t_max for SB.

        Args:
            time: tensor with time steps

        Returns:
            Tuple of tensors with alpha, alpha_bar and alpha_t_max.
        )r2   r@   )r!   r/   r2   r?   r5   r#   r#   r$   
get_alphas      
	
zSBNoiseSchedule.get_alphasc                 C   r9   )zReturn sigma_t for SB.

            sigma_t^2 = int_0^s g^2(s) / alpha_s^2 ds

        Args:
            time: tensor with time steps

        Returns:
            Tensor the same size as time, representing sigma for each time.
        Nr#   r:   r#   r#   r$   r7      r>   zSBNoiseSchedule.sigmar7   c                 C   s>   |  tj| jg|jd}|d |d  }t|| j |fS )zReturn sigma_bar_t for SB.

            sigma_bar_t^2 = sigma_t_max^2 - sigma_t^2

        Args:
            sigma: tensor with sigma values

        Returns:
            Tensors the same size as sigma, representing sigma_bar and sigma_t_max.
        r,      )r7   r-   r1   r   r+   sqrtr   )r!   r7   r8   sigma_bar_sqr#   r#   r$   sigma_bar_from_sigma   s   z$SBNoiseSchedule.sigma_bar_from_sigmac                 C   rA   )zReturn sigma, sigma_bar and sigma_t_max for SB.

        Args:
            time: tensor with time steps

        Returns:
            Tuple of tensors with sigma, sigma_bar and sigma_t_max.
        )r7   rG   )r!   r/   r7   	sigma_barr8   r#   r#   r$   
get_sigmas   rC   zSBNoiseSchedule.get_sigmasc                 C   r9   )z$Return a copy of the noise schedule.Nr#   r&   r#   r#   r$   copy   s   zSBNoiseSchedule.copyc                 C   sJ   | j j d| j d| j d| j d}|d| j 7 }|d| j 7 }|S )Nz
(time_min=z, time_max=z, num_steps=)z
	dt:         z
	time_delta: )r   r    r   r   r   r'   r)   r!   descr#   r#   r$   __repr__   s   &zSBNoiseSchedule.__repr__r   r   r   r   )r    
__module____qualname____doc__floatintr   propertyr'   r)   r-   r+   Tensorr0   r5   r8   r   r;   r=   r2   r@   rB   r7   rG   rI   rJ   rN   __classcell__r#   r#   r"   r$   r      sN    #

  
r   c                       s   e Zd ZdZ				ddededed	ed
edef fddZdejdejfddZ	dejdejfddZ
dejdejfddZdejdejfddZdd Z fddZ  ZS )SBNoiseScheduleVEu  Variance exploding noise schedule for the Schrödinger Bridge.

    Args:
        k: defines the base for the exponential diffusion coefficient
        c: scaling for the diffusion coefficient
        time_min: minimum time for the process
        time_max: maximum time for the process
        num_steps: number of steps for the process
        eps: small regularization

    References:
        Schrödinger Bridge for Generative Speech Enhancement, https://arxiv.org/abs/2407.16074
    r   r   r   r   kcr   r   r   r   c                    s   t  j||||d |dkrtd| |dkr td| || _|| _td| jj td| j td| j td	| j	 td
| j
 td| j td| j d S )Nr   r   r   r      zExpected k > 1, got r   Expected c > 0, got r   z	k:         %s	c:         %sr   r   r   r   )r   r   r   rZ   rY   r   r   r   r    r   r   r   r   )r!   rY   rZ   r   r   r   r   r"   r#   r$   r      s   	zSBNoiseScheduleVE.__init__r/   r%   c                 C   
   t |S N)r-   
zeros_liker:   r#   r#   r$   r;   	     
zSBNoiseScheduleVE.fc                 C   s   t | j| j| j  S r`   )r-   rE   rZ   rY   r/   r:   r#   r#   r$   r=     s   zSBNoiseScheduleVE.gc                 C   r_   r`   )r-   	ones_liker:   r#   r#   r$   r2     rb   zSBNoiseScheduleVE.alphac                 C   s8   | j | jd|  d  dt| j | j  }t|S NrD   r\   )rZ   rY   mathlogr   r-   rE   r!   r/   sigma_sqr#   r#   r$   r7     s   .
zSBNoiseScheduleVE.sigmac                 C   s    t | j| j| j| j| j| jdS )N)rY   rZ   r   r   r   r   )rX   rY   rZ   r   r   r   r   r&   r#   r#   r$   rJ     s   zSBNoiseScheduleVE.copyc                    s.   t   }|d| j 7 }|d| j 7 }|S )Nz
	k: z
	c: )r   rN   rY   rZ   rL   r"   r#   r$   rN      s   
zSBNoiseScheduleVE.__repr__rO   r    rP   rQ   rR   rS   rT   r   r-   rV   r;   r=   r2   r7   rJ   rN   rW   r#   r#   r"   r$   rX      s2    
rX   c                       s   e Zd ZdZ					ddededed	ed
ededef fddZdejdejfddZ	dejdejfddZ
dejdejfddZdejdejfddZdd Z fddZ  ZS )SBNoiseScheduleVPu  Variance preserving noise schedule for the Schrödinger Bridge.

    Args:
        beta_0: defines the lower bound for diffusion coefficient
        beta_1: defines upper bound for diffusion coefficient
        c: scaling for the diffusion coefficient
        time_min: minimum time for the process
        time_max: maximum time for the process
        num_steps: number of steps for the process
        eps: small regularization
    r   r   r   r   beta_0beta_1rZ   r   r   r   r   c                    s   t  j||||d |dk rtd| |dk r td| ||kr.td| d| |dkr9td| || _|| _|| _td| jj	 td	| j td
| j td| j td| j
 td| j td| j td| j d S )Nr[   r   zExpected beta_0 >= 0, got zExpected beta_1 >= 0, got z%Expected beta_0 < beta_1, got beta_0=z and beta_1=r]   r   z	beta_0:    %sz	beta_1:    %sr^   r   r   r   r   )r   r   r   rk   rl   rZ   r   r   r   r    r   r   r   r   )r!   rk   rl   rZ   r   r   r   r   r"   r#   r$   r   4  s(   
zSBNoiseScheduleVP.__init__r/   r%   c                 C   s   d| j || j| j     S )N      )rk   rl   r:   r#   r#   r$   r;   Z  s   zSBNoiseScheduleVP.fc                 C   s&   | j | j|| j| j    }t|S r`   )rZ   rk   rl   r-   rE   )r!   r/   g_sqr#   r#   r$   r=   ]  s   
zSBNoiseScheduleVP.gc                 C   s0   | j | | j| j  d |d   }td| S )NrD   rm   )rk   rl   r-   exp)r!   r/   tmpr#   r#   r$   r2   a  s   "zSBNoiseScheduleVP.alphac                 C   sD   | j | | j| j  d |d   }t|d }| j| }t|S rd   )rk   rl   r-   ro   rZ   rE   rg   r#   r#   r$   r7   e  s   "

zSBNoiseScheduleVP.sigmac              	   C   s$   t | j| j| j| j| j| j| jdS )N)rk   rl   rZ   r   r   r   r   )rj   rk   rl   rZ   r   r   r   r   r&   r#   r#   r$   rJ   k  s   zSBNoiseScheduleVP.copyc                    s>   t   }|d| j 7 }|d| j 7 }|d| j 7 }|S )Nz

	beta_0: z

	beta_1: z

	c:      )r   rN   rk   rl   rZ   rL   r"   r#   r$   rN   v  s
   
zSBNoiseScheduleVP.__repr__)r   r   r   r   r   ri   r#   r#   r"   r$   rj   '  s8    &rj   c                       s  e Zd ZdZ						d/deded	ed
ededee dee de	def fddZ
edd ZejdefddZedd ZejdefddZedd Zejde	fddZedd ZejdefddZedd  Zejdefd!d Zeed"e ed"e d#d$eed%e d#d$d&ed"e eed%e d#d$d'd(e 	d0d)ejd*ejd+eej d,ejfd-d.Z  ZS )1	SBSampleruM  Schrödinger Bridge sampler.

    Args:
        noise_schedule: noise schedule for the bridge
        estimator: neural estimator
        estimator_output: defines the output of the estimator, e.g., data_prediction
        estimator_time: time for conditioning the estimator, e.g., 'current'
                        or 'previous'. Default is 'previous'.
        process: defines the process, e.g., sde or ode
        time_max: maximum time for the process
        time_min: minimum time for the process
        num_steps: number of steps for the process
        eps: small regularization to prevent division by zero

    References:
        Schrödinger Bridge for Generative Speech Enhancement, https://arxiv.org/abs/2407.16074
        Schrodinger Bridges Beat Diffusion Models on Text-to-Speech Synthesis, https://arxiv.org/abs/2312.03491
    previoussdeN2   r   noise_schedule	estimatorestimator_outputestimator_timeprocessr   r   r   r   c
           
         s  t    | | _|d ur|| j_td| jj |d ur*|| j_td| jj || j_td| jj || _	|| _
|| _|| _|	dkrMtd|	 |	| _td| jj td| j
 td| j td	| j td
| j td| j td| j td| j d S )N"noise_schedule.time_max set to: %s"noise_schedule.time_min set to: %s#noise_schedule.num_steps set to: %sr   r   r   z	estimator_output: %sz	estimator_time:   %sz	process:          %sz	time_min:         %sz	time_max:         %sz	num_steps:        %sz	eps:              %s)r   r   rJ   ru   r   r   infor   r   rv   rw   rx   ry   r   r   r   r   r    )
r!   ru   rv   rw   rx   ry   r   r   r   r   r"   r#   r$   r     s2   

zSBSampler.__init__c                 C      | j jS r`   )ru   r   r&   r#   r#   r$   r        zSBSampler.time_maxvaluec                 C      || j _td| j j d S )Nrz   )ru   r   r   r   r!   r   r#   r#   r$   r        c                 C   r~   r`   )ru   r   r&   r#   r#   r$   r     r   zSBSampler.time_minc                 C   r   )Nr{   )ru   r   r   r   r   r#   r#   r$   r     r   c                 C   r~   r`   )ru   r   r&   r#   r#   r$   r     r   zSBSampler.num_stepsc                 C   r   )Nr|   )ru   r   r   r   r   r#   r#   r$   r     r   c                 C      | j S r`   )_processr&   r#   r#   r$   ry        zSBSampler.processc                 C   .   |dvrt d| || _td| j d S )N)rs   odeUnexpected process: zprocess set to: %s)r   r   r   r}   r   r#   r#   r$   ry        c                 C   r   r`   )_estimator_timer&   r#   r#   r$   rx     r   zSBSampler.estimator_timec                 C   r   )N)currentrr   zUnexpected estimator time: zestimator time set to: %s)r   r   r   r}   r   r#   r#   r$   rx     r   )BCDTT)optionalr   )
prior_meanestimator_conditionstate_length)sampler   )input_typesoutput_typesr   r   r   r%   c                 C   s  |}|durt ||}tj| j| j| jd |jd}|d tj|jd |jd }| j	
|\}}}	| j	|\}
}}|dd D ]%}|tj|jd |jd }|du rW|ntj||gdd}| jdkrg|n|}| jdkry| j|||d\}}ntd	| j | j	
|\}}}| j	|\}}}| jd
kr||d  ||
d  | j  }d|d |
d | j   }|| }|| t| }|dddd}|dddd}|dddd}t|}|| ||  ||  }nt| jdkrW|| | ||
 | | j  }||d | j  |d || | |
| j    }||	|d  | j  |d |
| | || j    }|dddd}|dddd}|dddd}|| ||  ||  }ntd| j |}|}|}
|}qA|durrt ||}||fS )z(Takes prior mean and generates a sample.Nr\   r,   r   )dimr   data_prediction)inputinput_length	conditionzUnexpected estimator output: rs   rD   r   r   )r   r-   linspacer   r   r   r+   onesshaperu   rB   rI   catrx   rw   rv   NotImplementedErrorry   r   rE   view
randn_likeRuntimeError)r!   r   r   r   state
time_steps	time_prev
alpha_prev_r5   
sigma_prevsigma_bar_prevr8   tr/   estimator_inputrx   current_estimatealpha_talpha_bar_tsigma_tsigma_bar_tweight_prevrp   weight_estimateweight_zz_normweight_prior_meanr#   r#   r$   forward  sl   





zSBSampler.forward)rr   rs   NNrt   r   r`   )r    rP   rQ   rR   r   r   strr   rS   rT   r   rU   r   setterr   r   ry   rx   r   r	   r
   tupler   r-   inference_moderV   r   rW   r#   r#   r"   r$   rq   ~  s    	
2






rq   )re   abcr   r   typingr   r-   #nemo.collections.common.parts.utilsr   nemo.core.classesr   r   nemo.core.neural_typesr   r	   r
   
nemo.utilsr   r   rX   rj   rq   r#   r#   r#   r$   <module>   s    DJW