o
    oix_                     @   st  d dl mZmZ d dlZd dlm  mZ d dlmZmZ d;dede	de	de	d	ef
d
dZ
G dd dejZdede	d	efddZdeded	efddZdeded	efddZG dd deZG dd deZG dd deZG dd deZG dd  d eZd<d#ejd$e	d%e	d	ejfd&d'Zd=d*ejd+ed,ed	ejfd-d.Zd/ed0e	fd1d2Z	4d>d5d6Zd?d7d8Zd@d9d:ZdS )A    )FinalListN)Tensornn   xwindow_lengthstepdimreturnc                 C   s   t | j}t |  }t|| | ||  ||< |dkr0||d | ||d ||  n|dkrA|| |||  n||| ||||  || | ||< | ||S )a  Returns a tensor with chunks of overlapping windows of the first dim of x.

    Args:
        x (Tensor): Input of shape [B, T, ...]
        window_length (int): Length of each window
        step (int): Step/hop of each window w.r.t. the original signal x
        dim (int): Dimension on to apply the windowing

    Returns:
        windowed tensor (Tensor): Output tensor with shape (if dim==1)
            [B, (N - window_length + step) // step, window_length, ...]
    r   r   )listshapestrideintinsertappend
as_strided)r   r   r	   r
   r   r    r   K/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/df/multiframe.pyas_windowed   s   

r   c                	       s   e Zd ZU dZee ed< ee ed< ee ed< ee ed< ddedededef fd	d
Zde	fddZ
de	fddZeddedede	fddZede	de	de	fddZ  ZS )MultiFrameModulea  Multi-frame speech enhancement modules.

    Signal model and notation:
        Noisy: `x = s + n`
        Enhanced: `y = f(x)`
        Objective: `min ||s - y||`

        PSD: Power spectral density, notated eg. as `Rxx` for noisy PSD.
        IFC: Inter-frame correlation vector: PSD*u, u: selection vector. Notated as `rxx`
        RTF: Relative transfere function, also called steering vector.
    	num_freqs
frame_sizeneed_unfoldrealr   F	lookaheadc                    sv   t    || _|| _|| _|r"tdddd|d | |fd| _ntdd|d | |fd| _|dk| _	|| _
dS )ad  Multi-Frame filtering module.

        Args:
            num_freqs (int): Number of frequency bins used for filtering.
            frame_size (int): Frame size in FD domain.
            lookahead (int): Lookahead, may be used to select the output time step. Note: This
                module does not add additional padding according to lookahead!
        r   r           N)super__init__r   r   r   r   ConstantPad3dpadConstantPad2dr   r   )selfr   r   r   r   	__class__r   r   r   :   s   
	$

zMultiFrameModule.__init__specc                 C   s:   | j r| |d| jd}|ddddddS |dS )	Nr   r               r   )r   r!   unfoldr   permute	unsqueezer#   r&   r   r   r   spec_unfold_realN   s   
z!MultiFrameModule.spec_unfold_realc                 C   s&   | j r| |d| jdS |dS )a  Pads and unfolds the spectrogram according to frame_size.

        Args:
            spec (complex Tensor): Spectrogram of shape [B, C, T, F]
        Returns:
            spec (Tensor): Unfolded spectrogram of shape [B, C, T, F, N], where N: frame_size.
        r)   r   r   )r   r!   r,   r   r.   r/   r   r   r   spec_unfoldU   s   
zMultiFrameModule.spec_unfold:0yE>Hz>diag_epsepsr   c              	   C   s   t dt t| |||S )N...nm,...m->...n)torcheinsuminverse_tik_reg)Rxxrssr4   r5   r   r   r   solvea   s   zMultiFrameModule.solvecoefsc                 C      t d| |S )N...n,...n->...r7   r8   r&   r>   r   r   r   apply_coefsg   s   zMultiFrameModule.apply_coefsr   F)r2   r3   )__name__
__module____qualname____doc__r   r   __annotations__boolr   r   r0   r1   staticmethodfloatr=   rC   __classcell__r   r   r$   r   r   (   s   
   r   nc                 C   s4   t | dd|d dfd|d} td| |  S )a^  Compute the PSD correlation matrix Rxx for a spectrogram.

    That is, `X*conj(X)`, where `*` is the outer product.

    Args:
        x (complex Tensor): Spectrogram of shape [B, C, T, F]. Will be unfolded with `n` steps over
            the time axis.

    Returns:
        Rxx (complex Tensor): Correlation matrix of shape [B, C, T, F, N, N]
    r   r   ...n,...m->...mnFr!   r,   r7   r8   conj)r   rN   r   r   r   psdn   s   "rT   r&   r>   c                 C   r?   )a8  Deep filter implementation using `torch.einsum`. Requires unfolded spectrogram.

    Args:
        spec (complex Tensor): Spectrogram of shape [B, C, T, F, N]
        coefs (complex Tensor): Coefficients of shape [B, C, N, T, F]

    Returns:
        spec (complex Tensor): Spectrogram of shape [B, C, T, F]
    z...tfn,...ntf->...tfrA   rB   r   r   r   df~   s   
rU   c                 C   s   | j \}}}}}}tj||||df| j| jd}| d |d  jdd|d< |d  | d |d  jdd8  < | d |d  jdd|d< |d  | d |d  jdd7  < |S )ab  Deep filter implementation for real valued input Tensors. Requires unfolded spectrograms.

    Args:
        spec (real-valued Tensor): Spectrogram of shape [B, C, N, T, F, 2].
        coefs (real-valued Tensor): Coefficients of shape [B, C, N, T, F, 2].

    Returns:
        spec (real-valued Tensor): Filtered Spectrogram of shape [B, C, T, F, 2]
    r)   dtypedevice).r   r
   ).r   )r   r7   emptyrW   rX   sum)r&   r>   bc_tfoutr   r   r   df_real   s   
$$rb   c                	       T   e Zd ZU dZee ed< ddedededef fdd	Zd
e	de	fddZ
  ZS )DFDeep Filtering.rS   r   Fr   r   r   c                    s   t  ||| || _d S )Nr   r   rS   r#   r   r   r   rS   r$   r   r   r      s   
zDF.__init__r&   r>   c                 C   s   |  t|}t|}|dd| j}|j|jd d| jg|jdd  R  }| jr0| }t	||}| j
r<| }t||dd | jd d f< |S )NrO   r   r   r)   .)r1   r7   view_as_complexnarrowr   viewr   r   rS   rU   trainingcloneview_as_real)r#   r&   r>   spec_uspec_fr   r   r   forward   s   
(
z
DF.forwardrD   rE   rF   rG   rH   r   rJ   rI   r   r   r   rp   rM   r   r   r$   r   rd      
   
  rd   c                	       rc   )DFrealre   rS   r   Fr   r   r   c                    s   t  j|||dd || _d S )NT)r   rf   rg   r$   r   r   r      s   
zDFreal.__init__r&   r>   c                 C   s|   |  |}|dd| j}|jd d| jgt|jdd  }||}| jr+| }t||}||dd| jddf< |S )zPads and unfolds the spectrogram and applies deep filtering using only real valued types.

        Args:
            spec (Tensor): Spectrogram of shape [B, C, T, F, 2]
            coefs (Tensor): Spectrogram of shape [B, C, T, F, 2]
        rO   r   r   r)   N.)	r0   ri   r   r   r   r   rj   rS   rb   )r#   r&   r>   rn   ro   	new_shaper   r   r   rp      s   
$

zDFreal.forwardrD   rq   r   r   r$   r   rs      rr   rs   c                       sB   e Zd ZdZddededef fddZd	ed
efddZ  ZS )CRMzComplex ratio mask.r   r   r   r   r   c                    s.   |dkr|dksJ ||ft  |d d S )Nr   r   )r   r   )r#   r   r   r   r$   r   r   r      s   zCRM.__init__r&   r>   c                 C   s   | d|S )Nr   )squeezemul)r#   r&   r>   r   r   r   forward_impl   s   zCRM.forward_impl)r   r   )	rE   rF   rG   rH   r   r   r   rx   rM   r   r   r$   r   ru      s    ru   c                          e Zd ZU dZee ed< ee ed< ee ed< ee ed< ee ed< 							
	ddedededededef fddZ	dd Z
dedededefddZ  ZS )MfWfz&Multi-frame Wiener filter base module.cholesky_decompr9   enforce_constraintsr5   dloadr   FTr2   r3   r   r   r   c	           	         v   t  j|||d || _|| _|| _t| j| jd| _t	| j| _
| jd | j
d< | jd | j
d< || _|| _dS )a  Multi-frame Wiener Filter via an estimate of the inverse

        Args:
            num_freqs (int): Number of frequency bins to apply MVDR filtering to.
            frame_size (int): Frame size of the MF MVDR filter.
            lookahead (int): Lookahead of the frame.
            cholesky_decomp (bool): Whether the input is a cholesky decomposition of the correlation matrix. Defauls to `False`.
            inverse (bool): Whether the input is a normal or inverse correlation matrix. Defaults to `True`.
            enforce_constraints (bool): Enforce hermetian matrix for non-inverse input and a triangular matrix for cholesky decomposition inpiut.
        r   r   r   Nr   r   r{   r9   r|   r7   triu_indicesr   	triu_idcs
empty_like	tril_idcsr5   r}   	r#   r   r   r   r{   r9   r|   r5   r}   r$   r   r   r         
zMfWf.__init__c                 C   H   | j r| jrdS | j r| jsdS | j s| jrdS | j s | js"dS dS dS )z3Return an factor f such that Rxx/f in range [-1, 1]g     @@g    8|Ag-C6*?gh㈵>Nr9   r{   r#   r   r   r   get_r_factor     zMfWf.get_r_factorr&   ifciRxxr   c                 C   s  |  t|}t|d| j| jdf}| jr<| jr1d|dddddd| jd | jd f< ||	dd
 }| jrz| jsz| jszdtj|dd	d
_|dddddd| jd | jd f 
 }||dddddd| jd | jd f< t|d| jdf}|d	d| j}| jst|| j| j}tj||d}n
td||d}| ||}| jr| }t||dd| jddf< |S )a  Multi-frame Wiener filter based on Rxx**-1 and speech IFC vector.

        Args:
            spec (complex Tensor): Spectrogram of shape [B, 1, T, F]
            ifc (complex Tensor): Inter-frame speech correlation vector [B, T, F, N*2]
            iRxx (complex Tensor): (Inverse) noisy covariance matrix Rxx**-1 [B, T, F, (N**2)*2] OR
                cholesky_decomp Rxx=L*L^H of the same shape.

        Returns:
            spec (complex Tensor): Filtered spectrogram of shape [B, C, T, F]
        r*   r)   r   Nr   r   r+   r   rO   dim1dim2r6   .)r1   r7   rh   	unflattenr   r{   r|   r   matmul	transposerS   r9   diagonalimagr   ri   r   r:   r}   r5   linalgr=   r.   r8   rC   rk   rl   rm   )r#   r&   r   r   rn   	tril_conjro   wr   r   r   rp     s*   *.*zMfWf.forwardr   FTTr2   r3   rE   rF   rG   rH   r   rJ   rI   rL   r   r   r   r   rp   rM   r   r   r$   r   rz      8   
  "rz   c                       ry   )MfMvdrz^Multi-frame minimum variance distortionless beamformer based on Rnn**-1 and speech IFC vector.r{   r9   r|   r5   r}   r   FTr2   r3   r   r   r   c	           	         r~   )a  Multi-frame minimum variance distortionless beamformer.

        Args:
            num_freqs (int): Number of frequency bins to apply MVDR filtering to.
            frame_size (int): Frame size of the MF MVDR filter.
            lookahead (int): Lookahead of the frame.
            cholesky_decomp (bool): Whether the input is a cholesky decomposition of the correlation matrix. Defauls to `False`.
            inverse (bool): Whether the input is a normal or inverse correlation matrix. Defaults to `True`.
            enforce_constraints (bool): Enforce hermetian matrix for non-inverse input and a triangular matrix for cholesky decomposition inpiut.
        r   r   r   Nr   r   r$   r   r   r   F  r   zMfMvdr.__init__c                 C   r   )z3Return an factor f such that Rnn/f in range [-1, 1]g     @g    Ag-C6
?gư>Nr   r   r   r   r   r   f  r   zMfMvdr.get_r_factorr&   r   iRnnr   c                 C   s  |  t|}t|d| j| jdf}| jr<| jr1d|dddddd| jd | jd f< ||	dd
 }| jrz| jsz| jszdtj|dd	d
_|dddddd| jd | jd f 
 }||dddddd| jd | jd f< t|d| jdf}|d	d| j}| jst|| j| j}tj||}ntd||}td|
 |}|d 
 }	||	 |jd| j  d}
| ||
}| jr| }t||dd| jddf< |S )a  Multi-frame MVDR filter based on Rnn**-1 and speech IFC vector.

        Args:
            spec (complex Tensor): Spectrogram of shape [B, C, T, F]
            ifc (complex Tensor): Inter-frame speech correlation vector [B, C*N*2, T, F]
            iRnn (complex Tensor): (Inverse) noise covariance matrix Rnn**-1 [B, T, F (N**2)*2] OR
                cholesky_decomp Rnn=L*L^H of the same shape.

        Returns:
            spec (complex Tensor): Filtered spectrogram of shape [B, C, T, F]
        r*   r)   r   Nr   r   r+   r   rO   r   r6   r@   ).r   N.)r1   r7   rh   r   r   r{   r|   r   r   r   rS   r9   r   r   r   ri   r   r:   r}   r5   r   r=   r8   r   r.   rC   rk   rl   rm   )r#   r&   r   r   rn   r   ro   	numeratordenumeratorscaler   r   r   r   rp   q  s0   *.* zMfMvdr.forwardr   r   r   r   r$   r   r   =  r   r   rO   r   inputr   r   c                 C   sL   | j dks	J d| j| | j| ksJ dtj| d||d} | jddS )a  Compute the trace of a Tensor along ``dim1`` and ``dim2`` dimensions.
    Args:
        input (torch.Tensor): Tensor of dimension `(..., channel, channel)`
        dim1 (int, optional): the first dimension of the diagonal matrix
            (Default: -1)
        dim2 (int, optional): the second dimension of the diagonal matrix
            (Default: -2)
    Returns:
        Tensor: trace of the input Tensor
    r)   z/The dimension of the tensor must be at least 2.z3The size of ``dim1`` and ``dim2`` must be the same.r   r   r   rY   )ndimr   r7   r   r[   )r   r   r   r   r   r   _compute_mat_trace  s   r   r3   r2   matregr5   c                 C   sZ   |  d}tj|| j| jd}t| jd | }|| }| ||dddddf   } | S )a  Perform Tikhonov regularization (only modifying real part).
    Args:
        mat (torch.Tensor): input matrix (..., channel, channel)
        reg (float, optional): regularization factor (Default: 1e-8)
        eps (float, optional): a value to avoid the correlation matrix is all-zero (Default: ``1e-8``)
    Returns:
        Tensor: regularized matrix (..., channel, channel)
    r   rV   ).NN.N)sizer7   eyerW   rX   r   r   )r   r   r5   Cr   epsilonr   r   r   r:     s   

r:   XNc                 C   s8   t | dd|d dfd|d}td|| }|S )Nr   r   rP   rQ   )r   r   Xwr;   r   r   r   compute_corr  s   "r   TFc           !         s  ddl m}m} dd l}ddlm} ddlm}	m}
 ddl	m
} tjdd d d	}d
}|r4|r4d}d}|  d|_|jdd | }d|_d|_d|_|jd d }|j|j|j|j|jd|	d|jd|j dd jddd}|	d|jd|j dd jddd}|| }|
d||j t| |||d}fdd|||fD \}}}t|dd d dfd d} fdd|||fD \}}}|d }t|||}| r|| }nt|||}|}|rtj|}|rtj|\}}tdt|dkdd  ||    |rt!d ||}t!d!||}nt"|t#|$dt#|%d"t#|%d"&d}'|( } |
d#)t*|t*|| |j d S )$Nr   icinstallconfig
load_audio
save_audioModelParams   	linewidthr(   r3   r2   -C6?h㈵>Tallow_reload`      ]  r)   r   srfft_sizehop_sizenb_bands assets/clean_freesound_33711.wav
num_frameskeepdimassets/noise_freesound_2530.wavout/noisy.wavr{   r9   r|   c                        g | ]}t  | qS r   r7   
from_numpyanalysisnumpy.0r   rU   r   r   
<listcomp>       z$compute_ideal_wf.<locals>.<listcomp>c                       g | ]}t | qS r   r   )r   AORDERr   r   r         .r   (Number of errors during cholesky_decomp:r6   ...fn,...fn->...fr*   zout/ideal_mfwf_c{}_i{}.wav)+icecreamr   r   libdf	df.configr   df.ior   r   df.modelr   r7   set_printoptionsincludeContextuse_defaultsr   r   r   rd   nb_erbmeanrz   rR   r!   r,   r:   r   invcholesky_exprintwherer[   absr8   rh   rm   r.   flattenrv   	synthesisr   formatr   )!rxx_via_rssrnnr{   r9   r|   manualr   r   r   r   r   r   r   DLOADEPSpn_freqssrN   r   wfr   Sr   r   RssRnnr;   r   r   infor   Yyr   r   rU   r   compute_ideal_wf  s   "
"r  c           $         s  ddl m}m} dd l}ddlm} ddlm}m}	 ddl	m
}
 d|_tjdd d	 d
}d}| r7|r7d}d}|  |jdd |
 }d|_d|_d|_|jd d }|j|j|j|j|jd|d|jd	|j dd jddd}|d|jd	|j dd jddd}|| }|	d||j t| | ||d}fdd|||fD \}}}t|dd d dfd d} fdd||fD \}}|d }tj|\}}|d }t|||}|rtj|}| rtj|\}}tdt|dkdd   ||!   |rM|d }||"d |  }| r!|#|$ %d d!}|r,t&d"||}ntj'||}t&d#|$ |} || "d |  }!t&d$||!}"nt(|t)|"dt)|*d%t)|*d%+d}",|"- }#|	d&.t/| t/||#|j d S )'Nr   r   r   r   r   Tx   r   r(   r3   r2   r   r   r   r   r   r   r)   r   r   r   r   r   r   r   r   c                    r   r   r   r   r   r   r   r   K  r   z&compute_ideal_mvdr.<locals>.<listcomp>c                    r   r   r   r   r   r   r   r   M  r   r   r   r   rO   r6   r@   r   r*   zout/ideal_mfmvdr_c{}_i{}.wav)0r   r   r   r   r   r   r   r   r   r   r   r   r7   r   r   r   r   r   rd   r   r   r   rR   r!   r,   r   eighr:   r   r   r   r   r[   r   r.   r   rS   r   r8   r=   rh   rm   r   rv   r   r   r   r   )$r{   r9   r|   r   r   r   r   r   r   r   r   r   r   r   r   r   rN   r   mvdrr   r   r   r   r   r   r   r^   vr   r   ifc0numdenumr   r   r  r   r  r   compute_ideal_mvdr  s   ""r  c                 C   s4   t tfD ]}dD ]}dD ]	}|||| d qqqd S )N)TFr   )r  r  )r|   mr]   ir   r   r   compute_all_mfw  s   r  )r   r   )rO   r   )r3   r2   )TFTTF)FTTF)T)typingr   r   r7   torch.nn.functionalr   
functionalrR   r   r   r   Moduler   rT   rU   rb   rd   rs   ru   rz   r   r   rL   r:   r   r  r  r  r   r   r   r   <module>   s*      F` d 

QY