o
    siM                  	   @   s^  d dl mZ d dlZd dlmZ d dlmZ G dd dejZG dd dejZ	G d	d
 d
e	Z
G dd de	ZG dd de	ZG dd de	ZG dd de	Zd;dejdejdefddZ	d<dejdejdejdefddZd=d"d#Zd>d$d%Zd&d' Zd<d(d)Zd?d+d,Zd?d-d.Zd/d0 Zd1d2 Zd3d4 Zdad5d6 Zd7d8 Zd9d: Z e	Z!eZ"e
Z#dS )@    )UnionN)nn)
functionalc                   @   s(   e Zd ZddejdejdefddZdS )	SCMNTxmask	normalizec                 C   s   t |||dS )zSee :func:`compute_scm`.)r   r   )compute_scm)selfr   r   r    r   L/home/ubuntu/.local/lib/python3.10/site-packages/asteroid/dsp/beamforming.pyforward   s   zSCM.forwardNT)__name__
__module____qualname__torchTensorboolr   r   r   r   r   r      s     r   c                   @   sP   e Zd ZdZedejdejfddZe		ddejdejd	ejfd
dZdS )
Beamformerz#Base class for beamforming modules.	bf_vectormixc                 C   s   t d|  |S )zApply the beamforming vector to the mixture. Output (batch, freqs, frames).

        Args:
            bf_vector: shape (batch, mics, freqs)
            mix: shape (batch, mics, freqs, frames).
        z...mf,...mft->...ft)r   einsumconj)r   r   r   r   r   apply_beamforming_vector   s   z#Beamformer.apply_beamforming_vectorNbf_mat
target_scm	noise_scmc                 C   s   t | tjr| jdkr| S |du s|du r| du rd} | du r't|||d}n(t | tr<t| g|jd  |j	}nt | tjrE| }n
t
dt|  dtj||jd dddddddf }||j|j	S )	a  Return the reference channel indices over the batch.

        Args:
            ref_mic (Optional[Union[int, torch.Tensor]]): The reference channel.
                If torch.Tensor (ndim>1), return it, it is the reference mic vector,
                If torch.LongTensor of size `batch`, select independent reference mic of the batch.
                If int, select the corresponding reference mic,
                If None, the optimal reference mics are computed with :func:`get_optimal_reference_mic`,
                If None, and either SCM is None, `ref_mic` is set to `0`,
            bf_mat: beamforming matrix of shape (batch, freq, mics, mics).
            target_scm (torch.ComplexTensor): (batch, freqs, mics, mics).
            noise_scm (torch.ComplexTensor): (batch, freqs, mics, mics).

        Returns:
            torch.LongTensor of size ``batch`` to select with the reference channel indices.
           Nr   )r   r   r   znUnsupported reference microphone format. Support None, int and 1D torch.LongTensor and torch.Tensor, received .)num_classes)
isinstancer   r   ndimget_optimal_reference_micint
LongTensorshapetodevice
ValueErrortypeFone_hotdtype)ref_micr   r   r   batch_mic_idxref_mic_vectsr   r   r   get_reference_mic_vects   s&   
 (z"Beamformer.get_reference_mic_vectsNN)	r   r   r   __doc__staticmethodr   r   r   r2   r   r   r   r   r      s    	r   c                   @   sD   e Zd ZdejdejdejfddZdejdejdejfddZd	S )
RTFMVDRBeamformerr   r   r   c                 C   s<   t j|dddd\}}|d }| j||dd|dS )	ae  Compute and apply MVDR beamformer from the speech and noise SCM matrices.

        :math:`\mathbf{w} =  \displaystyle \frac{\Sigma_{nn}^{-1} \mathbf{a}}{
        \mathbf{a}^H \Sigma_{nn}^{-1} \mathbf{a}}` where :math:`\mathbf{a}` is the
        ATF estimated from the target SCM.

        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            target_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        r      r      .r    r    )r   rtf_vecr   )r   linalgeighpermutefrom_rtf_vect	transpose)r
   r   r   r   e_vale_vecrtf_vectr   r   r   r   K   s   zRTFMVDRBeamformer.forwardr;   c           
      C   sl   | dddd}|ddd}t||}t| dd|}|| ddd}| j||d}	|	S )a  Compute and apply MVDR beamformer from the ATF vector and noise SCM matrix.

        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            rtf_vec (torch.ComplexTensor): (batch, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        r   r7   r   r8   r    r:   r   )	r>   r@   	unsqueezestable_solver   matmulr   squeezer   )
r
   r   r;   r   noise_scm_t	rtf_vec_t	numeratordenominatorbf_vectoutputr   r   r   r?   e   s   
zRTFMVDRBeamformer.from_rtf_vectN)r   r   r   r   r   r   r?   r   r   r   r   r6   J   s    
r6   c                   @   s@   e Zd Z		d
dejdejdejdeejejef fddZd	S )SoudenMVDRBeamformerr   :0yE>r   r   r   r/   c                 C   s   | dddd}| dddd}t||}|t|d |  }| j||||d}t||}	|	ddd}	| j|	|d	}
|
S )
a  Compute and apply MVDR beamformer from the speech and noise SCM matrices.
        This class uses Souden's formulation [1].

        :math:`\mathbf{w} =  \displaystyle \frac{\Sigma_{nn}^{-1} \Sigma_{ss}}{
        Tr\left( \Sigma_{nn}^{-1} \Sigma_{ss} \right) }\mathbf{u}` where :math:`\mathbf{a}`
        is the steering vector.


        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            target_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            ref_mic (int): reference microphone.
            eps: numerical stabilizer.

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)

        References
            [1] Souden, M., Benesty, J., & Affes, S. (2009). On optimal frequency-domain multichannel
            linear filtering for noise reduction. IEEE Transactions on audio, speech, and language processing, 18(2), 260-276.
        r   r7   r   r8   .NNr   r   r    r:   rD   )	r>   rF   batch_tracer2   r   rG   rH   r@   r   )r
   r   r   r   r/   epsrK   r   batch_mic_vectsrM   rN   r   r   r   r      s   
zSoudenMVDRBeamformer.forwardN)r   rP   )	r   r   r   r   r   r   r&   r%   r   r   r   r   r   rO      s    rO   c                       sP   e Zd Zd fdd	Z	ddejdejdejdeejejef fd	d
Z	  Z
S )SDWMWFBeamformer      ?c                    s   t    || _d S N)super__init__mu)r
   r[   	__class__r   r   rZ      s   

zSDWMWFBeamformer.__init__Nr   r   r   r/   c                 C   sz   | dddd}| dddd}|| j|  }t||}| j||||d}	t||	}
|
ddd}
| j|
|d}|S )	a  Compute and apply SDW-MWF beamformer.

        :math:`\mathbf{w} =  \displaystyle (\Sigma_{ss} + \mu \Sigma_{nn})^{-1} \Sigma_{ss}`.

        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            target_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            ref_mic (int): reference microphone.

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        r   r7   r   r8   rR   r    r:   rD   )	r>   r[   rF   r2   r   rG   rH   r@   r   )r
   r   r   r   r/   rI   target_scm_trL   r   rU   rM   rN   r   r   r   r      s   
zSDWMWFBeamformer.forward)rW   rX   )r   r   r   rZ   r   r   r   r&   r%   r   __classcell__r   r   r\   r   rV      s    	rV   c                   @   sB   e Zd ZdejdejdejfddZedejdejfddZdS )	GEVBeamformerr   r   r   c                 C   s   |  ||}| j||d}|S )a  Compute and apply the GEV beamformer.

        :math:`\mathbf{w} =  \displaystyle MaxEig\{ \Sigma_{nn}^{-1}\Sigma_{ss} \}`, where
        MaxEig extracts the eigenvector corresponding to the maximum eigenvalue
        (using the GEV decomposition).

        Args:
            mix: shape (batch, mics, freqs, frames)
            target_scm: (batch, mics, mics, freqs)
            noise_scm: (batch, mics, mics, freqs)

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        rD   compute_beamforming_vectorr   )r
   r   r   r   rM   rN   r   r   r   r      s   zGEVBeamformer.forwardc                 C   sf   | dddd}t|d}t|  dddd|\}}|d }|tj|ddd	 }|ddd
}|S )Nr   r7   r   r8   ư>r9   r    T)dimkeepdimr:   )r>   condition_scm$generalized_eigenvalue_decompositionr   normrH   r@   )r   r   rI   rA   rB   rM   r   r   r   rb      s   
z(GEVBeamformer.compute_beamforming_vectorN)r   r   r   r   r   r   r5   rb   r   r   r   r   r`      s    r`   c                   @   sV   e Zd ZdZddedefddZdejd	ejfd
dZ	dejdejd	ejfddZ
dS )GEVDBeamformera?  Generalized eigenvalue decomposition speech distortion weighted multichannel Wiener filter.

        Compare to SDW-MWF, spatial covariance matrix are computed from low rank approximation
        based on eigen values decomposition,
        see equation 62 in `[1] <https://hal.inria.fr/hal-01390918/file/14-1.pdf>`_.

    Attributes:
        mu (float): Speech distortion constant.
        rank (int): Rank for the approximation of target covariance matrix,
            no approximation is made if `rank` is None.

    References:
        [1] R. Serizel, M. Moonen, B. Van Dijk and J. Wouters,
        "Low-rank Approximation Based Multichannel Wiener Filter Algorithms for
        Noise Reduction with Application in Cochlear Implants,"
        in IEEE/ACM Transactions on Audio, Speech, and Language Processing, April 2014.
    rW   r   r[   rankc                 C   s   || _ || _d S rX   )r[   rj   )r
   r[   rj   r   r   r   rZ     s   
zGEVDBeamformer.__init__r   r   c           	      C   s   t |dddd|dddd\}}t|jj}tj||dd}tt|dg}t|dg}| j	rBd|d	| j	d
d
d
f< |j}|| j
t|jd |  }||| tj|||  }|d dddS )a4  Compute beamforming vectors for GEVD beamFormer.

        Args:
            target_scm (torch.ComplexTensor): shape (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): shape (batch, mics, mics, freqs)

        Returns:
            torch.ComplexTensor: shape (batch, mics, freqs)

        r   r7   r   r8   g    .A)minmaxr    g        .N).r   )%_generalized_eigenvalue_decompositionr>   r   finfor.   rT   clamp
diag_embedfliprj   r[   eyer'   	expand_asr(   r<   inv)	r
   r   r   e_values	e_vectorsrT   complex_type
ev_plus_murM   r   r   r   rb     s&    z)GEVDBeamformer.compute_beamforming_vectorr   c                 C   s   |  ||}| j||dS )at  Compute and apply the GEVD beamformer.

        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            target_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        rD   ra   )r
   r   r   r   rM   r   r   r   r   :  s   zGEVDBeamformer.forwardN)rW   r   )r   r   r   r4   floatr%   rZ   r   r   rb   r   r   r   r   r   ri      s    (ri   Tr   r   r   c                 C   sx   | j \}}}}|du rt|d||}|jdkr |dddf }td||  |  }|r:||jddddd }|S )	a  Compute the spatial covariance matrix from a STFT signal x.

    Args:
        x (torch.ComplexTensor): shape  [batch, mics, freqs, frames]
        mask (torch.Tensor): [batch, 1, freqs, frames] or [batch, 1, freqs, frames]. Optional
        normalize (bool): Whether to normalize with the mask mean per bin.

    Returns:
        torch.ComplexTensor, the SCM with shape (batch, mics, mics, freqs)
    Nr   r7   zbmft,bnft->bmnfr    T)re   r:   )r'   r   onesr#   r   r   sumr@   )r   r   r   batchmicsfreqsframesscmr   r   r   r	   N  s   
r	   rc   r   r   r   rT   c                 C   s`   t jt d|  || j|d}t d|  || j| }t t |s)J |t j|ddS )a
  Compute the optimal reference mic given the a posteriori SNR, see [1].

    Args:
        bf_mat: (batch, freq, mics, mics)
        target_scm (torch.ComplexTensor): (batch, freqs, mics, mics)
        noise_scm (torch.ComplexTensor): (batch, freqs, mics, mics)
        eps: value to clip the denominator.

    Returns:
        torch.

    References
        Erdogan et al. 2016: "Improved MVDR beamforming using single-channel maskprediction networks"
            https://www.merl.com/publications/docs/TR2016-072.pdf
    z...flm,...fln,...fnm->...m)rk   r    )rd   )r   ro   r   r   realallisfiniteargmax)r   r   r   rT   densnr_postr   r   r   r$   f  s   r$   r:   r    c                 C   sb   |dks|dkr
t |t| ||dd  | j|  }tj| j| | jdd | }| | d|  S )zCondition input SCM with (x + eps tr(x) I) / (1 + eps) along `dim1` and `dim2`.

    See https://stt.msu.edu/users/mauryaas/Ashwini_JPEN.pdf (2.3).
    r:   r    dim1dim2rQ   )r)   r3   r   )NotImplementedErrorrS   r'   r   rr   r)   )r   rT   r   r   scale
scaled_eyer   r   r   rf     s
    rf   c                 C   s   t j| ||ddS )zECompute the trace along `dim1` and `dim2` for a any matrix `ndim>=2`.r   r    )r   diagonalr{   )r   r   r   r   r   r   rS     s   rS   c                 C   sD   t | |}|}|tjtjfvrt | }t| ||||S )zVReturn torch.solve if `a` is non-singular, else regularize `a` and return torch.solve.)_common_dtyper   float64
complex128_precision_mapping_stable_solver(   )bainput_dtypesolve_dtyper   r   r   rF     s
   

rF   c                 C   s<   zt j|| W S  ty   t||}t j||  Y S w rX   )r   r<   solveRuntimeErrorrf   )r   r   rT   r   r   r   r     s   
r   Fc                 C   s@   | j }|}|tjtjfvrt | }t| ||||d|S )as  Compute the Cholesky decomposition of ``input``.
    If ``input`` is only p.s.d, add a small jitter to the diagonal.

    Args:
        input (Tensor): The tensor to compute the Cholesky decomposition of
        upper (bool, optional): See torch.cholesky
        out (Tensor, optional): See torch.cholesky
        eps (int): small jitter added to the diagonal if PD.
    )upperoutrT   )r.   r   r   r   r   _stable_choleskyr(   )inputr   r   rT   r   r   r   r   r   stable_cholesky  s
   
r   c                 C   sr   z|rt jj| |djW S t jj| |dW S  ty8   t| |} |r.t jj| |dj Y S t jj| |d Y S w )N)r   )r   r<   choleskymHr   rf   )r   r   r   rT   r   r   r   r     s   
r   c                 C   sX   t | |}|}|tjtjfvrt | }t| |||\}}||j||fS )zSolves the generalized eigenvalue decomposition through Cholesky decomposition.
    Returns eigen values and eigen vectors (ascending order).
    )r   r   r   r   r   rm   r(   r   )r   r   r   r   rA   rB   r   r   r   rg     s   

rg   c                 C   sZ   t |}t|}||  | dd }tj|\}}t| dd|}||fS )Nr    r:   )r   r   inverser   r@   r<   r=   rG   )r   r   r   inv_choleskycmatrA   rB   r   r   r   rm     s   
rm   c                  G   s6   dd | D }t t|dkrtd| d|d S )Nc                 S   s   g | ]}|j qS r   )r.   ).0r   r   r   r   
<listcomp>  s    z!_common_dtype.<locals>.<listcomp>r   z.Expected inputs from the same dtype. Received r   r   )lensetr   )args
all_dtypesr   r   r   r     s   r   c                   C      da d S )NF
USE_DOUBLEr   r   r   r   force_float_linalg     r   c                   C   r   r   r   r   r   r   r   force_double_linalg  r   r   c                  C   sn   t td} trtjtjtjtjtjtji}| rtj|tj< |S tjtjtjtjtjtji}| r5tj|tj< |S )N	complex32)	hasattrr   r   float16r   float32	complex64r   r   )has_complex32precision_mapr   r   r   r     s    
	r   r   )rc   )rc   r:   r    )r:   r    )FNrc   )$typingr   r   r   torch.nnr   r,   Moduler   r   r6   rO   rV   r`   ri   r   r   r	   ry   r$   rf   rS   rF   r   r   r   rg   rm   r   r   r   r   r   
BeamFormerSdwMwfBeamformerMvdrBeamformerr   r   r   r   <module>   sL    =6/*"S






