o
    s·¯i–M  ã                	   @   s^  d dl mZ d dlZd dlmZ d dlmZ G dd„ dejƒZG dd„ dejƒZ	G d	d
„ d
e	ƒZ
G dd„ de	ƒZG dd„ de	ƒZG dd„ de	ƒZG dd„ de	ƒZd;dejdejdefdd„Z	d<dejdejdejdefdd„Zd=d"d#„Zd>d$d%„Zd&d'„ Zd<d(d)„Zd?d+d,„Zd?d-d.„Zd/d0„ Zd1d2„ Zd3d4„ Zdad5d6„ Zd7d8„ Zd9d:„ Z e	Z!eZ"e
Z#dS )@é    )ÚUnionN)Únn)Ú
functionalc                   @   s(   e Zd Zddejdejdefdd„ZdS )	ÚSCMNTÚxÚmaskÚ	normalizec                 C   s   t |||dS )zSee :func:`compute_scm`.)r   r   )Úcompute_scm)Úselfr   r   r   © r   úL/home/ubuntu/.local/lib/python3.10/site-packages/asteroid/dsp/beamforming.pyÚforward   s   zSCM.forward©NT)Ú__name__Ú
__module__Ú__qualname__ÚtorchÚTensorÚboolr   r   r   r   r   r      s     r   c                   @   sP   e Zd ZdZedejdejfdd„ƒZe		ddejdejd	ejfd
d„ƒZdS )Ú
Beamformerz#Base class for beamforming modules.Ú	bf_vectorÚmixc                 C   s   t  d|  ¡ |¡S )zÊApply the beamforming vector to the mixture. Output (batch, freqs, frames).

        Args:
            bf_vector: shape (batch, mics, freqs)
            mix: shape (batch, mics, freqs, frames).
        z...mf,...mft->...ft)r   ÚeinsumÚconj)r   r   r   r   r   Úapply_beamforming_vector   s   z#Beamformer.apply_beamforming_vectorNÚbf_matÚ
target_scmÚ	noise_scmc                 C   sÚ   t | tjƒr| jdkr| S |du s|du r| du rd} | du r't|||d}n(t | tƒr<t | g|jd  ¡ |j	¡}nt | tjƒrE| }n
t
dt| ƒ› dƒ‚tj||jd ddd…ddd…df }| |j¡ |j	¡S )	a–  Return the reference channel indices over the batch.

        Args:
            ref_mic (Optional[Union[int, torch.Tensor]]): The reference channel.
                If torch.Tensor (ndim>1), return it, it is the reference mic vector,
                If torch.LongTensor of size `batch`, select independent reference mic of the batch.
                If int, select the corresponding reference mic,
                If None, the optimal reference mics are computed with :func:`get_optimal_reference_mic`,
                If None, and either SCM is None, `ref_mic` is set to `0`,
            bf_mat: beamforming matrix of shape (batch, freq, mics, mics).
            target_scm (torch.ComplexTensor): (batch, freqs, mics, mics).
            noise_scm (torch.ComplexTensor): (batch, freqs, mics, mics).

        Returns:
            torch.LongTensor of size ``batch`` to select with the reference channel indices.
        é   Nr   )r   r   r   znUnsupported reference microphone format. Support None, int and 1D torch.LongTensor and torch.Tensor, received Ú.éÿÿÿÿ)Únum_classes)Ú
isinstancer   r   ÚndimÚget_optimal_reference_micÚintÚ
LongTensorÚshapeÚtoÚdeviceÚ
ValueErrorÚtypeÚFÚone_hotÚdtype)Úref_micr   r   r   Úbatch_mic_idxÚref_mic_vectsr   r   r   Úget_reference_mic_vects   s&   ÿ
 ÿÿ(z"Beamformer.get_reference_mic_vects©NN)	r   r   r   Ú__doc__Ústaticmethodr   r   r   r2   r   r   r   r   r      s    	üþýür   c                   @   sD   e Zd Zdejdejdejfdd„Zdejdejdejfdd„Zd	S )
ÚRTFMVDRBeamformerr   r   r   c                 C   s<   t j | dddd¡¡\}}|d }| j|| dd¡|dS )	ae  Compute and apply MVDR beamformer from the speech and noise SCM matrices.

        :math:`\mathbf{w} =  \displaystyle \frac{\Sigma_{nn}^{-1} \mathbf{a}}{
        \mathbf{a}^H \Sigma_{nn}^{-1} \mathbf{a}}` where :math:`\mathbf{a}` is the
        ATF estimated from the target SCM.

        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            target_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        r   é   r   é   ©.r    r    éþÿÿÿ)r   Úrtf_vecr   )r   ÚlinalgÚeighÚpermuteÚfrom_rtf_vectÚ	transpose)r
   r   r   r   Úe_valÚe_vecÚrtf_vectr   r   r   r   K   s   zRTFMVDRBeamformer.forwardr;   c           
      C   sl   |  dddd¡}| dd¡ d¡}t||ƒ}t | ¡  dd¡|¡}||  d¡ dd¡}| j||d}	|	S )a  Compute and apply MVDR beamformer from the ATF vector and noise SCM matrix.

        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            rtf_vec (torch.ComplexTensor): (batch, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        r   r7   r   r8   r    r:   ©r   )	r>   r@   Ú	unsqueezeÚstable_solver   Úmatmulr   Úsqueezer   )
r
   r   r;   r   Únoise_scm_tÚ	rtf_vec_tÚ	numeratorÚdenominatorÚbf_vectÚoutputr   r   r   r?   e   s   
zRTFMVDRBeamformer.from_rtf_vectN)r   r   r   r   r   r   r?   r   r   r   r   r6   J   s    þý
üþýür6   c                   @   s@   e Zd Z		d
dejdejdejdeejejef fdd„Zd	S )ÚSoudenMVDRBeamformerr   ç:Œ0âŽyE>r   r   r   r/   c                 C   s€   |  dddd¡}|  dddd¡}t||ƒ}|t|ƒd |  }| j||||d}t ||¡}	|	 d¡ dd¡}	| j|	|d	}
|
S )
aè  Compute and apply MVDR beamformer from the speech and noise SCM matrices.
        This class uses Souden's formulation [1].

        :math:`\mathbf{w} =  \displaystyle \frac{\Sigma_{nn}^{-1} \Sigma_{ss}}{
        Tr\left( \Sigma_{nn}^{-1} \Sigma_{ss} \right) }\mathbf{u}` where :math:`\mathbf{a}`
        is the steering vector.


        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            target_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            ref_mic (int): reference microphone.
            eps: numerical stabilizer.

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)

        References
            [1] Souden, M., Benesty, J., & Affes, S. (2009). On optimal frequency-domain multichannel
            linear filtering for noise reduction. IEEE Transactions on audio, speech, and language processing, 18(2), 260-276.
        r   r7   r   r8   ©.NN©r   r   r    r:   rD   )	r>   rF   Úbatch_tracer2   r   rG   rH   r@   r   )r
   r   r   r   r/   ÚepsrK   r   Úbatch_mic_vectsrM   rN   r   r   r   r      s   
ÿzSoudenMVDRBeamformer.forwardN)r   rP   )	r   r   r   r   r   r   r&   r%   r   r   r   r   r   rO   €   s    úþýüûrO   c                       sP   e Zd Zd‡ fdd„	Z	ddejdejdejdeejejef fd	d
„Z	‡  Z
S )ÚSDWMWFBeamformerç      ð?c                    s   t ƒ  ¡  || _d S ©N)ÚsuperÚ__init__Úmu)r
   r[   ©Ú	__class__r   r   rZ   °   s   

zSDWMWFBeamformer.__init__Nr   r   r   r/   c                 C   sz   |  dddd¡}|  dddd¡}|| j|  }t||ƒ}| j||||d}	t ||	¡}
|
 d¡ dd¡}
| j|
|d}|S )	a  Compute and apply SDW-MWF beamformer.

        :math:`\mathbf{w} =  \displaystyle (\Sigma_{ss} + \mu \Sigma_{nn})^{-1} \Sigma_{ss}`.

        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            target_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            ref_mic (int): reference microphone.

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        r   r7   r   r8   rR   r    r:   rD   )	r>   r[   rF   r2   r   rG   rH   r@   r   )r
   r   r   r   r/   rI   Útarget_scm_trL   r   rU   rM   rN   r   r   r   r   ´   s   
ÿzSDWMWFBeamformer.forward)rW   rX   )r   r   r   rZ   r   r   r   r&   r%   r   Ú__classcell__r   r   r\   r   rV   ¯   s    	ûþýüûrV   c                   @   sB   e Zd Zdejdejdejfdd„Zedejdejfdd„ƒZdS )	ÚGEVBeamformerr   r   r   c                 C   s   |   ||¡}| j||d}|S )a  Compute and apply the GEV beamformer.

        :math:`\mathbf{w} =  \displaystyle MaxEig\{ \Sigma_{nn}^{-1}\Sigma_{ss} \}`, where
        MaxEig extracts the eigenvector corresponding to the maximum eigenvalue
        (using the GEV decomposition).

        Args:
            mix: shape (batch, mics, freqs, frames)
            target_scm: (batch, mics, mics, freqs)
            noise_scm: (batch, mics, mics, freqs)

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        rD   ©Úcompute_beamforming_vectorr   )r
   r   r   r   rM   rN   r   r   r   r   Ú   s   zGEVBeamformer.forwardc                 C   sf   |  dddd¡}t|dƒ}t|   dddd¡|ƒ\}}|d }|tj|ddd	 }| d¡ dd
¡}|S )Nr   r7   r   r8   çíµ ÷Æ°>r9   r    T)ÚdimÚkeepdimr:   )r>   Úcondition_scmÚ$generalized_eigenvalue_decompositionr   ÚnormrH   r@   )r   r   rI   rA   rB   rM   r   r   r   rb   í   s   
ÿz(GEVBeamformer.compute_beamforming_vectorN)r   r   r   r   r   r   r5   rb   r   r   r   r   r`   Ù   s    r`   c                   @   sV   e Zd ZdZddedefdd„Zdejd	ejfd
d„Z	dejdejd	ejfdd„Z
dS )ÚGEVDBeamformera?  Generalized eigenvalue decomposition speech distortion weighted multichannel Wiener filter.

        Compare to SDW-MWF, spatial covariance matrix are computed from low rank approximation
        based on eigen values decomposition,
        see equation 62 in `[1] <https://hal.inria.fr/hal-01390918/file/14-1.pdf>`_.

    Attributes:
        mu (float): Speech distortion constant.
        rank (int): Rank for the approximation of target covariance matrix,
            no approximation is made if `rank` is None.

    References:
        [1] R. Serizel, M. Moonen, B. Van Dijk and J. Wouters,
        "Low-rank Approximation Based Multichannel Wiener Filter Algorithms for
        Noise Reduction with Application in Cochlear Implants,"
        in IEEE/ACM Transactions on Audio, Speech, and Language Processing, April 2014.
    rW   r   r[   Úrankc                 C   s   || _ || _d S rX   )r[   rj   )r
   r[   rj   r   r   r   rZ     s   
zGEVDBeamformer.__init__r   r   c           	      C   sà   t | dddd¡| dddd¡ƒ\}}t |j¡j}tj||dd}t t |dg¡¡}t |dg¡}| j	rBd|d	| j	d
…d
d
…f< |j}|| j
t |jd ¡ |¡  }|| |¡ tj || |¡ ¡ }|d  ddd¡S )a4  Compute beamforming vectors for GEVD beamFormer.

        Args:
            target_scm (torch.ComplexTensor): shape (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): shape (batch, mics, mics, freqs)

        Returns:
            torch.ComplexTensor: shape (batch, mics, freqs)

        r   r7   r   r8   g    €„.A)ÚminÚmaxr    g        .N).r   )Ú%_generalized_eigenvalue_decompositionr>   r   Úfinfor.   rT   ÚclampÚ
diag_embedÚfliprj   r[   Úeyer'   Ú	expand_asr(   r<   Úinv)	r
   r   r   Úe_valuesÚ	e_vectorsrT   Úcomplex_typeÚ
ev_plus_murM   r   r   r   rb     s&   þ ÿþÿz)GEVDBeamformer.compute_beamforming_vectorr   c                 C   s   |   ||¡}| j||dS )at  Compute and apply the GEVD beamformer.

        Args:
            mix (torch.ComplexTensor): shape (batch, mics, freqs, frames)
            target_scm (torch.ComplexTensor): (batch, mics, mics, freqs)
            noise_scm (torch.ComplexTensor): (batch, mics, mics, freqs)

        Returns:
            Filtered mixture. torch.ComplexTensor (batch, freqs, frames)
        rD   ra   )r
   r   r   r   rM   r   r   r   r   :  s   zGEVDBeamformer.forwardN)rW   r   )r   r   r   r4   Úfloatr%   rZ   r   r   rb   r   r   r   r   r   ri   û   s    (þýüri   Tr   r   r   c                 C   sx   | j \}}}}|du rt |d||¡}|jdkr |dd…df }t d||  |  ¡ ¡}|r:||jddd dd¡ }|S )	a’  Compute the spatial covariance matrix from a STFT signal x.

    Args:
        x (torch.ComplexTensor): shape  [batch, mics, freqs, frames]
        mask (torch.Tensor): [batch, 1, freqs, frames] or [batch, 1, freqs, frames]. Optional
        normalize (bool): Whether to normalize with the mask mean per bin.

    Returns:
        torch.ComplexTensor, the SCM with shape (batch, mics, mics, freqs)
    Nr   r7   zbmft,bnft->bmnfr    T)re   r:   )r'   r   Úonesr#   r   r   Úsumr@   )r   r   r   ÚbatchÚmicsÚfreqsÚframesÚscmr   r   r   r	   N  s   
r	   rc   r   r   r   rT   c                 C   s`   t jt  d|  ¡ || ¡j|d}t  d|  ¡ || ¡j| }t  t  |¡¡s)J |ƒ‚t j|ddS )a
  Compute the optimal reference mic given the a posteriori SNR, see [1].

    Args:
        bf_mat: (batch, freq, mics, mics)
        target_scm (torch.ComplexTensor): (batch, freqs, mics, mics)
        noise_scm (torch.ComplexTensor): (batch, freqs, mics, mics)
        eps: value to clip the denominator.

    Returns:
        torch.

    References
        Erdogan et al. 2016: "Improved MVDR beamforming using single-channel maskprediction networks"
            https://www.merl.com/publications/docs/TR2016-072.pdf
    z...flm,...fln,...fnm->...m)rk   r    )rd   )r   ro   r   r   ÚrealÚallÚisfiniteÚargmax)r   r   r   rT   ÚdenÚsnr_postr   r   r   r$   f  s   ÿÿr$   r:   r    c                 C   sb   |dks|dkr
t ‚|t| ||dd  | j|  }tj| j| | jdd | }| | d|  S )z˜Condition input SCM with (x + eps tr(x) I) / (1 + eps) along `dim1` and `dim2`.

    See https://stt.msu.edu/users/mauryaas/Ashwini_JPEN.pdf (2.3).
    r:   r    ©Údim1Údim2rQ   )r)   r3   r   )ÚNotImplementedErrorrS   r'   r   rr   r)   )r   rT   rˆ   r‰   ÚscaleÚ
scaled_eyer   r   r   rf   …  s
    rf   c                 C   s   t j| ||d d¡S )zECompute the trace along `dim1` and `dim2` for a any matrix `ndim>=2`.r‡   r    )r   Údiagonalr{   )r   rˆ   r‰   r   r   r   rS   ’  s   rS   c                 C   sD   t | |ƒ}|}|tjtjfvrtƒ | }t|  |¡| |¡ƒ |¡S )zVReturn torch.solve if `a` is non-singular, else regularize `a` and return torch.solve.)Ú_common_dtyper   Úfloat64Ú
complex128Ú_precision_mappingÚ_stable_solver(   )ÚbÚaÚinput_dtypeÚsolve_dtyper   r   r   rF   —  s
   

rF   c                 C   s<   zt j || ¡W S  ty   t||ƒ}t j || ¡ Y S w rX   )r   r<   ÚsolveÚRuntimeErrorrf   )r“   r”   rT   r   r   r   r’   ¡  s   
þr’   Fc                 C   s@   | j }|}|tjtjfvrtƒ | }t|  |¡|||d |¡S )as  Compute the Cholesky decomposition of ``input``.
    If ``input`` is only p.s.d, add a small jitter to the diagonal.

    Args:
        input (Tensor): The tensor to compute the Cholesky decomposition of
        upper (bool, optional): See torch.cholesky
        out (Tensor, optional): See torch.cholesky
        eps (int): small jitter added to the diagonal if PD.
    )ÚupperÚoutrT   )r.   r   r   r   r‘   Ú_stable_choleskyr(   )Úinputr™   rš   rT   r•   r–   r   r   r   Ústable_cholesky©  s
   
r   c                 C   sr   z|rt jj| |djW S t jj| |dW S  ty8   t| |ƒ} |r.t jj| |dj Y S t jj| |d Y S w )N)rš   )r   r<   ÚcholeskyÚmHr˜   rf   )rœ   r™   rš   rT   r   r   r   r›   »  s   
ür›   c                 C   sX   t | |ƒ}|}|tjtjfvrtƒ | }t|  |¡| |¡ƒ\}}| |¡j| |¡fS )z’Solves the generalized eigenvalue decomposition through Cholesky decomposition.
    Returns eigen values and eigen vectors (ascending order).
    )rŽ   r   r   r   r‘   rm   r(   r   )r”   r“   r•   r–   rA   rB   r   r   r   rg   Ç  s   

rg   c                 C   sZ   t |ƒ}t |¡}||  | ¡  dd¡ }tj |¡\}}t | ¡  dd¡|¡}||fS )Nr    r:   )r   r   Úinverser   r@   r<   r=   rG   )r”   r“   rž   Úinv_choleskyÚcmatrA   rB   r   r   r   rm   Ô  s   
rm   c                  G   s6   dd„ | D ƒ}t t|ƒƒdkrtd|› dƒ‚|d S )Nc                 S   s   g | ]}|j ‘qS r   )r.   )Ú.0r”   r   r   r   Ú
<listcomp>á  s    z!_common_dtype.<locals>.<listcomp>r   z.Expected inputs from the same dtype. Received r   r   )ÚlenÚsetr˜   )ÚargsÚ
all_dtypesr   r   r   rŽ   à  s   rŽ   c                   C   ó   da d S )NF©Ú
USE_DOUBLEr   r   r   r   Úforce_float_linalgê  ó   r¬   c                   C   r©   r   rª   r   r   r   r   Úforce_double_linalgï  r­   r®   c                  C   sn   t tdƒ} trtjtjtjtjtjtji}| rtj|tj< |S tjtjtjtjtjtji}| r5tj|tj< |S )NÚ	complex32)	Úhasattrr   r«   Úfloat16r   Úfloat32Ú	complex64r   r¯   )Úhas_complex32Úprecision_mapr   r   r   r‘   ô  s    
ý	úýr‘   r   )rc   )rc   r:   r    )r:   r    )FNrc   )$Útypingr   r   r   Útorch.nnr   r,   ÚModuler   r   r6   rO   rV   r`   ri   r   r   r	   ry   r$   rf   rS   rF   r’   r   r›   rg   rm   rŽ   r«   r¬   r®   r‘   Ú
BeamFormerÚSdwMwfBeamformerÚMvdrBeamformerr   r   r   r   Ú<module>   sL    =6/*"Süÿþý
ü





