o
    p’×i´Õ  ã                   @   sÞ   d Z ddlZddlmZ ddlm  mZ G dd„ dejj	ƒZ
G dd„ dejj	ƒZG dd	„ d	ejj	ƒZG d
d„ dejj	ƒZG dd„ dejj	ƒZG dd„ dejj	ƒZG dd„ dejj	ƒZddd„Zdd„ Zdd„ Zddd„ZdS )ak	  Multi-microphone components.

This library contains functions for multi-microphone signal processing.

Example
-------
>>> import torch
>>>
>>> from speechbrain.dataio.dataio import read_audio
>>> from speechbrain.processing.features import STFT, ISTFT
>>> from speechbrain.processing.multi_mic import Covariance
>>> from speechbrain.processing.multi_mic import GccPhat, SrpPhat, Music
>>> from speechbrain.processing.multi_mic import DelaySum, Mvdr, Gev
>>>
>>> xs_speech = read_audio(
...    'tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac'
... )
>>> xs_speech = xs_speech.unsqueeze(0) # [batch, time, channels]
>>> xs_noise_diff = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
>>> xs_noise_diff = xs_noise_diff.unsqueeze(0)
>>> xs_noise_loc = read_audio('tests/samples/multi-mic/noise_0.70225_-0.70225_0.11704.flac')
>>> xs_noise_loc =  xs_noise_loc.unsqueeze(0)
>>> fs = 16000 # sampling rate

>>> ss = xs_speech
>>> nn_diff = 0.05 * xs_noise_diff
>>> nn_loc = 0.05 * xs_noise_loc
>>> xs_diffused_noise = ss + nn_diff
>>> xs_localized_noise = ss + nn_loc

>>> # Delay-and-Sum Beamforming with GCC-PHAT localization
>>> stft = STFT(sample_rate=fs)
>>> cov = Covariance()
>>> gccphat = GccPhat()
>>> delaysum = DelaySum()
>>> istft = ISTFT(sample_rate=fs)

>>> Xs = stft(xs_diffused_noise)
>>> Ns = stft(nn_diff)
>>> XXs = cov(Xs)
>>> NNs = cov(Ns)
>>> tdoas = gccphat(XXs)
>>> Ys_ds = delaysum(Xs, tdoas)
>>> ys_ds = istft(Ys_ds)

>>> # Mvdr Beamforming with SRP-PHAT localization
>>> mvdr = Mvdr()
>>> mics = torch.zeros((4,3), dtype=torch.float)
>>> mics[0,:] = torch.FloatTensor([-0.05, -0.05, +0.00])
>>> mics[1,:] = torch.FloatTensor([-0.05, +0.05, +0.00])
>>> mics[2,:] = torch.FloatTensor([+0.05, +0.05, +0.00])
>>> mics[3,:] = torch.FloatTensor([+0.05, +0.05, +0.00])
>>> srpphat = SrpPhat(mics=mics)
>>> doas = srpphat(XXs)
>>> Ys_mvdr = mvdr(Xs, NNs, doas, doa_mode=True, mics=mics, fs=fs)
>>> ys_mvdr = istft(Ys_mvdr)

>>> # Mvdr Beamforming with MUSIC localization
>>> music = Music(mics=mics)
>>> doas = music(XXs)
>>> Ys_mvdr2 = mvdr(Xs, NNs, doas, doa_mode=True, mics=mics, fs=fs)
>>> ys_mvdr2 = istft(Ys_mvdr2)

>>> # GeV Beamforming
>>> gev = Gev()
>>> Xs = stft(xs_localized_noise)
>>> Ss = stft(ss)
>>> Ns = stft(nn_loc)
>>> SSs = cov(Ss)
>>> NNs = cov(Ns)
>>> Ys_gev = gev(Xs, SSs, NNs)
>>> ys_gev = istft(Ys_gev)

Authors:
 * William Aris
 * Francois Grondin

é    N)Úversionc                       s8   e Zd ZdZd	‡ fdd„	Zdd„ Zed	dd„ƒZ‡  ZS )
Ú
Covariancea  Computes the covariance matrices of the signals.

    Arguments
    ---------
    average : bool
        Informs the module if it should return an average
        (computed on the time dimension) of the covariance
        matrices. The Default value is True.

    Example
    -------
    >>> import torch
    >>> from speechbrain.dataio.dataio import read_audio
    >>> from speechbrain.processing.features import STFT
    >>> from speechbrain.processing.multi_mic import Covariance
    >>>
    >>> xs_speech = read_audio(
    ...    'tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac'
    ... )
    >>> xs_speech = xs_speech.unsqueeze(0) # [batch, time, channels]
    >>> xs_noise = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
    >>> xs_noise = xs_noise.unsqueeze(0)
    >>> xs = xs_speech + 0.05 * xs_noise
    >>> fs = 16000

    >>> stft = STFT(sample_rate=fs)
    >>> cov = Covariance()
    >>>
    >>> Xs = stft(xs)
    >>> XXs = cov(Xs)
    >>> XXs.shape
    torch.Size([1, 1001, 201, 2, 10])
    Tc                    ó   t ƒ  ¡  || _d S ©N)ÚsuperÚ__init__Úaverage)Úselfr   ©Ú	__class__© ú^/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/speechbrain/processing/multi_mic.pyr   y   s   

zCovariance.__init__c                 C   s   t j|| jd}|S )a.  This method uses the utility function _cov to compute covariance
        matrices. Therefore, the result has the following format:
        (batch, time_step, n_fft/2 + 1, 2, n_mics + n_pairs).

        The order on the last dimension corresponds to the triu_indices for a
        square matrix. For instance, if we have 4 channels, we get the following
        order: (0, 0), (0, 1), (0, 2), (0, 3), (1, 1), (1, 2), (1, 3), (2, 2), (2, 3)
        and (3, 3). Therefore, XXs[..., 0] corresponds to channels (0, 0) and XXs[..., 1]
        corresponds to channels (0, 1).

        Arguments:
        ----------
        Xs : torch.Tensor
            A batch of audio signals in the frequency domain.
            The tensor must have the following format:
            (batch, time_step, n_fft/2 + 1, 2, n_mics)
        )ÚXsr   )r   Ú_covr   )r	   r   ÚXXsr   r   r   Úforward~   s   zCovariance.forwardc                 C   s
  | j d }| dddd…f  d¡}| dddd…f  d¡}t || dd¡¡t || dd¡¡ }t || dd¡¡t || dd¡¡ }t ||¡}|d|d |d f }|d|d |d f }	t ||	fd¡}
|du rƒ|
j d }tj|
ddd}
|
 d|ddd¡}
|
S )	a\  Computes the covariance matrices (XXs) of the signals. The result will
        have the following format: (batch, time_step, n_fft/2 + 1, 2, n_mics + n_pairs).

        Arguments:
        ----------
        Xs : torch.Tensor
            A batch of audio signals in the frequency domain.
            The tensor must have the following format:
            (batch, time_step, n_fft/2 + 1, 2, n_mics)

        average : boolean
            Informs the function if it should return an average
            (computed on the time dimension) of the covariance
            matrices. Default value is True.
        é   .r   Né   é   T)Úkeepdim)	ÚshapeÚ	unsqueezeÚtorchÚmatmulÚ	transposeÚtriu_indicesÚstackÚmeanÚrepeat)r   r   Ún_micsÚXs_reÚXs_imÚRxx_reÚRxx_imÚidxÚXXs_reÚXXs_imr   Ún_time_framesr   r   r   r   “   s$   
ÿÿ
zCovariance._cov)T)	Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   Ústaticmethodr   Ú__classcell__r   r   r
   r   r   V   s    "r   c                       s>   e Zd ZdZ‡ fdd„Z				ddd„Zed	d
„ ƒZ‡  ZS )ÚDelaySumaQ  Performs delay and sum beamforming by using the TDOAs and
    the first channel as a reference.

    Example
    -------
    >>> import torch

    >>> from speechbrain.dataio.dataio import read_audio
    >>> from speechbrain.processing.features import STFT, ISTFT
    >>> from speechbrain.processing.multi_mic import Covariance
    >>> from speechbrain.processing.multi_mic import GccPhat, DelaySum
    >>>
    >>> xs_speech = read_audio(
    ...    'tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac'
    ... )
    >>> xs_speech = xs_speech. unsqueeze(0) # [batch, time, channel]
    >>> xs_noise  = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
    >>> xs_noise = xs_noise.unsqueeze(0) #[batch, time, channels]
    >>> fs = 16000
    >>> xs = xs_speech + 0.05 * xs_noise
    >>>
    >>> stft = STFT(sample_rate=fs)
    >>> cov = Covariance()
    >>> gccphat = GccPhat()
    >>> delaysum = DelaySum()
    >>> istft = ISTFT(sample_rate=fs)
    >>>
    >>> Xs = stft(xs)
    >>> XXs = cov(Xs)
    >>> tdoas = gccphat(XXs)
    >>> Ys = delaysum(Xs, tdoas)
    >>> ys = istft(Ys)
    c                    ó   t ƒ  ¡  d S r   ©r   r   ©r	   r
   r   r   r   è   ó   zDelaySum.__init__FNç     pu@c                 C   sT   |j d }| |j¡}|rt||||d}nt|d}t||d}	tj||	d}
|
S )a6  This method computes a steering vector by using the TDOAs/DOAs and
        then calls the utility function _delaysum to perform beamforming.
        The result has the following format: (batch, time_step, n_fft, 2, 1).

        Arguments
        ---------
        Xs : torch.Tensor
            A batch of audio signals in the frequency domain.
            The tensor must have the following format:
            (batch, time_step, n_fft/2 + 1, 2, n_mics)
        localization_tensor : torch.Tensor
            A tensor containing either time differences of arrival (TDOAs)
            (in samples) for each timestamp or directions of arrival (DOAs)
            (xyz coordinates in meters). If localization_tensor represents
            TDOAs, then its format is (batch, time_steps, n_mics + n_pairs).
            If localization_tensor represents DOAs, then its format is
            (batch, time_steps, 3)
        doa_mode : bool
            The user needs to set this parameter to True if localization_tensor
            represents DOAs instead of TDOAs. Its default value is set to False.
        mics : torch.Tensor
            The cartesian position (xyz coordinates in meters) of each microphone.
            The tensor must have the following format (n_mics, 3). This
            parameter is only mandatory when localization_tensor represents
            DOAs.
        fs : int
            The sample rate in Hertz of the signals. This parameter is only
            mandatory when localization_tensor represents DOAs.
        c : float
            The speed of sound in the medium. The speed is expressed in meters
            per second and the default value of this parameter is 343 m/s. This
            parameter is only used when localization_tensor represents DOAs.

        Returns
        -------
        Ys : torch.Tensor
        é   ©ÚdoasÚmicsÚfsÚc©Útdoas©ÚtausÚn_fft)r   ÚAs)r   ÚtoÚdeviceÚ	doas2tausÚ
tdoas2tausÚsteeringr.   Ú	_delaysum)r	   r   Úlocalization_tensorÚdoa_moder7   r8   r9   r>   r=   r?   ÚYsr   r   r   r   ì   s   
/
zDelaySum.forwardc           
      C   sª   | j d }|dddd…f | }d|dddd…f  | }| dddd…f }| dddd…f }tj|| ||  ddd	}tj|| ||  ddd	}t ||fd¡}	|	S )
al  Perform delay and sum beamforming. The result has
        the following format: (batch, time_step, n_fft, 2, 1).

        Arguments
        ---------
        Xs : torch.Tensor
            A batch of audio signals in the frequency domain.
            The tensor must have the following format:
            (batch, time_step, n_fft/2 + 1, 2, n_mics)
        As : torch.Tensor
            The steering vector to point in the direction of
            the target source. The tensor must have the format
            (batch, time_step, n_fft/2 + 1, 2, n_mics)

        Returns
        -------
        Ys : torch.Tensor
        r   .r   Néÿÿÿÿr   r   T©Údimr   )r   r   Úsumr   )
r   r?   r   ÚWs_reÚWs_imr    r!   ÚYs_reÚYs_imrH   r   r   r   rE   ,  s   
zDelaySum._delaysum©FNNr3   )	r(   r)   r*   r+   r   r   r,   rE   r-   r   r   r
   r   r.   Å   s    "
ù@r.   c                       sB   e Zd ZdZd‡ fdd„	Z				ddd	„Zedd
d„ƒZ‡  ZS )ÚMvdraS  Perform minimum variance distortionless response (MVDR) beamforming
    by using an input signal in the frequency domain, its covariance matrices
    and tdoas (to compute a steering vector).

        Example
        -------
        >>> import torch

        >>> from speechbrain.dataio.dataio import read_audio
        >>> from speechbrain.processing.features import STFT, ISTFT
        >>> from speechbrain.processing.multi_mic import Covariance
        >>> from speechbrain.processing.multi_mic import GccPhat, DelaySum
        >>>
        >>> xs_speech = read_audio(
        ...    'tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac'
        ... )
        >>> xs_speech = xs_speech.unsqueeze(0) # [batch, time, channel]
        >>> xs_noise  = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
        >>> xs_noise = xs_noise.unsqueeze(0) #[batch, time, channels]
        >>> fs = 16000
        >>> xs = xs_speech + 0.05 * xs_noise
        >>>
        >>> stft = STFT(sample_rate=fs)
        >>> cov = Covariance()
        >>> gccphat = GccPhat()
        >>> mvdr = Mvdr()
        >>> istft = ISTFT(sample_rate=fs)
        >>>
        >>> Xs = stft(xs)
        >>> Ns = stft(xs_noise)
        >>> XXs = cov(Xs)
        >>> NNs = cov(Ns)
        >>> tdoas = gccphat(XXs)
        >>> Ys = mvdr(Xs, NNs, tdoas)
        >>> ys = istft(Ys)
    ç#B’¡œÇ;c                    r   r   )r   r   Úeps)r	   rT   r
   r   r   r   {  s   

zMvdr.__init__FNr3   c                 C   sv   |j d }| |j¡}| |j¡}|dur| |j¡}|r&t||||d}	nt|d}	t|	|d}
tj|||
d}|S )aÎ  This method computes a steering vector before using the
        utility function _mvdr to perform beamforming. The result has
        the following format: (batch, time_step, n_fft, 2, 1).

        Arguments
        ---------
        Xs : torch.Tensor
            A batch of audio signals in the frequency domain.
            The tensor must have the following format:
            (batch, time_step, n_fft/2 + 1, 2, n_mics)
        NNs : torch.Tensor
            The covariance matrices of the noise signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs)
        localization_tensor : torch.Tensor
            A tensor containing either time differences of arrival (TDOAs)
            (in samples) for each timestamp or directions of arrival (DOAs)
            (xyz coordinates in meters). If localization_tensor represents
            TDOAs, then its format is (batch, time_steps, n_mics + n_pairs).
            If localization_tensor represents DOAs, then its format is
            (batch, time_steps, 3)
        doa_mode : bool
            The user needs to set this parameter to True if localization_tensor
            represents DOAs instead of TDOAs. Its default value is set to False.
        mics : torch.Tensor
            The cartesian position (xyz coordinates in meters) of each microphone.
            The tensor must have the following format (n_mics, 3). This
            parameter is only mandatory when localization_tensor represents
            DOAs.
        fs : int
            The sample rate in Hertz of the signals. This parameter is only
            mandatory when localization_tensor represents DOAs.
        c : float
            The speed of sound in the medium. The speed is expressed in meters
            per second and the default value of this parameter is 343 m/s. This
            parameter is only used when localization_tensor represents DOAs.

        Returns
        -------
        Ys : torch.Tensor
        r4   Nr5   r:   r<   )r   ÚNNsr?   )r   r@   rA   rB   rC   rD   rR   Ú_mvdr)r	   r   rU   rF   rG   r7   r8   r9   r>   r=   r?   rH   r   r   r   r     s   
3
zMvdr.forwardc                 C   sx  t j|ddd\}}t |¡}|d dd…|f }|d dd…|f }|dddd…f  d	¡}	d
|dddd…f  d	¡ }
|	 dd	¡}d|
 dd	¡ }t  ||	¡t  ||
¡ }t  ||
¡t  ||	¡ }d
t  ||¡t  ||¡  }t  ||¡ d	¡}t  ||¡ d	¡ }| dddd…f }| dddd…f }t j|| ||  ddd}t j|| ||  ddd}t  	||fd¡}|S )a;  Perform minimum variance distortionless response beamforming.

        Arguments
        ---------
        Xs : torch.Tensor
            A batch of audio signals in the frequency domain.
            The tensor must have the following format:
            (batch, time_step, n_fft/2 + 1, 2, n_mics).
        NNs : torch.Tensor
            The covariance matrices of the noise signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).
        As : torch.Tensor
            The steering vector to point in the direction of
            the target source. The tensor must have the format
            (batch, time_step, n_fft/2 + 1, 2, n_mics).
        eps : float
            A small value to avoid division by zero.

        Returns
        -------
        Ys : torch.Tensor
        Tr   ©Úreturn_inverserK   ©.r   N©.r   .r   r   ç      ð?r   ç      ð¿rJ   éþÿÿÿ)
r   ÚuniqueÚeigÚinvr   r   r   ÚsqueezerL   r   )r   rU   r?   rT   ÚNNs_valÚNNs_idxÚNNs_invÚ
NNs_inv_reÚ
NNs_inv_imÚAsC_reÚAsC_imÚAsT_reÚAsT_imÚNNs_inv_AsC_reÚNNs_inv_AsC_imÚalpharM   rN   r    r!   rO   rP   rH   r   r   r   rV   É  s6   
ÿÿ

ÿÿz
Mvdr._mvdr©rS   rQ   )	r(   r)   r*   r+   r   r   r,   rV   r-   r   r   r
   r   rR   U  s    %
øHrR   c                       s4   e Zd ZdZ‡ fdd„Zdd„ Zedd„ ƒZ‡  ZS )ÚGevaG  Generalized EigenValue decomposition (GEV) Beamforming.

    Example
    -------
    >>> from speechbrain.dataio.dataio import read_audio
    >>> import torch
    >>>
    >>> from speechbrain.processing.features import STFT, ISTFT
    >>> from speechbrain.processing.multi_mic import Covariance
    >>> from speechbrain.processing.multi_mic import Gev
    >>>
    >>> xs_speech = read_audio(
    ...    'tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac'
    ... )
    >>> xs_speech  = xs_speech.unsqueeze(0) # [batch, time, channels]
    >>> xs_noise = read_audio('tests/samples/multi-mic/noise_0.70225_-0.70225_0.11704.flac')
    >>> xs_noise = xs_noise.unsqueeze(0)
    >>> fs = 16000
    >>> ss = xs_speech
    >>> nn = 0.05 * xs_noise
    >>> xs = ss + nn
    >>>
    >>> stft = STFT(sample_rate=fs)
    >>> cov = Covariance()
    >>> gev = Gev()
    >>> istft = ISTFT(sample_rate=fs)
    >>>
    >>> Ss = stft(ss)
    >>> Nn = stft(nn)
    >>> Xs = stft(xs)
    >>>
    >>> SSs = cov(Ss)
    >>> NNs = cov(Nn)
    >>>
    >>> Ys = gev(Xs, SSs, NNs)
    >>> ys = istft(Ys)
    c                    r/   r   r0   r1   r
   r   r   r   6  r2   zGev.__init__c                 C   s   t j|||d}|S )ag  This method uses the utility function _gev to perform generalized
        eigenvalue decomposition beamforming. Therefore, the result has
        the following format: (batch, time_step, n_fft, 2, 1).

        Arguments
        ---------
        Xs : torch.Tensor
            A batch of audio signals in the frequency domain.
            The tensor must have the following format:
            (batch, time_step, n_fft/2 + 1, 2, n_mics).
        SSs : torch.Tensor
            The covariance matrices of the target signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).
        NNs : torch.Tensor
            The covariance matrices of the noise signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).

        Returns
        -------
        Ys : torch.Tensor
        )r   ÚSSsrU   )ro   Ú_gev)r	   r   rp   rU   rH   r   r   r   r   :  s   zGev.forwardc                 C   sˆ  |  | j¡}|  | j¡}| jd }|jd }tj||fdd}tj|ddd\}}|dtd|ƒf }|dt|d| ƒf }t |¡}t 	||¡\}}	|d|d df }
|d|d df }d	tj
|
d |d  d
ddd  ddd|¡ }|
|9 }
||9 }|
dd…|f }|dd…|f }| dddd…f }| dddd…f }tj
|| ||  d
dd}tj
|| ||  d
dd}t ||fd
¡}|S )a&  Perform generalized eigenvalue decomposition beamforming. The result
        has the following format: (batch, time_step, n_fft, 2, 1).

        Arguments
        ---------
        Xs : torch.Tensor
            A batch of audio signals in the frequency domain.
            The tensor must have the following format:
            (batch, time_step, n_fft/2 + 1, 2, n_mics).
        SSs : torch.Tensor
            The covariance matrices of the target signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).
        NNs : torch.Tensor
            The covariance matrices of the noise signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).

        Returns
        -------
        Ys : torch.Tensor
        r   ©rK   Tr   rW   .r   r4   r[   r   rJ   ç      à?N)r@   rA   r   r   Úcatr^   Úranger_   Úpos_defÚgevdrL   r   r   )r   rp   rU   r   Ún_mics_pairsÚSSs_NNsÚSSs_NNs_valÚSSs_NNs_idxÚVsÚDsÚF_reÚF_imÚF_normrM   rN   r    r!   rO   rP   rH   r   r   r   rq   T  s8   


ÿ
þzGev._gev)	r(   r)   r*   r+   r   r   r,   rq   r-   r   r   r
   r   ro     s    &ro   c                       sR   e Zd ZdZd‡ fdd„	Zdd„ Zeddd	„ƒZedd
d„ƒZedd„ ƒZ	‡  Z
S )ÚGccPhata¶  Generalized Cross-Correlation with Phase Transform localization.

    Arguments
    ---------
    tdoa_max : int
        Specifies a range to search for delays. For example, if
        tdoa_max = 10, the method will restrict its search for delays
        between -10 and 10 samples. This parameter is optional and its
        default value is None. When tdoa_max is None, the method will
        search for delays between -n_fft/2 and n_fft/2 (full range).
    eps : float
        A small value to avoid divisions by 0 with the phase transformation.
        The default value is 1e-20.

    Example
    -------
    >>> import torch

    >>> from speechbrain.dataio.dataio import read_audio
    >>> from speechbrain.processing.features import STFT, ISTFT
    >>> from speechbrain.processing.multi_mic import Covariance
    >>> from speechbrain.processing.multi_mic import GccPhat, DelaySum
    >>>
    >>> xs_speech = read_audio(
    ...    'tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac'
    ... )
    >>> xs_speech = xs_speech.unsqueeze(0) # [batch, time, channel]
    >>> xs_noise  = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
    >>> xs_noise = xs_noise.unsqueeze(0) #[batch, time, channels]
    >>> fs = 16000
    >>> xs = xs_speech + 0.05 * xs_noise
    >>>
    >>> stft = STFT(sample_rate=fs)
    >>> cov = Covariance()
    >>> gccphat = GccPhat()
    >>> Xs = stft(xs)
    >>> XXs = cov(Xs)
    >>> tdoas = gccphat(XXs)
    NrS   c                    s   t ƒ  ¡  || _|| _d S r   )r   r   Útdoa_maxrT   )r	   r‚   rT   r
   r   r   r   À  s   

zGccPhat.__init__c                 C   s2   t j|| jd}t j|| jd}t j||d}|S )a  Perform generalized cross-correlation with phase transform localization
        by using the utility function _gcc_phat and by extracting the delays (in samples)
        before performing a quadratic interpolation to improve the accuracy.
        The result has the format: (batch, time_steps, n_mics + n_pairs).

        The order on the last dimension corresponds to the triu_indices for a
        square matrix. For instance, if we have 4 channels, we get the following
        order: (0, 0), (0, 1), (0, 2), (0, 3), (1, 1), (1, 2), (1, 3), (2, 2), (2, 3)
        and (3, 3). Therefore, delays[..., 0] corresponds to channels (0, 0) and delays[..., 1]
        corresponds to channels (0, 1).

        Arguments:
        ----------
        XXs : torch.Tensor
            The covariance matrices of the input signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).
        )r   rT   )Úxxsr‚   )rƒ   Údelays)r   Ú	_gcc_phatrT   Ú_extract_delaysr‚   Ú_interpolate)r	   r   rƒ   r„   r;   r   r   r   r   Æ  s   zGccPhat.forwardc                 C   s   | j d d d }tj| ddd\}}|dddd…f }|dddd…f }t |d |d  ¡| }|| }|| }	t ||	fd¡}
|
 dd	¡}
t tj¡t d
¡krft 	|
d |
d ¡}
tj
j|
|d}n	tj|
d|gd}|d|dd…f }| dd	¡}|S )aI  Evaluate GCC-PHAT for each timestamp. It returns the result in the time
        domain. The result has the format: (batch, time_steps, n_fft, n_mics + n_pairs).

        Arguments
        ---------
        XXs : torch.Tensor
            The covariance matrices of the input signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).
        eps : float
            A small value to avoid divisions by 0 with the phase transform. The
            default value is 1e-20.

        Returns
        -------
        xxs : torch.Tensor
        r4   r   Tr   rW   .r   Nr   z1.8.0rY   rZ   )Ún)Úsignal_ndimÚsignal_sizes)r   r   r^   Úsqrtr   r   r   ÚparseÚ__version__ÚcomplexÚfftÚirfft)r   rT   Ú	n_samplesÚXXs_valÚXXs_idxr%   r&   ÚXXs_absÚXXs_re_phatÚXXs_im_phatÚXXs_phatrƒ   r   r   r   r…   Ý  s    zGccPhat._gcc_phatc           
      C   s°   | j d }|du rtj|ddd}| dd|…dd…f }| d| d…dd…f }t ||fd¡}t |d¡\}}||j d  }||j d k}	||	  |7  < ||	  |8  < |S )aB  Extract the rounded delays from the cross-correlation for each timestamp.
        The result has the format: (batch, time_steps, n_mics + n_pairs).

        Arguments
        ---------
        xxs : torch.Tensor
            The correlation signals obtained after a gcc-phat operation. The tensor
            must have the format (batch, time_steps, n_fft, n_mics + n_pairs).
        tdoa_max : int
            Specifies a range to search for delays. For example, if
            tdoa_max = 10, the method will restrict its search for delays
            between -10 and 10 samples. This parameter is optional and its
            default value is None. When tdoa_max is None, the method will
            search for delays between -n_fft/2 and +n_fft/2 (full range).

        Returns
        -------
        delays : torch.Tensor
        r4   NÚfloor©Úrounding_mode.r   )r   r   Údivrt   Úmax)
rƒ   r‚   r>   Úslice_1Úslice_2Ú
xxs_slicedÚ_r„   Úoffsetr$   r   r   r   r†     s   
zGccPhat._extract_delaysc                 C   s¸   | j d }t |d | |¡ d¡}t | d|¡ d¡}t || |¡ d¡}t | d|¡ d¡}t |d | |¡ d¡}t | d|¡ d¡}||| d| d|  d|    }|S )a¯  Perform quadratic interpolation on the cross-correlation to
        improve the tdoa accuracy. The result has the format:
        (batch, time_steps, n_mics + n_pairs)

        Arguments
        ---------
        xxs : torch.Tensor
            The correlation signals obtained after a gcc-phat operation. The tensor
            must have the format (batch, time_steps, n_fft, n_mics + n_pairs).
        delays : torch.Tensor
            The rounded tdoas obtained by selecting the sample with the highest
            amplitude. The tensor must have the format
            (batch, time_steps, n_mics + n_pairs).

        Returns
        -------
        delays_frac : torch.Tensor
        r4   r   r   )r   r   Úfmodr   Úgatherra   )rƒ   r„   r>   ÚtpÚy1Úy2Úy3Údelays_fracr   r   r   r‡   =  s   
$zGccPhat._interpolate)NrS   rn   r   )r(   r)   r*   r+   r   r   r,   r…   r†   r‡   r-   r   r   r
   r   r   —  s    (0.r   c                       s@   e Zd ZdZ				d‡ fdd„	Zdd	„ Zedd
d„ƒZ‡  ZS )ÚSrpPhataV	  Steered-Response Power with Phase Transform Localization.

    Arguments
    ---------
    mics : torch.Tensor
        The cartesian coordinates (xyz) in meters of each microphone.
        The tensor must have the following format (n_mics, 3).
    space : string
        If this parameter is set to 'sphere', the localization will
        be done in 3D by searching in a sphere of possible doas. If
        it set to 'circle', the search will be done in 2D by searching
        in a circle. By default, this parameter is set to 'sphere'.
        Note: The 'circle' option isn't implemented yet.
    sample_rate : int
        The sample rate in Hertz of the signals to perform SRP-PHAT on.
        By default, this parameter is set to 16000 Hz.
    speed_sound : float
        The speed of sound in the medium. The speed is expressed in meters
        per second and the default value of this parameter is 343 m/s.
    eps : float
        A small value to avoid errors like division by 0. The default value
        of this parameter is 1e-20.

    Example
    -------
    >>> import torch

    >>> from speechbrain.dataio.dataio import read_audio
    >>> from speechbrain.processing.features import STFT
    >>> from speechbrain.processing.multi_mic import Covariance
    >>> from speechbrain.processing.multi_mic import SrpPhat

    >>> xs_speech = read_audio('tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac')
    >>> xs_noise = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
    >>> fs = 16000

    >>> xs_speech = xs_speech.unsqueeze(0) # [batch, time, channels]
    >>> xs_noise = xs_noise.unsqueeze(0)

    >>> ss1 = xs_speech
    >>> ns1 = 0.05 * xs_noise
    >>> xs1 = ss1 + ns1

    >>> ss2 = xs_speech
    >>> ns2 = 0.20 * xs_noise
    >>> xs2 = ss2 + ns2

    >>> ss = torch.cat((ss1,ss2), dim=0)
    >>> ns = torch.cat((ns1,ns2), dim=0)
    >>> xs = torch.cat((xs1,xs2), dim=0)

    >>> mics = torch.zeros((4,3), dtype=torch.float)
    >>> mics[0,:] = torch.FloatTensor([-0.05, -0.05, +0.00])
    >>> mics[1,:] = torch.FloatTensor([-0.05, +0.05, +0.00])
    >>> mics[2,:] = torch.FloatTensor([+0.05, +0.05, +0.00])
    >>> mics[3,:] = torch.FloatTensor([+0.05, +0.05, +0.00])

    >>> stft = STFT(sample_rate=fs)
    >>> cov = Covariance()
    >>> srpphat = SrpPhat(mics=mics)

    >>> Xs = stft(xs)
    >>> XXs = cov(Xs)
    >>> doas = srpphat(XXs)
    Úsphereé€>  r3   rS   c                    sB   t ƒ  ¡  |dkrtƒ | _|dkr	 t| j|||d| _|| _d S ©Nrª   Úcircle)r7   r8   r9   )r   r   rª   r6   rB   r=   rT   )r	   r7   ÚspaceÚsample_rateÚspeed_soundrT   r
   r   r   r   ¥  s   
	
ÿ
zSrpPhat.__init__c                 C   s8   |j d }t| j |j¡|ƒ}tj||| j| jd}|S )aG  Perform SRP-PHAT localization on a signal by computing a steering
        vector and then by using the utility function _srp_phat to extract the doas.
        The result is a tensor containing the directions of arrival (xyz coordinates
        (in meters) in the direction of the sound source). The output tensor
        has the format (batch, time_steps, 3).

        This localization method uses Global Coherence Field (GCF):
        https://www.researchgate.net/publication/221491705_Speaker_localization_based_on_oriented_global_coherence_field

        Arguments
        ---------
        XXs : torch.Tensor
            The covariance matrices of the input signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).

        Returns
        -------
        doas : torch.Tensor
        r4   )r   r?   r6   rT   )	r   rD   r=   r@   rA   r©   Ú	_srp_phatr6   rT   ©r	   r   r>   r?   r6   r   r   r   r   ¿  s   
zSrpPhat.forwardc                 C   s$  |  | j¡}|  | j¡}|jd }t ||¡}|dd…dd…d|ddd…f f }|dd…dd…d|ddd…f f }|dd…dd…d|ddd…f f }|dd…dd…d|ddd…f f }	|| ||	  }
||	 ||  }|
 |
jd d¡}
| |jd d¡}tj| ddd\}}|dd…dd…dd…ddd…f }|dd…dd…dd…ddd…f }| |jd |jd df¡}| |jd |jd df¡}t |d |d  ¡| }|| }|| }t ||
 	dd¡¡}t || 	dd¡¡}|| }tj
|dd	\}}||dd…f dd…|dd…f }|S )
aô  Perform srp-phat to find the direction of arrival
        of the sound source. The result is a tensor containing the directions
        of arrival (xyz coordinates (in meters) in the direction of the sound source).
        The output tensor has the format: (batch, time_steps, 3).

        Arguments
        ---------
        XXs : torch.Tensor
            The covariance matrices of the input signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).
        As : torch.Tensor
            The steering vector that cover the all the potential directions
            of arrival. The tensor must have the format
            (n_doas, n_fft/2 + 1, 2, n_mics).
        doas : torch.Tensor
            All the possible directions of arrival that will be scanned. The
            tensor must have the format (n_doas, 3).
        eps : float
            A very small value used to avoid division by 0.

        Returns
        -------
        doas : torch.Tensor
        r   Nr   r   rI   TrW   r4   rr   )r@   rA   r   r   r   Úreshaper^   r‹   r   r   rœ   )r   r?   r6   rT   r   r$   ÚAs_1_reÚAs_1_imÚAs_2_reÚAs_2_imrM   rN   r’   r“   r%   r&   r”   ÚXXs_re_normÚXXs_im_normÚYs_AÚYs_BrH   r    Údoas_idxr   r   r   r±   Þ  s4   
$$$$"""zSrpPhat._srp_phat)rª   r«   r3   rS   rn   )	r(   r)   r*   r+   r   r   r,   r±   r-   r   r   r
   r   r©   b  s    Eúr©   c                       sB   e Zd ZdZ					d‡ fdd„	Zd	d
„ Zeddd„ƒZ‡  ZS )ÚMusicaÁ	  Multiple Signal Classification (MUSIC) localization.

    Arguments
    ---------
    mics : torch.Tensor
        The cartesian coordinates (xyz) in meters of each microphone.
        The tensor must have the following format (n_mics, 3).
    space : string
        If this parameter is set to 'sphere', the localization will
        be done in 3D by searching in a sphere of possible doas. If
        it set to 'circle', the search will be done in 2D by searching
        in a circle. By default, this parameter is set to 'sphere'.
        Note: The 'circle' option isn't implemented yet.
    sample_rate : int
        The sample rate in Hertz of the signals to perform SRP-PHAT on.
        By default, this parameter is set to 16000 Hz.
    speed_sound : float
        The speed of sound in the medium. The speed is expressed in meters
        per second and the default value of this parameter is 343 m/s.
    eps : float
        A small value to avoid errors like division by 0. The default value
        of this parameter is 1e-20.
    n_sig : int
        An estimation of the number of sound sources. The default value is set
        to one source.

    Example
    -------
    >>> import torch

    >>> from speechbrain.dataio.dataio import read_audio
    >>> from speechbrain.processing.features import STFT
    >>> from speechbrain.processing.multi_mic import Covariance
    >>> from speechbrain.processing.multi_mic import SrpPhat

    >>> xs_speech = read_audio('tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac')
    >>> xs_noise = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
    >>> fs = 16000

    >>> xs_speech = xs_speech.unsqueeze(0) # [batch, time, channels]
    >>> xs_noise = xs_noise.unsqueeze(0)

    >>> ss1 = xs_speech
    >>> ns1 = 0.05 * xs_noise
    >>> xs1 = ss1 + ns1

    >>> ss2 = xs_speech
    >>> ns2 = 0.20 * xs_noise
    >>> xs2 = ss2 + ns2

    >>> ss = torch.cat((ss1,ss2), dim=0)
    >>> ns = torch.cat((ns1,ns2), dim=0)
    >>> xs = torch.cat((xs1,xs2), dim=0)

    >>> mics = torch.zeros((4,3), dtype=torch.float)
    >>> mics[0,:] = torch.FloatTensor([-0.05, -0.05, +0.00])
    >>> mics[1,:] = torch.FloatTensor([-0.05, +0.05, +0.00])
    >>> mics[2,:] = torch.FloatTensor([+0.05, +0.05, +0.00])
    >>> mics[3,:] = torch.FloatTensor([+0.05, +0.05, +0.00])

    >>> stft = STFT(sample_rate=fs)
    >>> cov = Covariance()
    >>> music = Music(mics=mics)

    >>> Xs = stft(xs)
    >>> XXs = cov(Xs)
    >>> doas = music(XXs)
    rª   r«   r3   rS   r   c                    sH   t ƒ  ¡  |dkrtƒ | _|dkr	 t| j|||d| _|| _|| _d S r¬   )r   r   rª   r6   rB   r=   rT   Ún_sig)r	   r7   r®   r¯   r°   rT   r¾   r
   r   r   r   l  s   


ÿ
zMusic.__init__c                 C   s<   |j d }t| j |j¡|ƒ}tj||| j| j| j	d}|S )aƒ  Perform MUSIC localization on a signal by computing a steering
        vector and then by using the utility function _music to extract the doas.
        The result is a tensor containing the directions of arrival (xyz coordinates
        (in meters) in the direction of the sound source). The output tensor
        has the format (batch, time_steps, 3).

        Arguments
        ---------
        XXs : torch.Tensor
            The covariance matrices of the input signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).

        Returns
        -------
        doas : torch.Tensor
        r4   )r   r?   r6   r¾   rT   )
r   rD   r=   r@   rA   r½   Ú_musicr6   r¾   rT   r²   r   r   r   r   Š  s   
ÿzMusic.forwardc              	   C   sÒ  |  | j¡}|  | j¡}|jd }|jd }|jd }|| }tj| ddd\}	}
t |	¡\}}| d¡ dd|dddd¡}|dt	d|ƒdf }|dt	d|ƒdf }| d¡ d¡ d¡ 
dddddd	d
¡}| |jd |jd ddddd¡}|d }|d }t ||¡t ||¡ }t ||¡t ||¡ }t |d |d  ¡}tj|d	d}tj|d d	dtj|d d	d }|||   d
¡}tj|dd| }tj|dd\}}||dd…f dd…|
dd…f }|S )aÞ  Perform multiple signal classification to find the
        direction of arrival of the sound source. The result
        has the format: (batch, time_steps, 3).

        Arguments
        ---------
        XXs : torch.Tensor
            The covariance matrices of the input signal. The tensor must
            have the format (batch, time_steps, n_fft/2 + 1, 2, n_mics + n_pairs).
        As : torch.Tensor
            The steering vector that covers the all the potential directions
            of arrival. The tensor must have the format.
            (n_doas, n_fft/2 + 1, 2, n_mics).
        doas : torch.Tensor
            All the possible directions of arrival that will be scanned. The
            tensor must have the format (n_doas, 3).
        n_sig : int
            The number of signals in the signal + noise subspace (default is 1).
        eps : float
            A small number to avoid div by zero errors.

        Returns
        -------
        doas : torch.Tensor
        r   r   r4   Tr   rW   .é   é   r   rY   rZ   rr   N)r@   rA   r   r   r^   r_   Úsvdlr   r   ru   Úpermuter   r‹   rL   ra   rœ   )r   r?   r6   r¾   rT   r   Ún_doasÚn_binsÚ	svd_ranger’   r“   ÚUsr    ÚUs_reÚUs_imÚAs_reÚAs_imÚAs_mm_Us_reÚAs_mm_Us_imÚAs_mm_Us_absÚAs_mm_Us_sumÚ	As_As_absÚPsrH   r¼   r   r   r   r¿   ¨  s:   


ü"$"zMusic._music)rª   r«   r3   rS   r   rn   )	r(   r)   r*   r+   r   r   r,   r¿   r-   r   r   r
   r   r½   &  s    Hùr½   r3   c                 C   s(   || t  |  |j¡| dd¡¡ }|S )a  This function converts directions of arrival (xyz coordinates
    expressed in meters) in time differences of arrival (expressed in
    samples). The result has the following format: (batch, time_steps, n_mics).

    Arguments
    ---------
    doas : torch.Tensor
        The directions of arrival expressed with cartesian coordinates (xyz)
        in meters. The tensor must have the following format: (batch, time_steps, 3).
    mics : torch.Tensor
        The cartesian position (xyz) in meters of each microphone.
        The tensor must have the following format (n_mics, 3).
    fs : int
        The sample rate in Hertz of the signals.
    c : float
        The speed of sound in the medium. The speed is expressed in meters
        per second and the default value of this parameter is 343 m/s.

    Returns
    -------
    taus : torch.Tensor

    Example
    -------
    >>> import torch

    >>> from speechbrain.dataio.dataio import read_audio
    >>> from speechbrain.processing.multi_mic import sphere, doas2taus

    >>> xs = read_audio('tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac')
    >>> xs = xs.unsqueeze(0) # [batch, time, channels]
    >>> fs = 16000
    >>> mics = torch.zeros((4,3), dtype=torch.float)
    >>> mics[0,:] = torch.FloatTensor([-0.05, -0.05, +0.00])
    >>> mics[1,:] = torch.FloatTensor([-0.05, +0.05, +0.00])
    >>> mics[2,:] = torch.FloatTensor([+0.05, +0.05, +0.00])
    >>> mics[3,:] = torch.FloatTensor([+0.05, +0.05, +0.00])

    >>> doas = sphere()
    >>> taus = doas2taus(doas, mics, fs)
    r   r   )r   r   r@   rA   r   )r6   r7   r8   r9   r=   r   r   r   rB   ù  s   $*rB   c                 C   sF   | j t| j ƒd  }tdd|  d d d ƒ}| dtd|ƒf }|S )a½  This function selects the tdoas of each channel and put them
    in a tensor. The result has the following format:
    (batch, time_steps, n_mics).

    Arguments
    ---------
    tdoas : torch.Tensor
       The time difference of arrival (TDOA) (in samples) for
       each timestamp. The tensor has the format
       (batch, time_steps, n_mics + n_pairs).

    Returns
    -------
    taus : torch.Tensor

    Example
    -------
    >>> import torch
    >>> from speechbrain.dataio.dataio import read_audio
    >>> from speechbrain.processing.features import STFT
    >>> from speechbrain.processing.multi_mic import Covariance
    >>> from speechbrain.processing.multi_mic import GccPhat, tdoas2taus
    >>>
    >>> xs_speech = read_audio(
    ...    'tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac'
    ... )
    >>> xs_noise = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
    >>> xs = xs_speech + 0.05 * xs_noise
    >>> xs = xs.unsqueeze(0)
    >>> fs = 16000
    >>>
    >>> stft = STFT(sample_rate=fs)
    >>> cov = Covariance()
    >>> gccphat = GccPhat()
    >>>
    >>> Xs = stft(xs)
    >>> XXs = cov(Xs)
    >>> tdoas = gccphat(XXs)
    >>> taus = tdoas2taus(tdoas)
    r   é   rs   r4   .r   )r   ÚlenÚintru   )r;   Ún_pairsÚ
n_channelsr=   r   r   r   rC   (  s   )rC   c                 C   sÞ   d}t |d d ƒ}d| tjd|| jd | }| | jd ¡}|  t| jƒ¡ dt| jƒ |f ¡} t | |  ¡}t 	| |  ¡}t 
||ft|jƒ¡}| t|jƒd t|jƒd ¡ t|jƒd t|jƒd ¡}|S )aÂ  This function computes a steering vector by using the time differences
    of arrival for each channel (in samples) and the number of bins (n_fft).
    The result has the following format: (batch, time_step, n_fft/2 + 1, 2, n_mics).

    Arguments:
    ----------
    taus : torch.Tensor
        The time differences of arrival for each channel. The tensor must have
        the following format: (batch, time_steps, n_mics).

    n_fft : int
        The number of bins resulting of the STFT. It is assumed that the
        argument "onesided" was set to True for the STFT.

    Example:
    --------f
    >>> import torch
    >>> from speechbrain.dataio.dataio import read_audio
    >>> from speechbrain.processing.features import STFT
    >>> from speechbrain.processing.multi_mic import Covariance
    >>> from speechbrain.processing.multi_mic import GccPhat, tdoas2taus, steering
    >>>
    >>> xs_speech = read_audio(
    ...    'tests/samples/multi-mic/speech_-0.82918_0.55279_-0.082918.flac'
    ... )
    >>> xs_noise = read_audio('tests/samples/multi-mic/noise_diffuse.flac')
    >>> xs = xs_speech + 0.05 * xs_noise
    >>> xs = xs.unsqueeze(0) # [batch, time, channels]
    >>> fs = 16000

    >>> stft = STFT(sample_rate=fs)
    >>> cov = Covariance()
    >>> gccphat = GccPhat()
    >>>
    >>> Xs = stft(xs)
    >>> n_fft = Xs.shape[2]
    >>> XXs = cov(Xs)
    >>> tdoas = gccphat(XXs)
    >>> taus = tdoas2taus(tdoas)
    >>> As = steering(taus, n_fft)
    g-DTû!	@r   r4   r   )rA   )r   r   )rÔ   r   ÚarangerA   r   r   r   rÓ   ÚcosÚsinr   r   )r=   r>   ÚpiÚ
frame_sizeÚomegasÚa_reÚa_imÚar   r   r   rD   X  s   +ÿ ÿrD   r   c              
   C   sÌ  d}d}d}t jdt jd}t  g d¢¡|ddd…f< t  g d	¢¡|d
dd…f< |t  d| t  dd¡ d ¡ |tddƒdf< |t  d| t  dd¡ d ¡ |tddƒdf< ||tddƒdf< d| t  d| t  dd¡ d ¡ |tdd
ƒdf< d| t  d| t  dd¡ d ¡ |tdd
ƒdf< d| |tdd
ƒdf< t jdt jd}t  	g d¢¡|ddd…f< t  	g d¢¡|ddd…f< t  	g d¢¡|ddd…f< t  	g d¢¡|ddd…f< t  	g d¢¡|ddd…f< t  	g d¢¡|ddd…f< t  	g d¢¡|ddd…f< t  	g d¢¡|ddd…f< t  	g d¢¡|ddd…f< t  	g d ¢¡|d!dd…f< t  	g d"¢¡|d#dd…f< t  	g d$¢¡|d
dd…f< t  	g d%¢¡|d&dd…f< t  	g d'¢¡|d(dd…f< t  	g d)¢¡|d*dd…f< t  	g d+¢¡|d,dd…f< t  	g d-¢¡|d.dd…f< t  	g d/¢¡|d0dd…f< t  	g d1¢¡|d2dd…f< t  	g d3¢¡|d4dd…f< td| ƒD ]±}|j
d }|d }t j|dft jd}	|dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< |dd…df |	d| t  d|¡ df< t j|	dd…ddgf |	dd…ddgf |	dd…ddgf fdd5}
t j|
dd5\}}t  |¡}|dd…df |d  |dd…df  }t j|d6d7\}}t j|j
d df|jd}t j||d d8d9|dd…df< ||dd…df |d   |dd…df< t  t  |d:¡dd¡}||dd…df dd…f ||dd…df dd…f  }|t  t  t j|d dd5d; d¡dd¡ }q±|S )<aõ  This function generates cartesian coordinates (xyz) for a set
    of points forming a 3D sphere. The coordinates are expressed in
    meters and can be used as doas. The result has the format:
    (n_points, 3).

    Arguments
    ---------
    levels_count : int
        A number proportional to the number of points that the user
        wants to generate.
            - If levels_count = 1, then the sphere will have 42 points
            - If levels_count = 2, then the sphere will have 162 points
            - If levels_count = 3, then the sphere will have 642 points
            - If levels_count = 4, then the sphere will have 2562 points
            - If levels_count = 5, then the sphere will have 10242 points
            - ...
        By default, levels_count is set to 4.

    Returns
    -------
    pts : torch.Tensor
        The list of xyz points in the sphere.

    Example
    -------
    >>> import torch
    >>> from speechbrain.processing.multi_mic import sphere
    >>> doas = sphere()
    gÚí¿Å%ŸÜ?gÚí¿Å%Ÿì?gPERTû!	@)é   r   )Údtype)r   r   r   r   N)r   r   rI   é   g       @rÁ   g      @r   rÀ   r4   r\   )é   r   )r   r4   r   )r   r   r4   )r   r   r   )r   rÁ   r   r   )r   r   rÁ   r   )é	   r   r4   )é
   r4   r   )rÀ   r   r   é   )ræ   r   rÁ   rÒ   )rÒ   rÁ   r   rä   )r   ræ   rÀ   rå   )rÁ   rÒ   ræ   )r   rä   rÒ   rà   )r4   rå   rä   é   )r   rÀ   rå   é   )râ   rÀ   ræ   é   )râ   ræ   rÒ   é   )râ   rÒ   rä   é   )râ   rä   rå   é   )râ   rå   rÀ   é   )ÚaxisT)rX   r˜   r™   )r   rI   rs   )r   ÚzerosÚfloatÚFloatTensorrÙ   r×   ru   rØ   ÚlongÚ
LongTensorr   rt   Úsortrœ   r^   rá   r›   r   r³   Úrepeat_interleaver   rL   )Úlevels_countÚhÚrrÚ   ÚptsÚtrsÚlevels_indexÚ	trs_countÚsubtrs_countÚsubtrsÚsubtrs_flattenÚsubtrs_sortedr    Ú	index_maxÚsubtrs_scalarÚunique_scalarÚunique_indicesÚunique_valuesr   r   r   rª   ™  s°    00$ÿ$ÿ
((((((((((((((((((((((((:ÿ
&ÿ
ÿÿ
ÿÿ8 
ÿrª   )r3   )r   )r+   r   Ú	packagingr   Ú$speechbrain.processing.decompositionÚ
processingÚdecompositionr_   ÚnnÚModuler   r.   rR   ro   r   r©   r½   rB   rC   rD   rª   r   r   r   r   Ú<module>   s*    Oo  ; 	 L E 
T/0A