o
    %ݫi                     @   s,   d Z ddlZddlmZ G dd deZdS )aQ   Specifies the inference interfaces for speaker recognition modules.

Authors:
 * Aku Rouhe 2021
 * Peter Plantinga 2021
 * Loren Lugosch 2020
 * Mirco Ravanelli 2020
 * Titouan Parcollet 2021
 * Abdel Heba 2021
 * Andreas Nautsch 2022, 2023
 * Pooneh Mousavi 2023
 * Sylvain de Langen 2023
 * Adel Moumen 2023
 * Pradnya Kandarkar 2023
    N)EncoderClassifierc                       s<   e Zd ZdZg dZ fddZ	dddZd	d
 Z  ZS )SpeakerRecognitionaz  A ready-to-use model for speaker recognition. It can be used to
    perform speaker verification with verify_batch().

    Arguments
    ---------
    *args : tuple
    **kwargs : dict
        Arguments are forwarded to ``Pretrained`` parent class.

    Example
    -------
    >>> import torchaudio
    >>> from speechbrain.inference.speaker import SpeakerRecognition
    >>> # Model is downloaded from the speechbrain HuggingFace repo
    >>> tmpdir = getfixture("tmpdir")
    >>> verification = SpeakerRecognition.from_hparams(
    ...     source="speechbrain/spkrec-ecapa-voxceleb",
    ...     savedir=tmpdir,
    ... )

    >>> # Perform verification
    >>> signal, fs = torchaudio.load("tests/samples/single-mic/example1.wav")
    >>> signal2, fs = torchaudio.load("tests/samples/single-mic/example2.flac")
    >>> score, prediction = verification.verify_batch(signal, signal2)
    )compute_featuresmean_var_normembedding_modelmean_var_norm_embc                    s(   t  j|i | tjjddd| _d S )Ngư>)dimeps)super__init__torchnnCosineSimilarity
similarity)selfargskwargs	__class__ Q/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/inference/speaker.pyr   8   s   zSpeakerRecognition.__init__N      ?c           	      C   s8   | j ||dd}| j ||dd}| ||}|||kfS )a  Performs speaker verification with cosine distance.

        It returns the score and the decision (0 different speakers,
        1 same speakers).

        Arguments
        ---------
        wavs1 : Torch.Tensor
            torch.Tensor containing the speech waveform1 (batch, time).
            Make sure the sample rate is fs=16000 Hz.
        wavs2 : Torch.Tensor
            torch.Tensor containing the speech waveform2 (batch, time).
            Make sure the sample rate is fs=16000 Hz.
        wav1_lens : Torch.Tensor
            torch.Tensor containing the relative length for each sentence
            in the length (e.g., [0.8 0.6 1.0])
        wav2_lens : Torch.Tensor
            torch.Tensor containing the relative length for each sentence
            in the length (e.g., [0.8 0.6 1.0])
        threshold : Float
            Threshold applied to the cosine distance to decide if the
            speaker is different (0) or the same (1).

        Returns
        -------
        score
            The score associated to the binary verification output
            (cosine distance).
        prediction
            The prediction is 1 if the two signals in input are from the same
            speaker and 0 otherwise.
        F)	normalize)encode_batchr   )	r   wavs1wavs2	wav1_lens	wav2_lens	thresholdemb1emb2scorer   r   r   verify_batch<   s   #zSpeakerRecognition.verify_batchc           
      K   sX   | j |fi |}| j |fi |}|d}|d}| ||\}}	|d |	d fS )ak  Speaker verification with cosine distance

        Returns the score and the decision (0 different speakers,
        1 same speakers).

        Arguments
        ---------
        path_x : str
            Path to file x
        path_y : str
            Path to file y
        **kwargs : dict
            Arguments to ``load_audio``

        Returns
        -------
        score
            The score associated to the binary verification output
            (cosine distance).
        prediction
            The prediction is 1 if the two signals in input are from the same
            speaker and 0 otherwise.
        r   )
load_audio	unsqueezer#   )
r   path_xpath_yr   
waveform_x
waveform_ybatch_xbatch_yr"   decisionr   r   r   verify_filesd   s   

zSpeakerRecognition.verify_files)NNr   )	__name__
__module____qualname____doc__MODULES_NEEDEDr   r#   r-   __classcell__r   r   r   r   r      s    
(r   )r1   r   !speechbrain.inference.classifiersr   r   r   r   r   r   <module>   s    