o
    ei)                     @   s   d Z ddlZddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ G dd dejjZG d	d
 d
ejjjZG dd dejjjZdS )zqA popular speaker recognition and diarization model.

Authors
 * Nauman Dawalatabad 2020
 * Mirco Ravanelli 2020
    N)Conv1d)Linear)BatchNorm1d)StatisticsPoolingc                       sL   e Zd ZdZdejjdg dg dg dddf fd	d
	ZdddZ  Z	S )Xvectora  This model extracts X-vectors for speaker recognition and diarization.

    Arguments
    ---------
    device : str
        Device used e.g. "cpu" or "cuda".
    activation : torch class
        A class for constructing the activation layers.
    tdnn_blocks : int
        Number of time-delay neural (TDNN) layers.
    tdnn_channels : list of ints
        Output channels for TDNN layer.
    tdnn_kernel_sizes : list of ints
        List of kernel sizes for each TDNN layer.
    tdnn_dilations : list of ints
        List of dilations for kernels in each TDNN layer.
    lin_neurons : int
        Number of neurons in linear layers.
    in_channels : int
        Expected size of input features.

    Example
    -------
    >>> compute_xvect = Xvector('cpu')
    >>> input_feats = torch.rand([5, 10, 40])
    >>> outputs = compute_xvect(input_feats)
    >>> outputs.shape
    torch.Size([5, 1, 512])
    cpu   )   r	   r	   r	   i  )r      r
      r   )r      r
   r   r   r	   (   c	              	      s   t    t | _t|D ]!}	||	 }
| jt||
||	 ||	 d| t|
dg ||	 }q| j	t
  | j	t|
d |ddd d S )N)in_channelsout_channelskernel_sizedilation)
input_sizer   TF)r   	n_neuronsbiascombine_dims)super__init__nn
ModuleListblocksrangeextendr   r   appendr   r   )selfdevice
activationtdnn_blockstdnn_channelstdnn_kernel_sizestdnn_dilationslin_neuronsr   block_indexr   	__class__ ^/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/speechbrain/lobes/models/Xvector.pyr   2   s2   


zXvector.__init__Nc              	   C   s:   | j D ]}z|||d}W q ty   ||}Y qw |S )a>  Returns the x-vectors.

        Arguments
        ---------
        x : torch.Tensor
            Inputs features for extracting x-vectors.
        lens : torch.Tensor
            The corresponding relative lengths of the inputs.

        Returns
        -------
        x : torch.Tensor
            X-vectors.
        )lengths)r   	TypeError)r   xlenslayerr)   r)   r*   forward^   s   
zXvector.forward)N)
__name__
__module____qualname____doc__torchr   	LeakyReLUr   r0   __classcell__r)   r)   r'   r*   r      s     ,r   c                       s.   e Zd ZdZejjdddf fdd	Z  ZS )
Classifiera  This class implements the last MLP on the top of xvector features.

    Arguments
    ---------
    input_shape : tuple
        Expected shape of an example input.
    activation : torch class
        A class for constructing the activation layers.
    lin_blocks : int
        Number of linear layers.
    lin_neurons : int
        Number of neurons in linear layers.
    out_neurons : int
        Number of output neurons.

    Example
    -------
    >>> input_feats = torch.rand([5, 10, 40])
    >>> compute_xvect = Xvector()
    >>> xvects = compute_xvect(input_feats)
    >>> classify = Classifier(input_shape=xvects.shape)
    >>> output = classify(xvects)
    >>> output.shape
    torch.Size([5, 1, 1211])
    r   r	   i  c                    s   t  j|d | j| dd | jtjjjdd |dkr'| jtjjjdd t	|D ]9}d| }| j
jtjjj|d | j
| jtjjj|dd	d
 | j
| j| dd | j
| jtjjjdd q+| jtjjj|dd | jtjjjdddd d S )Ninput_shapeact
layer_namenormr   DNNblock_Tlinear)r   r   r=   outr   r=   )	apply_logsoftmax)r   r   r   sbnnetnormalizationr   
containers
Sequentialr   r?   rA   r   activationsSoftmaxr   r:   r    
lin_blocksr%   out_neuronsr&   
block_namer'   r)   r*   r      s4   





zClassifier.__init__	r1   r2   r3   r4   r5   r   r6   r   r7   r)   r)   r'   r*   r8   v       r8   c                       s.   e Zd ZdZejjdddf fdd	Z  ZS )Discriminatora  This class implements a discriminator on the top of xvector features.

    Arguments
    ---------
    input_shape : tuple
        Expected shape of the input tensor.
    activation : torch class
        A class for constructing the activation layers.
    lin_blocks : int
        Number of linear layers.
    lin_neurons : int
        Number of neurons in linear layers.
    out_neurons : int
        Size of the output vector.

    Example
    -------
    >>> input_feats = torch.rand([5, 10, 40])
    >>> compute_xvect = Xvector()
    >>> xvects = compute_xvect(input_feats)
    >>> discriminate = Discriminator(xvects.shape)
    >>> output = discriminate(xvects)
    >>> output.shape
    torch.Size([5, 1, 1])
    r   r	   c                    s   t  j|d |dkr| jtjjjdd t|D ]:}d| }| jjtjjj|d | j| jtjj	j
|dddd	 | j| jtjjjd
d | j| j| dd q| jtjj	j
|dd d S )Nr9   r   r?   r<   r@   TFrA   )r   r   r   r=   r>   r;   rB   rC   )r   r   r   rF   rG   rI   rJ   r   r?   rA   r   rH   r   rM   r'   r)   r*   r      s,   





zDiscriminator.__init__rQ   r)   r)   r'   r*   rS      rR   rS   )r4   r5   torch.nnr   speechbrainrF   speechbrain.nnet.CNNr   speechbrain.nnet.linearr   speechbrain.nnet.normalizationr   speechbrain.nnet.poolingr   Moduler   rG   rI   rJ   r8   rS   r)   r)   r)   r*   <module>   s    cD