o
    %ݫiO                     @   sH   d Z ddlZddlmZ ddlmZmZ ddlmZ G dd deZdS )zWCNN Transformer model for SE in the SpeechBrain style.

Authors
* Chien-Feng Liao 2020
    N)nn)TransformerInterfaceget_lookahead_mask)Linearc                	       sB   e Zd ZdZejddddejdddf	 fdd		Zdd
dZ  Z	S )CNNTransformerSEa  This is an implementation of transformer model with CNN pre-encoder for SE.

    Arguments
    ---------
    d_model : int
        The number of expected features in the encoder inputs.
    output_size : int
        The number of neurons in the output layer.
    output_activation : torch class
        The activation function of the output layer (default=ReLU).
    nhead : int
        The number of heads in the multi-head attention models (default=8).
    num_layers : int
        The number of sub-layers in the transformer (default=8).
    d_ffn : int
        The number of expected features in the encoder layers (default=512).
    dropout : int
        The dropout value (default=0.1).
    activation : torch class
        The activation function of intermediate layers (default=LeakyReLU).
    causal : bool
        True for causal setting, the model is forbidden to see future frames (default=True).
    custom_emb_module : torch class
        Module that processes the input features before the transformer model.
    normalize_before : bool
        Whether to normalize before each layer.

    Example
    -------
    >>> src = torch.rand([8, 120, 256])
    >>> net = CNNTransformerSE(d_model=256, output_size=257)
    >>> out = net(src)
    >>> out.shape
    torch.Size([8, 120, 257])
       i   g?TNFc                    sB   t  j|||d|||d ||	d
 |
| _t||dd| _| | _d S )Nr   )
d_modelnheadnum_encoder_layersnum_decoder_layersd_ffndropout
activationpositional_encodingnormalize_beforecausalF)
input_sizebias)super__init__custom_emb_moduler   output_layeroutput_activation)selfr   output_sizer   r	   
num_layersr   r   r   r   r   r   	__class__ f/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/lobes/models/transformer/TransformerSE.pyr   6   s   zCNNTransformerSE.__init__c                 C   sZ   | j r	t|| _nd| _| jdur| |}| j|| j|d\}}| |}| |}|S )z:Processes the input tensor x and returns an output tensor.N)srcsrc_masksrc_key_padding_mask)r   r   	attn_maskr   encoderr   r   )r   xr"   encoder_output_outputr   r   r   forwardU   s   




zCNNTransformerSE.forward)N)
__name__
__module____qualname____doc__r   ReLU	LeakyReLUr   r)   __classcell__r   r   r   r   r      s    (r   )	r-   torchr   0speechbrain.lobes.models.transformer.Transformerr   r   speechbrain.nnet.linearr   r   r   r   r   r   <module>   s    