o
    %ݫiZ                     @   s   d Z ddlZddlmZ ddlmZ ddlZddejfddZdd	 Z	G d
d dej
ZG dd dej
ZG dd dej
ZdS )zJGenerator and discriminator used in MetricGAN

Authors:
* Szu-Wei Fu 2020
    N)nn)spectral_normTc                 K   sN   |du r| }|| |fi |}|rt |}tjj|jdd tj|j |S )zDCreate a layer with spectral norm, xavier uniform init and zero biasNg      ?)gain)r   r   initxavier_uniform_weightzeros_bias)in_sizeout_size	spec_norm
layer_typekwargslayer r   V/home/ubuntu/.local/lib/python3.10/site-packages/speechbrain/lobes/models/MetricGAN.pyxavier_init_layer   s   r   c                 C   s   ddt d|    S )zComputes the shifted sigmoid.333333?   g      )torchexp)xr   r   r   shifted_sigmoid    s   r   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )Learnable_sigmoidzzImplementation of a leanable sigmoid.

    Arguments
    ---------
    in_features : int
        Input dimensionality
      c                    s(   t    tt|| _d| j_d S )NT)super__init__r   	Parameterr   onessloperequiresGrad)selfin_features	__class__r   r   r   .   s   
zLearnable_sigmoid.__init__c                 C   s   dt | j|  S ):Processes the input tensor x and returns an output tensor.r   )r   sigmoidr   )r!   r   r   r   r   forward6   s   zLearnable_sigmoid.forward)r   __name__
__module____qualname____doc__r   r'   __classcell__r   r   r#   r   r   %   s    r   c                       s2   e Zd ZdZ				d
 fdd	Zdd	 Z  ZS )EnhancementGeneratorau  Simple LSTM for enhancement with custom initialization.

    Arguments
    ---------
    input_size : int
        Size of the input tensor's last dimension.
    hidden_size : int
        Number of neurons to use in the LSTM layers.
    num_layers : int
        Number of layers to use in the LSTM.
    dropout : int
        Fraction of neurons to drop during training.
    r         r   c                    s   t    tjdd| _tjjj||||dd| _		 | j	
 D ]$\}}d|v r.tj| qd|v r9tj| qd|v rCtj| qtdd	d
d| _td	dd
d| _t | _t | _d S )N333333?negative_slopeT)
input_sizehidden_size
num_layersdropoutbidirectionalr	   	weight_ih	weight_hhi  i,  F)r   r   )r   r   r   	LeakyReLU
activationsbnnetRNNLSTMblstmnamed_parametersr   r   r   orthogonal_r   linear1linear2r   Sigmoidr&   )r!   r4   r5   r6   r7   nameparamr#   r   r   r   J   s,   
zEnhancementGenerator.__init__c                 C   s>   | j ||d\}}| |}| |}| |}| |}|S )r%   )lengths)rA   rD   r<   rE   r   )r!   r   rI   out_r   r   r   r'   m   s   



zEnhancementGenerator.forward)r   r/   r0   r   r(   r   r   r#   r   r.   ;   s    #r.   c                       s2   e Zd ZdZddejf fdd	Zdd Z  ZS )MetricDiscriminatora  Metric estimator for enhancement training.

    Consists of:
     * four 2d conv layers
     * channel averaging
     * three linear layers

    Arguments
    ---------
    kernel_size : tuple
        The dimensions of the 2-d kernel used for convolution.
    base_channels : int
        Number of channels used in each conv layer.
    activation : Callable
        Function to apply between layers.
    )   rM      c                    s   t    |dd| _tjddd| _td|tj|d| _t|tj|d| _	t|tj|d| _
t|tj|d| _t|dd| _tdd	d
| _td	dd
| _d S )Nr1   r2   r0   g{Gz?)num_featuresmomentum)r   kernel_size2   )r   
   )r
   r   r   )r   r   r<   r   BatchNorm2dBNr   Conv2dconv1conv2conv3conv4Linear1Linear2Linear3)r!   rQ   base_channelsr<   r#   r   r   r      s$   

zMetricDiscriminator.__init__c                 C   s   |  |}| |}| |}| |}| |}| |}| |}| |}| |}t|d}| |}| |}| 	|}| |}| 
|}|S )r%   )r0      )rU   rW   r<   rX   rY   rZ   r   meanr[   r\   r]   )r!   r   rJ   r   r   r   r'      s    













zMetricDiscriminator.forward)	r)   r*   r+   r,   r   r;   r   r'   r-   r   r   r#   r   rL   z   s    rL   )r,   r   r   torch.nn.utilsr   speechbrainr=   Linearr   r   Moduler   r.   rL   r   r   r   r   <module>   s    
?