o
    i
                     @   s:   d dl Z dgZG dd de jjZG dd de jjZdS )    N
DeepSpeechc                       sN   e Zd ZdZddededededdf
 fd	d
ZdejdejfddZ	  Z
S )FullyConnectedzh
    Args:
        n_feature: Number of input features
        n_hidden: Internal hidden unit size.
       	n_featuren_hiddendropoutrelu_max_clipreturnNc                    s2   t t|   tjj||dd| _|| _|| _d S )NT)bias)	superr   __init__torchnnLinearfcr   r   )selfr   r   r   r   	__class__ Z/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torchaudio/models/deepspeech.pyr      s   
zFullyConnected.__init__xc                 C   sL   |  |}tjj|}tjj|d| j}| jr$tjj|| j| j}|S )Nr   )	r   r   r   
functionalreluhardtanhr   r   training)r   r   r   r   r   forward   s   
zFullyConnected.forward)r   __name__
__module____qualname____doc__intfloatr   r   Tensorr   __classcell__r   r   r   r   r      s    $r   c                       sT   e Zd ZdZ			ddedededed	d
f
 fddZdejd	ejfddZ	  Z
S )r   a  DeepSpeech architecture introduced in
    *Deep Speech: Scaling up end-to-end speech recognition* :cite:`hannun2014deep`.

    Args:
        n_feature: Number of input features
        n_hidden: Internal hidden unit size.
        n_class: Number of output classes
       (           r   r   n_classr   r	   Nc                    sx   t t|   || _t|||| _t|||| _t|||| _tj	j
||dddd| _t|||| _tj	||| _d S )N   r   T)
num_layersnonlinearitybidirectional)r   r   r   r   r   fc1fc2fc3r   r   RNNbi_rnnfc4r   out)r   r   r   r(   r   r   r   r   r   &   s   zDeepSpeech.__init__r   c                 C   s   |  |}| |}| |}|d}|dd}| |\}}|ddddd| jf |dddd| jdf  }| |}| |}|	ddd}t
jjj|dd}|S )z
        Args:
            x (torch.Tensor): Tensor of dimension (batch, channel, time, feature).
        Returns:
            Tensor: Predictor tensor of dimension (batch, time, class).
        r)   r   N   )dim)r-   r.   r/   squeeze	transposer1   r   r2   r3   permuter   r   r   log_softmax)r   r   _r   r   r   r   6   s   



8

zDeepSpeech.forward)r%   r&   r'   r   r   r   r   r   r      s"    )r   __all__r   Moduler   r   r   r   r   r   <module>   s    