o
    wi                     @   st   d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZ dgZG dd de
eZdS )	    )OrderedDictN)	typecheck)
Exportable)NeuralModule)AcousticEncodedRepresentationLogprobsType
NeuralTypeLSTMDecoderc                       sj   e Zd ZdZedd Zedd Zd fd	d
	Ze dd Z	dddZ
edd Zedd Z  ZS )r	   a  
    Simple LSTM Decoder for ASR models
    Args:
        feat_in (int): size of the input features
        num_classes (int): the size of the vocabulary
        lstm_hidden_size (int): hidden size of the LSTM layers
        vocabulary (vocab): The vocabulary
        bidirectional (bool): default is False. Whether LSTMs are bidirectional or not
        num_layers (int): default is 1. Number of LSTM layers stacked
    c                 C      t dtdt iS )Nencoder_output)BDT)r   r   r   self r   f/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/asr/modules/lstm_decoder.pyinput_types(      zLSTMDecoder.input_typesc                 C   r
   )Nlogprobs)r   r   r   )r   r   r   r   r   r   r   output_types,   r   zLSTMDecoder.output_typesNF   c                    s   t    |d ur|t|krtd| dt| || _|| _|d | _tj|||d|d| _	|r7d| n|}t
jj|| jd| _d S )NzeIf vocabulary is specified, it's length should be equal to the num_classes. Instead got: num_classes=z and len(vocabulary)=r   T)
input_sizehidden_size
num_layersbatch_firstbidirectional   )in_featuresout_features)super__init__len
ValueError_LSTMDecoder__vocabulary_feat_in_num_classesnnLSTM
lstm_layertorchLinearlinear_layer)r   feat_innum_classeslstm_hidden_size
vocabularyr   r   	__class__r   r   r!   0   s,   

zLSTMDecoder.__init__c                 C   s6   | dd}| |\}}| |}tjjj|ddS )Nr   r   )dim)	transposer)   r,   r*   r'   
functionallog_softmax)r   r   output_r   r   r   forwardH   s   
zLSTMDecoder.forward   c                 C   s*   t || j|t|  j}t|gS )zs
        Generates input examples for tracing etc.
        Returns:
            A tuple of input examples.
        )r*   randnr%   tonext
parametersdevicetuple)r   	max_batchmax_diminput_exampler   r   r   rD   O   s    
zLSTMDecoder.input_examplec                 C      | j S N)r$   r   r   r   r   r0   X      zLSTMDecoder.vocabularyc                 C   rE   rF   )r&   r   r   r   r   num_classes_with_blank\   rG   z"LSTMDecoder.num_classes_with_blank)NFr   )r   r;   )__name__
__module____qualname____doc__propertyr   r   r!   r   r:   rD   r0   rH   __classcell__r   r   r1   r   r	      s    



	
)collectionsr   r*   torch.nnr'   nemo.core.classes.commonr   nemo.core.classes.exportabler   nemo.core.classes.moduler   nemo.core.neural_typesr   r   r   __all__r	   r   r   r   r   <module>   s   