from typing import List, Optional

from lightning.pytorch import Trainer
from omegaconf import DictConfig

from nemo.collections.asr.models.ctc_bpe_models import EncDecCTCModelBPE
from nemo.collections.asr.models.ctc_models import EncDecCTCModel
from nemo.collections.asr.models.rnnt_bpe_models import EncDecRNNTBPEModel
from nemo.collections.asr.models.rnnt_models import EncDecRNNTModel
from nemo.collections.asr.parts.k2.classes import ASRK2Mixin
from nemo.core.classes.common import PretrainedModelInfo, typecheck
from nemo.utils import logging


class EncDecK2SeqModel(EncDecCTCModel, ASRK2Mixin):
    """Encoder decoder models with various lattice losses."""

    def __init__(self, cfg: DictConfig, trainer: Trainer = None):
        loss_type = cfg.graph_module_cfg.get("loss_type", "ctc")
        if loss_type != "ctc" and loss_type != "mmi":
            raise ValueError(f"Class {self.__class__.__name__} does not support `loss_type`={loss_type}")
        super().__init__(cfg=cfg, trainer=trainer)
        self._init_k2()

    @classmethod
    def list_available_models(cls) -> Optional[List[PretrainedModelInfo]]:
        """
        This method returns a list of pre-trained models which can be instantiated directly from NVIDIA's NGC cloud.

        Returns:
            List of available pre-trained models.
        """
        pass

    def change_vocabulary(self, new_vocabulary: List[str]):
        """
        Changes vocabulary used during CTC decoding process. Use this method when fine-tuning a pre-trained model.
        This method changes only the decoder and leaves the encoder and pre-processing modules unchanged. For
        example, you would use it if you want to use a pretrained encoder when fine-tuning on data in another
        language, or when you'd need the model to learn capitalization, punctuation and/or special characters.

        If new_vocabulary == self.decoder.vocabulary then nothing will be changed.

        Args:
            new_vocabulary: list with new vocabulary. Must contain at least 2 elements.
                Typically, this is the target alphabet.

        Returns: None

        """
        super().change_vocabulary(new_vocabulary)

        if self.use_graph_lm:
            self.token_lm = None
            logging.warning(
                f"""With .change_vocabulary() call for a model with criterion_type=`{self.loss.criterion_type}`,
                a new token_lm has to be set manually: call .update_k2_modules(new_cfg)
                or update .graph_module_cfg.backend_cfg.token_lm before calling this method."""
            )

        self.update_k2_modules(self.graph_module_cfg)
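
    # A hedged sketch of the manual flow the warning above describes: after swapping
    # the vocabulary on a graph-LM model, the token LM must be refreshed by hand.
    # `new_token_lm` stands in for an already-built token LM object (hypothetical):
    #
    #   model.change_vocabulary(new_vocabulary=["a", "b", "c"])
    #   model.graph_module_cfg.backend_cfg.token_lm = new_token_lm
    #   model.update_k2_modules(model.graph_module_cfg)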

    @typecheck()
    def forward(
        self, input_signal=None, input_signal_length=None, processed_signal=None, processed_signal_length=None,
    ):
        """
        Forward pass of the model.

        Args:
            input_signal: Tensor that represents a batch of raw audio signals,
                of shape [B, T]. T here represents timesteps, with 1 second of audio represented as
                `self.sample_rate` number of floating point values.
            input_signal_length: Vector of length B, that contains the individual lengths of the audio
                sequences.
            processed_signal: Tensor that represents a batch of processed audio signals,
                of shape (B, D, T) that has undergone processing via some DALI preprocessor.
            processed_signal_length: Vector of length B, that contains the individual lengths of the
                processed audio sequences.

        Returns:
            A tuple of 3 elements -
            1) The log probabilities tensor of shape [B, T, D].
            2) The lengths of the acoustic sequence after propagation through the encoder, of shape [B].
            3) The greedy token predictions of the model of shape [B, T] (via argmax).
        """
        log_probs, encoded_len, greedy_predictions = super().forward(
            input_signal=input_signal,
            input_signal_length=input_signal_length,
            processed_signal=processed_signal,
            processed_signal_length=processed_signal_length,
        )
        return self._forward_k2_post_processing(
            log_probs=log_probs, encoded_length=encoded_len, greedy_predictions=greedy_predictions
        )


class EncDecK2SeqModelBPE(EncDecCTCModelBPE, ASRK2Mixin):
    """Encoder decoder models with Byte Pair Encoding and various lattice losses."""

    def __init__(self, cfg: DictConfig, trainer: Trainer = None):
        loss_type = cfg.graph_module_cfg.get("loss_type", "ctc")
        if loss_type != "ctc" and loss_type != "mmi":
            raise ValueError(f"Class {self.__class__.__name__} does not support `loss_type`={loss_type}")
        super().__init__(cfg=cfg, trainer=trainer)
        self._init_k2()

    @classmethod
    def list_available_models(cls) -> Optional[List[PretrainedModelInfo]]:
        """
        This method returns a list of pre-trained models which can be instantiated directly from NVIDIA's NGC cloud.

        Returns:
            List of available pre-trained models.
        """
        pass

    def change_vocabulary(self, new_tokenizer_dir: str, new_tokenizer_type: str):
        """
        Changes vocabulary of the tokenizer used during CTC decoding process.
        Use this method when fine-tuning a pre-trained model.
        This method changes only the decoder and leaves the encoder and pre-processing modules unchanged. For
        example, you would use it if you want to use a pretrained encoder when fine-tuning on data in another
        language, or when you'd need the model to learn capitalization, punctuation and/or special characters.

        Args:
            new_tokenizer_dir: Path to the new tokenizer directory.
            new_tokenizer_type: Either `bpe` or `wpe`. `bpe` is used for SentencePiece tokenizers,
                whereas `wpe` is used for `BertTokenizer`.

        Returns: None

        """
        super().change_vocabulary(new_tokenizer_dir, new_tokenizer_type)

        if self.use_graph_lm:
            self.token_lm = None
            logging.warning(
                f"""With .change_vocabulary() call for a model with criterion_type=`{self.loss.criterion_type}`,
                a new token_lm has to be set manually: call .update_k2_modules(new_cfg)
                or update .graph_module_cfg.backend_cfg.token_lm before calling this method."""
            )

        self.update_k2_modules(self.graph_module_cfg)

    @typecheck()
    def forward(
        self, input_signal=None, input_signal_length=None, processed_signal=None, processed_signal_length=None,
    ):
        """
        Forward pass of the model. See EncDecK2SeqModel.forward for argument and return value details.
        """
        log_probs, encoded_len, greedy_predictions = super().forward(
            input_signal=input_signal,
            input_signal_length=input_signal_length,
            processed_signal=processed_signal,
            processed_signal_length=processed_signal_length,
        )
        return self._forward_k2_post_processing(
            log_probs=log_probs, encoded_length=encoded_len, greedy_predictions=greedy_predictions
        )
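

# A minimal usage sketch (illustrative, not part of the original module): restoring a
# hypothetical checkpoint and running the k2 post-processed forward pass. The file
# name and the 16 kHz sample rate are assumptions:
#
#   import torch
#
#   model = EncDecK2SeqModelBPE.restore_from("ctc_k2_model.nemo")  # hypothetical file
#   audio = torch.randn(1, 16000)    # [B, T] raw audio, one second at 16 kHz
#   lengths = torch.tensor([16000])  # per-example lengths, shape [B]
#   log_probs, encoded_len, greedy_predictions = model(
#       input_signal=audio, input_signal_length=lengths
#   )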


class EncDecK2RnntSeqModel(EncDecRNNTModel, ASRK2Mixin):
    """Encoder decoder models with various lattice losses."""

    def __init__(self, cfg: DictConfig, trainer: Trainer = None):
        loss_type = cfg.graph_module_cfg.get("loss_type", "rnnt")
        criterion_type = cfg.graph_module_cfg.get("criterion_type", "ml")
        if loss_type != "rnnt" or criterion_type != "ml":
            raise ValueError(
                f"""Class {self.__class__.__name__} does not support
            `criterion_type`={criterion_type} with `loss_type`={loss_type}"""
            )
        super().__init__(cfg=cfg, trainer=trainer)
        self._init_k2()

    @classmethod
    def list_available_models(cls) -> Optional[List[PretrainedModelInfo]]:
        """
        This method returns a list of pre-trained models which can be instantiated directly from NVIDIA's NGC cloud.

        Returns:
            List of available pre-trained models.
        """
        pass

    def change_vocabulary(self, new_vocabulary: List[str]):
        """
        Changes vocabulary used during decoding; see EncDecK2SeqModel.change_vocabulary for details.
        """
        super().change_vocabulary(new_vocabulary)

        if self.use_graph_lm:
            self.token_lm = None
            logging.warning(
                f"""With .change_vocabulary() call for a model with criterion_type=`{self.loss.criterion_type}`,
                a new token_lm has to be set manually: call .update_k2_modules(new_cfg)
                or update .graph_module_cfg.backend_cfg.token_lm before calling this method."""
            )

        self.update_k2_modules(self.graph_module_cfg)
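

# Unlike the CTC-family classes above, the RNNT wrappers accept only the default
# combination validated in __init__ (a restatement of the check, not new API):
#
#   cfg.graph_module_cfg.loss_type      == "rnnt"
#   cfg.graph_module_cfg.criterion_type == "ml"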


class EncDecK2RnntSeqModelBPE(EncDecRNNTBPEModel, ASRK2Mixin):
    """Encoder decoder models with Byte Pair Encoding and various lattice losses."""

    def __init__(self, cfg: DictConfig, trainer: Trainer = None):
        loss_type = cfg.graph_module_cfg.get("loss_type", "rnnt")
        criterion_type = cfg.graph_module_cfg.get("criterion_type", "ml")
        if loss_type != "rnnt" or criterion_type != "ml":
            raise ValueError(
                f"""Class {self.__class__.__name__} does not support
            `criterion_type`={criterion_type} with `loss_type`={loss_type}"""
            )
        super().__init__(cfg=cfg, trainer=trainer)
        self._init_k2()

    @classmethod
    def list_available_models(cls) -> Optional[List[PretrainedModelInfo]]:
        """
        This method returns a list of pre-trained models which can be instantiated directly from NVIDIA's NGC cloud.

        Returns:
            List of available pre-trained models.
        """
        pass

    def change_vocabulary(self, new_tokenizer_dir: str, new_tokenizer_type: str):
        """
        Changes vocabulary of the tokenizer used during decoding; see EncDecK2SeqModelBPE.change_vocabulary
        for details.
        """
        super().change_vocabulary(new_tokenizer_dir, new_tokenizer_type)

        if self.use_graph_lm:
            self.token_lm = None
            logging.warning(
                f"""With .change_vocabulary() call for a model with criterion_type=`{self.loss.criterion_type}`,
                a new token_lm has to be set manually: call .update_k2_modules(new_cfg)
                or update .graph_module_cfg.backend_cfg.token_lm before calling this method."""
            )

        self.update_k2_modules(self.graph_module_cfg)
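

# A hedged, self-contained smoke test (not part of the original module): it builds the
# smallest config the constructor will look at and checks that the CTC-family class
# rejects an unsupported loss_type. All values are illustrative.
if __name__ == "__main__":
    from omegaconf import OmegaConf

    bad_cfg = OmegaConf.create({"graph_module_cfg": {"loss_type": "rnnt"}})
    try:
        EncDecK2SeqModel(cfg=bad_cfg)  # raises before any heavy model construction
    except ValueError as err:
        print(f"expected rejection: {err}")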