o
    wi                     @   s   d dl mZ d dlmZmZ d dlmZmZmZ d dl	Z	d dl
mZ d dlmZ eG dd dZeG d	d
 d
ZG dd deZdS )    )ABC)	dataclassfield)AnyOptionalTupleN)
DictConfig)loggingc                   @   sB   e Zd ZU dZdZeed< dZeed< dZe	ed< dZ
e	ed	< d
S )GraphIntersectDenseConfigz%Graph dense intersection config.
    g      4@search_beamg      $@output_beam   min_active_statesi'  max_active_statesN)__name__
__module____qualname____doc__r   float__annotations__r   r   intr    r   r   b/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/asr/parts/k2/classes.pyr
      s   
 r
   c                   @   s   e Zd ZU dZdZeed< dZeed< dZ	e
e ed< dZeed	< ed
d dZeed< dZeed< dZeed< dZeed< dS )GraphModuleConfigzQConfig for graph modules.
    Typically used with graph losses and decoders.
    default	topo_typeTtopo_with_self_loopsNtoken_lmFintersect_prunedc                   C   s   t  S )N)r
   r   r   r   r   <lambda>.   s    zGraphModuleConfig.<lambda>)default_factoryintersect_confg        boost_coeffr   predictor_window_size   predictor_step_size)r   r   r   r   r   strr   r   boolr   r   r   r   r   r!   r
   r"   r   r#   r   r%   r   r   r   r   r   $   s   
 r   c                   @   sV   e Zd ZdZdd ZdefddZdejdejd	ejd
e	ejejejf fddZ
dS )
ASRK2Mixinac  k2 Mixin class that simplifies the construction of various models with k2-based losses.
    
    It does the following:
        -   Sets up the graph loss and decoder (methods _init_k2 and update_k2_modules).
        -   Registers external graphs, if needed.
        -   Augments forward(...) with optional graph decoding to get accurate predictions.
    c                 C   s   t | ds	tdt | jdr| jjdu rtd| jj| _| jdd}|dk| _| jrM| jjd	d}|du rAtd
| d| d|}|| jjd	< | | j dS )z
        k2-related initialization implementation.

        This method is expected to run after the __init__ which sets self._cfg
        self._cfg is expected to have the attribute graph_module_cfg
        _cfgz0self._cfg must be set before calling _init_k2().graph_module_cfgNz:self._cfg.graph_module_cfg must be set and cannot be None.criterion_typemlmapr   zVgraph_module_cfg.backend_cfg.token_lm is empty. It must be set for criterion_type == ``z%graph_module_cfg.backend_cfg.token_lm)	hasattr
ValueErrorr)   r*   getuse_graph_lmbackend_cfgregister_artifactupdate_k2_modules)selfr+   token_lm_pathr   r   r   _init_k2=   s    



zASRK2Mixin._init_k2	input_cfgc                 C   s0  | ` t| dr	| `t| dr| jjd }n| jjd }|jddo*|jdddv}|| j_	d	d
l
m} ||| jddd|dd|dd|dd	|jd| _ | j j}|dk| _|dd}|rv|dkrvtd| d| d d}|| _| jrd	dlm} ||ddddd|dd	|jd| _dS dS )a   
        Helper function to initialize or update k2 loss and transcribe_decoder.

        Args:
            input_cfg: DictConfig to take new parameters from. Schema is expected as in
                nemo.collections.asr.models.configs.k2_sequence_models_config.GraphModuleConfig
        transcribe_decoderjointr$   r   Tr   r   )forced_blankidentityr   )LatticeLossctc_reduction
mean_batchk2r+   r,   	loss_typectcsplit_batch_size)num_classes	reductionbackendr+   rB   rD   r*   r-   transcribe_trainingFz,You do not need to use transcribe_training=`z4` 
                            with criterion_type=`z,`. transcribe_training will be set to False.)ViterbiDecoderWithGraphr   1best)rE   rG   dec_typereturn_typereturn_ilabelsoutput_alignedrD   r*   N)lossr/   r:   r;   num_classes_with_blankdecoderr3   r1   _werremove_consecutive*nemo.collections.asr.losses.lattice_lossesr>   r)   r+   r2   r	   warningrH   *nemo.collections.asr.modules.graph_decoderrI   )r6   r9   rE   rS   r>   r+   rH   rI   r   r   r   r5   X   s\   







zASRK2Mixin.update_k2_modules	log_probsencoded_lengthgreedy_predictionsreturnc                 C   s2   | j r| jr	| jr| jj||d\}}}|||fS )a  
        k2-related post-processing parf of .forward()

        Args:
            log_probs: The log probabilities tensor of shape [B, T, D].
            encoded_length: The lengths of the acoustic sequence after propagation through the encoder, of shape [B].
            greedy_predictions: The greedy token predictions of the model of shape [B, T]

        Returns:
            A tuple of 3 elements -
            1) The log probabilities tensor of shape [B, T, D].
            2) The lengths of the acoustic sequence after propagation through the encoder, of shape [B].
            3) The greedy token predictions of the model of shape [B, T] (via argmax)
        )rW   log_probs_length)r2   trainingrH   r:   forward)r6   rW   rX   rY   _r   r   r   _forward_k2_post_processing   s
   
z&ASRK2Mixin._forward_k2_post_processingN)r   r   r   r   r8   r   r5   torchTensorr   r_   r   r   r   r   r(   4   s    ;r(   )abcr   dataclassesr   r   typingr   r   r   r`   	omegaconfr   
nemo.utilsr	   r
   r   r(   r   r   r   r   <module>   s   
