o
    wi%                     @   sX   d dl mZ d dlZd dlmZ d dlmZ d dlmZm	Z	m
Z
mZ G dd deZdS )    )OptionalN)
DictConfig)NeuralModule)LengthsTypeLogprobsType
NeuralTypePredictionsTypec                       s   e Zd ZdZedd Zedd Z								
	ddededededede	de
e f fddZdd ZdddZe dd Ze dd Z  ZS )ViterbiDecoderWithGraphaD  Viterbi Decoder with WFSA (Weighted Finite State Automaton) graphs.

    Note:
        Requires k2 v1.14 or later to be installed to use this module.

    Decoder can be set up via the config, and optionally be passed keyword arguments as follows.

    Examples:
        .. code-block:: yaml

            model:  # Model config
                ...
                graph_module_cfg:  # Config for graph modules, e.g. ViterbiDecoderWithGraph
                    split_batch_size: 0
                    backend_cfg:
                        topo_type: "default"       # other options: "compact", "shared_blank", "minimal"
                        topo_with_self_loops: true
                        token_lm: <token_lm_path>  # must be provided for criterion_type: "map"

    Args:
        num_classes: Number of target classes for the decoder network to predict.
            (Excluding the blank token).

        backend: Which backend to use for decoding. Currently only `k2` is supported.

        dec_type: Type of decoding graph to use. Choices: `topo` and `token_lm`, 
            with `topo` standing for the loss topology graph only 
            and `token_lm` for the topology composed with a token_lm graph.

        return_type: Type of output. Choices: `1best` and `lattice`.
            `1best` is represented as a list of 1D tensors.
            `lattice` can be of type corresponding to the backend (e.g. k2.Fsa).

        return_ilabels: For return_type=`1best`.
            Whether to return input labels of a lattice (otherwise output labels).

        output_aligned: For return_type=`1best`.
            Whether the tensors length will correspond to log_probs_length 
            and the labels will be aligned to the frames of emission 
            (otherwise there will be only the necessary labels).

        split_batch_size: Local batch size. Used for memory consumption reduction at the cost of speed performance.
            Effective if complies 0 < split_batch_size < batch_size.

        graph_module_cfg: Optional Dict of (str, value) pairs that are passed to the backend graph decoder.
    c                 C   s(   t | jrdndt t tdt dS )z3Returns definitions of module input ports.
        )BTD)r
   r   r   r   r
   )	log_probsinput_lengths)r   	_3d_inputr   tupler   self r   g/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/asr/modules/graph_decoder.pyinput_typesH   s   z#ViterbiDecoderWithGraph.input_typesc                 C   s   dt dt iS )z4Returns definitions of module output ports.
        predictions)r
   r   )r   r   r   r   r   r   output_typesQ   s   z$ViterbiDecoderWithGraph.output_typesk2topo1bestTr   Nbackenddec_typereturn_typereturn_ilabelsoutput_alignedsplit_batch_sizegraph_module_cfgc	           
         s4  || _ || _|| _|| _|| _|dkrd| _n|dkrd| _n|dkr+td| dtd| |d	kr| jd
krBddlm	}	 n6| jdkrNddlm
}	 n*| jdkrZddlm}	 n| jdkrbt | jdkrptd| j dtd| j |	| j d | j |d| _n|dkrtd| jdk| _t   d S )Nr   FlatticeTnbestzreturn_type z is not supported at the momentzUnsupported return_type: r   r   r   )
CtcDecodertopo_rnnt_ali)RnntAlignertoken_lm)TokenLMDecoder	loose_alitlgz	dec_type zUnsupported dec_type:    )num_classesblankcfggtnz&gtn-backed decoding is not implemented	topo_rnnt)_blankr   r   r    r   return_latticesNotImplementedError
ValueError,nemo.collections.asr.parts.k2.graph_decodersr$   r&   r(   _decoderr   super__init__)
r   r,   r   r   r   r   r   r    r!   Decoder	__class__r   r   r8   W   s:   




z ViterbiDecoderWithGraph.__init__c                 C   s   | j | dS )z4Updates graph of the backend graph decoder.
        N)r6   update_graph)r   graphr   r   r   r<      s   z$ViterbiDecoderWithGraph.update_graphc                    sp  |d u r|d us|d ur|d u rt d| d| |d u r'd} fdd}nd} fdd}|jd } jdkr j|krg }g }	td| jD ]O}
|
}t| j |}||| }|||d | f }|r||| }|||d | f }|||||\}}~~n|||\}}~~||7 }|	|7 }	qIn|r|||||n|||\}}	t|t|	ksJ ||	fS )	Nz<Both targets and target_length have to be None or not None: z, Fc                    s    j j| |d j jdS )NFr2   r   r   )r6   decoder   r   )abr   r   r   <lambda>       z7ViterbiDecoderWithGraph._forward_impl.<locals>.<lambda>Tc              	      s    j j| |||ddddS )NFTr>   )r6   align)r@   rA   cdr   r   r   rB      rC   r   )RuntimeErrorshaper    rangeminmaxlen)r   r   log_probs_lengthtargetstarget_lengthrD   decode_func
batch_sizer   probs	batch_idxbeginendlog_probs_length_partlog_probs_parttarget_length_parttargets_partpredictions_part
probs_partr   r   r   _forward_impl   sH    

z%ViterbiDecoderWithGraph._forward_implc                 C   s   | j dkrtd| j  d| ||\}}tjdd |D |d jd}tt|| f| j	j
|d jd}tt|| fdj
|d jd}tt||D ]\}\}	}
|	||d || f< |
||d || f< qS|||fS )	NloosealizDecoder with dec_type=`z'` is not intended for regular decoding.c                 S   s   g | ]}t |qS r   )rL   ).0predr   r   r   
<listcomp>   s    z3ViterbiDecoderWithGraph.forward.<locals>.<listcomp>r   )deviceg      ?)r   rG   r\   torchtensorra   fullrL   rK   r1   to	enumeratezip)r   r   rM   r   rR   lengthspredictions_tensorprobs_tensorir_   probr   r   r   forward   s   
&
zViterbiDecoderWithGraph.forwardc           	      C   s   ||k|dk@ }t |s| jdkr| ||||}|S | || || || || }t|D ]"\}}|sR|d |t jdt jd |d |t jdt jd q0|S )Nr   r]   )dtyper+   )	rb   allr   r\   rf   insertemptyint32float)	r   r   rM   rN   rO   
len_enoughresultsrk   computedr   r   r   rD      s   	zViterbiDecoderWithGraph.align)r   r   r   TTr   N)NN)__name__
__module____qualname____doc__propertyr   r   strboolintr   r   r8   r<   r\   rb   no_gradrm   rD   __classcell__r   r   r:   r   r	      sD    /

	0
.
r	   )typingr   rb   	omegaconfr   nemo.core.classesr   nemo.core.neural_typesr   r   r   r   r	   r   r   r   r   <module>   s   