o
    Gi                     @   s   d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
mZ dd	lmZ 	
				d!dejdejde	dededededede	fddZ	d"de	dede	fddZde	deee  fdd ZdS )#z8
This file contains various functions for CTC decoding.
    )ListN   )intersect_dense_pruned)DenseFsaVec)Fsashortest_path)RaggedTensor         '  log_problog_prob_lendecoding_graphsearch_beamoutput_beammin_active_statesmax_active_statessubsampling_factorreturnc                 C   s   | j dks
J | j|j dksJ |j| d|dks&J | j|jf| d}tt|t|| g 	tj
}	t| |	|d d}
t||
||||d}|S )a4  Get the decoding lattice from a decoding graph and  log_softmax output.
    Args:
      log_prob:
        Output from a log_softmax layer of shape ``(N, T, C)``.
      log_prob_len:
        A tensor of shape ``(N,)`` containing number of valid frames from
        ``log_prob`` before padding.
      decoding_graph:
        An Fsa, the decoding graph. It can be either an ``HLG`` or an ``H``.
        You can use :func:`ctc_topo` to build an ``H``.
      search_beam:
        Decoding beam, e.g. 20.  Smaller is faster, larger is more exact
        (less pruning). This is the default value; it may be modified by
        `min_active_states` and `max_active_states`.
      output_beam:
         Beam to prune output, similar to lattice-beam in Kaldi.  Relative
         to best path of output.
      min_active_states:
        Minimum number of FSA states that are allowed to be active on any given
        frame for any given intersection/composition task. This is advisory,
        in that it will try not to have fewer than this number active.
        Set it to zero if there is no constraint.
      max_active_states:
        Maximum number of FSA states that are allowed to be active on any given
        frame for any given intersection/composition task. This is advisory,
        in that it will try not to exceed that but may not always succeed.
        You can use a very large number if no constraint is needed.
      subsampling_factor:
        The subsampling factor of the model.
    Returns:
      An FsaVec containing the decoding result. It has axes [utt][state][arc].
       r   r   )allow_truncate)r   r   r   r   )ndimshapesizetorchstackarangezeroscputtoint32r   r   )r   r   r   r   r   r   r   r   
batch_sizesupervision_segmentdense_fsa_veclattice r(   =/home/ubuntu/.local/lib/python3.10/site-packages/k2/decode.pyget_lattice   s<   *
	r*   Tr'   use_double_scoresc                 C   s   t | |d}|S )a;  Get the best path from a lattice.

    Args:
      lattice:
        The decoding lattice returned by :func:`get_lattice`.
      use_double_scores:
        True to use double precision floating point in the computation.
        False to use single precision.
    Return:
      An FsaVec containing linear paths.
    )r+   r   )r'   r+   	best_pathr(   r(   r)   one_best_decodingb   s   r-   
best_pathsc                 C   s   t | jtr&| jd}| j |j}|d}|d}t||j}n| j d}t|| j}|d}|j	dks@J |
 S )a  Extract aux_labels from the best-path FSAs and remove 0s and -1s.
    Args:
      best_paths:
        An Fsa with best_paths.arcs.num_axes() == 3, i.e.
        containing multiple FSAs, which is expected to be the result
        of `shortest_path` (otherwise the returned values won't
        be meaningful).

    TODO:
      Also return timestamps of each label.

    Returns:
      Returns a list of lists of int, containing the label sequences we
      decoded.
    r   r      )
isinstance
aux_labelsr	   remove_values_leqarcsr   composeremove_axisvaluesnum_axestolist)r.   r1   	aux_shaper(   r(   r)   get_aux_labelsu   s   


r:   )r
   r   r   r   r   )T)__doc__typingr   r   autogradr   r&   r   fsar   fsa_algor   raggedr	   Tensorfloatintr*   boolr-   r:   r(   r(   r(   r)   <module>   sR   	
S
