o
    Gi(                     @   s   d dl mZ d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 dd	l
mZ d d
lmZ G dd deZG dd deZdS )    )List)TupleN)Fsa)RaggedShape)RaggedTensor)Tensor   )index_select)RnntDecodingConfigc                   @   s,   e Zd ZdeddfddZdefddZdS )RnntDecodingStreamfsareturnNc                 C   s    || _ t|j| _|j| _dS )a  Create a new rnnt decoding stream.

        Every sequence(wave data) needs a decoding stream, this function is
        expected to be called when a new sequence comes. We support different
        decoding graphs for different streams.

        Args:
          graph:
            The decoding graph used in this stream.

        Returns:
          A rnnt decoding stream object, which will be combined into
          :class:`RnntDecodingStreams` to do decoding together with other
          sequences in parallel.
        N)r   _k2create_rnnt_decoding_streamarcsstreamdevice)selfr    r   B/home/ubuntu/.local/lib/python3.10/site-packages/k2/rnnt_decode.py__init__!   s   zRnntDecodingStream.__init__c                 C   s   | j  d| j dS )aReturn a string representation of this object

        For visualization and debug only.
        z, device : 
)r   r   r   r   r   r   __str__5   s   zRnntDecodingStream.__str__)__name__
__module____qualname__r   r   strr   r   r   r   r   r       s    r   c                   @   s   e Zd ZdZdee deddfddZdefdd	Z	de
eef fd
dZdeddfddZdddZ	ddee dedefddZdS )RnntDecodingStreamszSee https://github.com/k2-fsa/icefall/blob/master/egs/librispeech/ASR/pruned_transducer_stateless/beam_search.py  # noqa
    for how this class is used in RNN-T decoding.
    src_streamsconfigr   Nc                 C   sP   t |dksJ t || _|| _| jd j| _dd | jD }t||| _dS )a!  
        Combines multiple RnntDecodingStream objects to create a
        RnntDecodingStreams object, then all these RnntDecodingStreams can do
        decoding in parallel.

        Args:
          src_streams:
            A list of RnntDecodingStream object to be combined.
          config:
            A configuration object which contains decoding parameters like
            `vocab-size`, `decoder_history_len`, `beam`, `max_states`,
            `max_contexts` etc.

        Returns:
          Return a RnntDecodingStreams object.
        r   c                 S   s   g | ]}|j qS r   )r   ).0xr   r   r   
<listcomp>Y   s    z0RnntDecodingStreams.__init__.<locals>.<listcomp>N)lennum_streamsr    r   r   r   streams)r   r    r!   r'   r   r   r   r   B   s   
zRnntDecodingStreams.__init__c                 C   s<   d| j  d}t| j D ]}|d| d| j|  7 }q|S )r   znum_streams : r   zstream[z] : )r&   ranger    )r   sir   r   r   r   \   s   zRnntDecodingStreams.__str__c                 C   s
   | j  S )a=  
        This function must be called prior to evaluating the joiner network
        for a particular frame.  It tells the calling code for which contexts
        it must evaluate the joiner network.

        Returns:
          Return a two-element tuple containing a RaggedShape and a tensor.

          shape:
            A RaggedShape with 2 axes, representing [stream][context].

          contexts:
            A tensor of shape [tot_contexts][decoder_history_len], where
            tot_contexts == shape->TotSize(1) and decoder_history_len comes from
            the config, it represents the number of symbols in the context of
            the decoder network (assumed to be finite). It contains the token
            ids into the vocabulary(i.e. `0 <= value < vocab_size`).
            Its dtype is torch.int32.
        )r'   get_contextsr   r   r   r   r+   f   s   
z RnntDecodingStreams.get_contextslogprobsc                 C   s   | j | dS )ad  
        Advance decoding streams by one frame.

        Args:
          logprobs:
            A tensor of shape [tot_contexts][num_symbols], containing log-probs
            of symbols given the contexts output by `get_contexts()`. It
            satisfies `logprobs.Dim0() == shape.TotSize(1)`, shape is returned
            by `get_contexts()`.
        N)r'   advance)r   r,   r   r   r   r-   |   s   zRnntDecodingStreams.advancec                 C   s   | j   dS )aX  
        Terminate the decoding process of current RnntDecodingStreams object.
        It will update the decoding states and store the decoding results
        currently got to each of the individual streams.

        Note:
          We can not decode with this object anymore after calling
          terminate_and_flush_to_streams().
        N)r'   terminate_and_flush_to_streamsr   r   r   r   r.      s   
z2RnntDecodingStreams.terminate_and_flush_to_streamsF
num_framesallow_partialc                 C   s  t || jks	J | j||\}}t|}t }t| jD ]O}| j| j}|j	ddD ]@\}	}
|	|vrld}t
|
trHt||	}|
j}d}nt
|
tjsPJ |
jtjksXJ |
jdks_J tj}d}|||d||	< q,q| D ]\}	}t }d}t| jD ]}| j| j}| j}|| j}||||  }|| }t||	rt||	}
|d dkrt
|
tsJ t|
||d	}nCt
|
tsJ |
jdksJ |
jtjksJ |
j|ddd
\}}n"|d dkrtj|g| |d |d}nttj|df|d |d}|| q|d dkrt|}ntj j|dd}t!||	| qrt| jD ]}| j| j}|" D ]\}	}
t!||	|
 q1q%|S )a  
        Generate the lattice Fsa currently got.

        Note:
          The attributes of the generated lattice is a union of the attributes
          of all the decoding graphs. For example, if `self` contains three
          individual stream, each stream has its own decoding graphs, graph[0]
          has attributes attr1, attr2; graph[1] has attributes attr1, attr3;
          graph[2] has attributes attr3, attr4; then the generated lattice has
          attributes attr1, attr2, attr3, attr4.

        Args:
          num_frames:
            A List containing the number of frames we want to gather for each
            stream (note: the frames we have ever received for the corresponding
            stream). It MUST satisfy `len(num_frames) == self.num_streams`.
          allow_partial:
            If true and there is no final state active, we will treat all the
            states on the last frame to be final state.
            If false, we only care about the real final state in the
            decoding graph on the last frame when generating lattice.
            Default False.

        Returns:
          Return the lattice Fsa with all the attributes propagated.
          The returned Fsa has 3 axes with `fsa.dim0==self.num_streams`.
        F)include_scoresr   r      r   )fillerdtypetensor_typer5   )default_value)axisneed_value_indexesr4   )r4   r   )r7   )#r%   r&   r'   format_outputr   dictr(   r    r   named_tensor_attr
isinstancer   float
get_fillerr4   k2r   torchint32num_axesitemslistr   num_arcshasattrgetattrr	   indextensoremptyappendcatraggedsetattrnamed_non_tensor_attr)r   r/   r0   ragged_arcsout_mapr   tensor_attr_infor*   srcnamevaluer3   r4   r5   infovaluesstartr   rE   arc_map	new_value_r   r   r   r9      s    




z!RnntDecodingStreams.format_output)r   N)F)r   r   r   __doc__r   r   r
   r   r   r   r   r   r   r+   r-   r.   intboolr   r9   r   r   r   r   r   =   s*    


r   )typingr   r   r?   r@   r   r   r   r   r   opsr	   r
   objectr   r   r   r   r   r   <module>   s   