o
    i'                     @   sl   d Z ddlZddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ G dd	 d	e	ZdS )
z2Parallel beam search module for online simulation.    N)Path)List)BatchBeamSearch)
Hypothesis)
end_detectc                
   @   s   e Zd ZdZdefddZdefddZdefd	d
ZdefddZ		dde
jdededee fddZde
jdedee fddZdS )BatchBeamSearchOnlineSimao  Online beam search implementation.

    This simulates streaming decoding.
    It requires encoded features of entire utterance and
    extracts block by block from it as it shoud be done
    in streaming processing.
    This is based on Tsunoo et al, "STREAMING TRANSFORMER ASR
    WITH BLOCKWISE SYNCHRONOUS BEAM SEARCH"
    (https://arxiv.org/abs/2006.14941).
    
asr_configc                 C   s  t |}d| _d| _d| _d}|jddd]}t|}d| v rPd|d  v r1|d d | _d|d  v r@|d d | _d|d  v rO|d d | _nd	| v rl|d	 }|du rlt	d
 	 W d   dS W d   n1 svw   Y  | jdu s| jdu s| jdu r|durt |}|jddd}t|}W d   n1 sw   Y  d| v r|d }|rd|v r|d | _|rd|v r|d | _|rd|v r|d | _dS dS dS dS dS )zSet config file for streaming decoding.

        Args:
            asr_config (str): The config file for asr training

        Nrzutf-8)encodingencoder_conf
block_sizehop_size
look_aheadconfigzPCannot find config file for streaming decoding: apply batch beam search instead.)
r   r   r   r   openyaml	safe_loadkeyslogginginfo)selfr   train_config_filer   fargsconfig_fileenc_args r   \/home/ubuntu/.local/lib/python3.10/site-packages/espnet/nets/batch_beam_search_online_sim.pyset_streaming_config   sT   
 

z-BatchBeamSearchOnlineSim.set_streaming_configr   c                 C   
   || _ dS )zvSet block size for streaming decoding.

        Args:
            block_size (int): The block size of encoder
        N)r   )r   r   r   r   r   set_block_sizeG      
z'BatchBeamSearchOnlineSim.set_block_sizer   c                 C   r   )zpSet hop size for streaming decoding.

        Args:
            hop_size (int): The hop size of encoder
        N)r   )r   r   r   r   r   set_hop_sizeO   r!   z%BatchBeamSearchOnlineSim.set_hop_sizer   c                 C   r   )zSet look ahead size for streaming decoding.

        Args:
            look_ahead (int): The look ahead size of encoder
        N)r   )r   r   r   r   r   set_look_aheadW   r!   z'BatchBeamSearchOnlineSim.set_look_ahead        xmaxlenratiominlenratioreturnc              
      s  d _  jr jr jrt j j }n|jd }d}||jd k r+|dd|}n|}|dkr7|jd }ntdt||d }t||d }t	
dt|jd   t	
dt|  t	
dt|   |}	g }
g }d}d}|rd}||jd k r|dd|}n|} ||	 ||k rt	dt|   |	|}||d kr |||||}	|jjd }g }|jt||jd f  jk}t|jd D ]2}|| r ||}|| q|s|j|d	f |j|d
d	f v r||jd k rd}d}q|dkr#tdd |D |r#t	
d|  d}nt|dkr4||jd k r4d}|ry jrU|t j t j |jd k rU|t j7 }n|jd }t	d| |dkrxt|
dkrx j rx|
}	|d8 }g }
nAd}|	}
 |||||}	||jd kr|D ]}|| qt|	dkrt	
d d}nt	dt|	  |d7 }||k s|szt|dd dd}t|dkrt	d |dk rg S  ||td|d S |d }|j D ] \}}t	
|dd j | dd| j |  dd|  qt	
d|j!d t	
d|j!t|j d t	
dt|   j"d
urOt	
d d!# fd"d|jdd	 D  d#  |S )$a  Perform beam search.

        Args:
            x (torch.Tensor): Encoded speech feature (T, D)
            maxlenratio (float): Input length ratio to obtain max output length.
                If maxlenratio=0.0 (default), it uses a end-detect function
                to automatically find maximum hypothesis lengths
            minlenratio (float): Input length ratio to obtain min output length.

        Returns:
            list[Hypothesis]: N-best decoding results

        Tr      zdecoder input length: zmax output length: zmin output length: Fz	position Nr$   c                 S   s   g | ]}|  qS r   )asdict).0lhr   r   r   
<listcomp>   s    z4BatchBeamSearchOnlineSim.forward.<locals>.<listcomp>zend detected at zGoing to next block: %dzno hypothesis. Finish decoding.zremained hypotheses: c                 S   s   | j S )N)score)r%   r   r   r   <lambda>   s    z2BatchBeamSearchOnlineSim.forward.<locals>.<lambda>)keyreversezOthere is no N-best results, perform recognition again with smaller minlenratio.g?z6.2fz * 3z = z for ztotal log probability: z.2fznormalized log probability: z"total number of ended hypotheses: zbest hypo:  c                    s   g | ]} j | qS r   )
token_list)r,   r%   r   r   r   r.      s    
)$conservativer   r   r   intshapenarrowmaxsizer   r   strinit_hypextenddebugsearchpost_processyseqtorcharangelengtheosrange_selectappendr   lensortedwarningforwardscoresitemsweightsr/   r5   join)r   r%   r&   r'   cur_end_frameprocess_idxhmaxlenminlenrunning_hyps	prev_hyps
ended_hypsprev_repeatcontinue_decodemove_to_next_blockbestn_batchlocal_ended_hypsis_local_eosihyp
nbest_hypskvr   r6   r   rO   _   s   



"
 

S
. z BatchBeamSearchOnlineSim.forwardhypsc                 C   sL   | j  D ]\}}t|dr|| t|dr#||j| |j|< qdS )a  Extend probabilities and states with more encoded chunks.

        Args:
            x (torch.Tensor): The extended encoder output feature
            hyps (Hypothesis): Current list of hypothesis

        Returns:
            Hypothesis: The extended hypothesis

        extend_probextend_stateN)scorersrQ   hasattrri   rj   states)r   r%   rh   rf   dr   r   r   r@      s   


zBatchBeamSearchOnlineSim.extendN)r$   r$   )__name__
__module____qualname____doc__r>   r   r9   r    r"   r#   rE   Tensorfloatr   r   rO   r@   r   r   r   r   r      s$    ,	
   r   )rr   r   pathlibr   typingr   rE   r   espnet.nets.batch_beam_searchr   espnet.nets.beam_searchr   espnet.nets.e2e_asr_commonr   r   r   r   r   r   <module>   s    