o
    }oiV                     @   sj  d dl Z d dlmZ d dlZd dlZd dlm  mZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZmZmZ d d	lmZmZ ed
gZej r`eed ej  rmeed ed
ej!fde"dedejdej#fddZ$dd Z%ej&j'ej&(ddd eD dd eD  ej&(dddgej&(dddgej&(dd gej&(d!d"gdejd#e)de)de"de"d!e"fd$d%Z*ej&j'ej&(dd&d eD d'd eD  ej&(dddgej&(dddgej&(dd gej&(d!d"gdejd#e)de)de"de"d!e"fd(d)Z+ej&j'ej&(dd*d eD d+d eD  ej&(dddgej&(dddgej&(dd gej&(d!d"gdejd#e)de)de"de"d!e"fd,d-Z,ej&j'ej&(dd.d eD d/d eD  ej&(dddgej&(dddgej&(dd gej&(d!d"gdejd#e)de)de"de"d!e"fd0d1Z-dS )2    N)Optional)	open_dict)tqdm)ASRModel)BatchedLabelLoopingState%GreedyBatchedLabelLoopingComputerBase)read_manifest)BatchedHyps
Hypothesisbatched_hyps_to_hypotheses)
load_audiomake_preprocessor_deterministiccpuzcuda:0mpsnum_samplesmodeldevicedtypec                 C   s   | d | }t  \ t| |  g g }}t|ddD ]}t|\}	}
||	 |t j|	jd t j	d qt j
jjj|ddj||d}t j|t j	d|}|||d\}}W d    ||fS 1 siw   Y  ||fS )	NzLoading audio files)descr   )r   T)batch_first)r   r   )input_signalinput_signal_length)torchno_gradr   evalr   r   appendtensorshapeint64nnutilsrnnpad_sequenceto)test_audio_filenamesr   r   r   r   audio_filepaths
all_inputsall_lengths
audio_fileaudio_tensor_input_batchlength_batchencoded_outputsencoded_length r/   j/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/decoding/test_streaming_decoding.pyget_model_encoder_output)   s    



r1   c                 C   s2   dd | D }t |}t||||d\}}||fS )zFHelper function to get encoder outputs for a batch of manifest recordsc                 S   s   g | ]}|d  qS )audio_filepathr/   ).0recordr/   r/   r0   
<listcomp>F       z:get_batch_encoder_outputs_from_records.<locals>.<listcomp>)r$   r   r   r   )lenr1   )recordsr   r   	filenameslocal_batch_sizeencoder_outputencoder_output_lenr/   r/   r0   &get_batch_encoder_outputs_from_recordsD   s   
r=   zdevice,use_cuda_graph_decoderc                 C      g | ]}|d fqS Fr/   r3   r   r/   r/   r0   r5   Q   r6   r5   c                 C      g | ]}|j d kr|dfqS cudaTtyper@   r/   r/   r0   r5   Q       is_tdtFT
chunk_size      
batch_size   max_symbols
   use_cuda_graph_decoderc
              
   C   s8  |r|n|}
|
   |
j|d t|
jj}d|_t| ||j_	|	|j_
W d   n1 s1w   Y  |
| t|}|
jt| |d}dd |D }g }|
jjj}t  t  tdt||D ]v}t||||  |
|d\}}|jd }d}d}|d	d
}td|jd	 |D ]<}|| }tj||d}t||}t|t|}||dd||| f ||d\}}}|du r|}q|| q|dusJ |t|d|d qjW d   n1 sw   Y  W d   n1 sw   Y  g }|D ]}| |
j!"|j#$  q||ksJ dS )z*Test streaming decoding with batched stater   greedy_batchNaudiorK   c                 S      g | ]}|j qS r/   textr3   hypr/   r/   r0   r5   q       z>test_label_looping_streaming_batched_state.<locals>.<listcomp>r   r   r   rI      
fill_valuexout_lenprev_batched_staterK   )%r   r#   copydeepcopycfgdecodingstrategyr   greedyrO   rM   change_decoding_strategyr   
transcribestrabsolutedecoding_computerr   r   inference_moderanger7   r=   r   	transpose	full_likeminimummaximum
zeros_likemerge_extendr   r   	tokenizerids_to_text
y_sequencetolist)tmp_path_factoryan4_val_manifest_corrected%stt_en_fastconformer_transducer_largestt_en_fastconformer_tdt_larger   rO   rG   rH   rK   rM   r   decoding_cfgmanifesttranscriptionsref_transcriptsall_hypsrm   ir;   r<   r:   statebatched_hypstrest_lencurrent_lenbatched_hyps_chunkr*   streaming_transcriptsrX   r/   r/   r0   *test_label_looping_streaming_batched_stateN   s^   





 r   c                 C   r>   r?   r/   r@   r/   r/   r0   r5      r6   c                 C   rA   rB   rD   r@   r/   r/   r0   r5      rF   c
                 C   s  |r|n|}
|
   |
j|d t|
jj}d|_t| ||j_	|	|j_
W d   n1 s1w   Y  |
| t|}|
jt| |d}dd |D }g }|
jj}t  t m tdt||D ]\}t||||  |
|d\}}d}td|jd	 |D ]1}|| }tj||d
}t||}t|t|}||dddd||| f ||d\}q|D ]}|  q|| qiW d   n1 sw   Y  W d   n1 sw   Y  g }|D ]}||
j|j !  q||ksJ dS )z/Test streaming decoding with partial hypothesesrP   rQ   NrR   c                 S   rT   r/   rU   rW   r/   r/   r0   r5      rY   zCtest_label_looping_streaming_partial_hypotheses.<locals>.<listcomp>r   rZ   r[   r\   r;   encoded_lengthspartial_hypotheses)"r   r#   rc   rd   re   rf   rg   r   rh   rO   rM   ri   r   rj   rk   rl   r   r   rn   ro   r7   r=   r   rq   rr   rs   rt   clean_decoding_state_rv   r   rw   rx   ry   rz   )r{   r|   r}   r~   r   rO   rG   rH   rK   rM   r   r   r   r   r   r   
rnnt_inferr   r;   r<   hypsr   r   r   rX   r   r/   r/   r0   /test_label_looping_streaming_partial_hypotheses   sT   





 r   c                 C   r>   r?   r/   r@   r/   r/   r0   r5      r6   c                 C   rA   rB   rD   r@   r/   r/   r0   r5      rF   c
           +   
   C   sh  |r|n|}
|
   |
j|d t|
jj}d|_t| ||j_	|	|j_
W d   n1 s1w   Y  |
| t|}|
jt| |d}dd |D }dd tt|D }|
jjj}|t|k skJ dt  t  t|d| |
|d	\}}t||||  |
|d	\}}|jd
 |jd
 k rt|d|jd
 |jd
  f}tj||ddd|}d}|}|| }d}dd t|D }tt|}t|}d}||  }dk r|dddf tj||ddddf  }t ||dddf d }tj!||d} t | |} ||dd|f }!||!| |d\}"}#}t"|"d|d}$t#t$||$D ]\}%\}&}'|&du rH|'||%< q7|&%|' q7|| 7 }|| 8 }|&||dk tj'|dkddd ( ) }(|(D ]|})||) }&|||)  du r|&|||) < d||)< |r|| ||)< || ||)< |||)< d||)< |d7 }|d7 }||jd kr|t|k rt||||  |
|d	\}}|jd
 |jd
 k rt|d|jd
 |jd
  f}d}||7 }qpd}qp||  }dk sW d   n	1 sw   Y  W d   n	1 sw   Y  g }*|D ]}&|**|
j+,|&j-)  q||*ks2J dS )9Test streaming continuos decoding with partial hypothesesrP   rQ   NrR   c                 S   rT   r/   rU   rW   r/   r/   r0   r5      rY   zItest_label_looping_continuous_streaming_batched_state.<locals>.<listcomp>c                 S      g | ]}d qS Nr/   r3   r*   r/   r/   r0   r5          cBatch size should be less than the number of records in the manifest for continuous streaming test.rZ   r[   r   rI   Tc                 S   r   r   r/   r   r/   r/   r0   r5     r   r\   r^   rb   as_tupleF).r   r#   rc   rd   re   rf   rg   r   rh   rO   rM   ri   r   rj   rk   rl   ro   r7   rm   r   r   rn   r=   r   Fpadarange	unsqueezeexpandlistrt   anyrr   rq   r   	enumeratezipru   reset_state_by_masknonzeror   rz   r   rw   rx   ry   )+r{   r|   r}   r~   r   rO   rG   rH   rK   rM   r   r   r   r   r   r   rm   r;   r<   encoder_output_nextencoder_output_len_nextexpanded_batch_indicesnext_batch_inext_batch_global_inext_query_utterance_ihas_nextr   hyps_global_indicesencoder_output_tr   r   frame_indicesr   encoder_framesr   r*   hyps_continuationsr   rX   hyp_continuationfinished_decoding_indicesidxr   r/   r/   r0   5test_label_looping_continuous_streaming_batched_state   s   





 
*

 Kr   c                 C   r>   r?   r/   r@   r/   r/   r0   r5   V  r6   c                 C   rA   rB   rD   r@   r/   r/   r0   r5   V  rF   c
           %   
   C   s  |r|n|}
|
j |d t|
jj}d|_t| ||j_|	|j_	W d   n1 s-w   Y  |

| t|}|
jt| |d}dd |D }dd tt|D }|
jj}|t|k sfJ dt k t U t|d| |
|d	\}}t||||  |
|d	\}}|jd
 |jd
 k rt|d|jd
 |jd
  f}tj||ddd|}d}|}|| }d}dd t|D }tt|}t|}||  }dk r|dddf tj||ddddf  }t||dddf d }tj||d}t||}||dd|f  dd
} || ||d\}||7 }||8 }tj!|dkddd " # }!|!D ]w}"||" }#|#|||" < |r|#$  d||"< || ||"< || ||"< |||"< d||"< |d7 }|d7 }||jd kr|t|k rt||||  |
|d	\}}|jd
 |jd
 k rt|d|jd
 |jd
  f}d}||7 }q=d}q=||  }dk sW d   n	1 sw   Y  W d   n	1 sw   Y  |D ]}#|#dur|#$  qg }$|D ]}#|$%|
j&'|#j(#  q||$ks	J dS )r   rP   rQ   NrR   c                 S   rT   r/   rU   rW   r/   r/   r0   r5   u  rY   zNtest_label_looping_continuous_streaming_partial_hypotheses.<locals>.<listcomp>c                 S   r   r   r/   r   r/   r/   r0   r5   w  r   r   rZ   r[   r   rI   r   Tc                 S   r   r   r/   r   r/   r/   r0   r5     r   r\   r   r   F))r#   rc   rd   re   rf   rg   r   rh   rO   rM   ri   r   rj   rk   rl   ro   r7   r   r   rn   r=   r   r   r   r   r   r   r   rt   r   rr   rq   rp   r   r   rz   r   r   rw   rx   ry   )%r{   r|   r}   r~   r   rO   rG   rH   rK   rM   r   r   r   r   r   r   r   r;   r<   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rX   r   r/   r/   r0   :test_label_looping_continuous_streaming_partial_hypothesesS  s   




 
*
 G
r   ).rc   typingr   pytestr   torch.nn.functionalr   
functionalr   	omegaconfr   	tqdm.autor   nemo.collections.asr.modelsr   Lnemo.collections.asr.parts.submodules.transducer_decoding.label_looping_baser   r   /nemo.collections.asr.parts.utils.manifest_utilsr   +nemo.collections.asr.parts.utils.rnnt_utilsr	   r
   r   $tests.collections.asr.decoding.utilsr   r   r   DEVICESrC   is_availabler   r   float32intr   r1   r=   markwith_downloadsparametrizeboolr   r   r   r   r/   r/   r/   r0   <module>   s   



	
?	
9	
r	
