o
    }oiK                     @   s`  d dl Z d dlZd dlmZmZ d dlmZ d dlZd dlZd dl	Z	d dl
mZmZ d dlmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ ej dddd Z!dZ"ej dddd Z#ej dddd Z$dd Z%e  edddd Z&G dd dZ'G dd deZ(G d d! d!Z)G d"d# d#Z*dS )$    N)cached_property	lru_cache)Path)
DictConfig	open_dict)ASRModel)mixins)CTCBPEDecodingCTCBPEDecodingConfigCTCDecodingCTCDecodingConfig)NGramGPULanguageModel)ConfidenceConfig)
Hypothesis)Dskip_cuda_python_test_if_cuda_graphs_conditional_nodes_not_supported)BaseTimestampsTestmodule)scopec                 C   s   t j| dS )Nz asr/test/an4/wav/cen3-mjwl-b.wav)ospathjoin)test_data_dir r   d/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/decoding/test_ctc_decoding.py
audio_file(   s   r   z!nvidia/stt_en_conformer_ctc_smallc                 C   sX   t |d }tj|sJ d| | d|j d }tj|dd|  | S )NzBasr/kenlm_ngram_lm/parakeet-tdt_ctc-110m-libri-1024.kenlm.tmp.arpazLM file not found: lmz.nemoi   )
vocab_size)	r   r   r   existsmktempnamer   	from_filesave_to)tmp_path_factoryr   lm_pathlm_nemo_pathr   r   r   kenlm_model_path0   s
   r%   c                  C   s   t jtdd} |   | S )Ncpu)
model_namemap_location)r   from_pretrained	CTC_MODELeval)modelr   r   r   	ctc_model9   s   r-   c                   C   s   g dS )N) abcdef.r   r   r   r   r   char_vocabulary@   s   r6      )maxsizec                 C   sD   t tj| ddddd}G dd dtj}| }|| |jS )Nasr
tokenizersan4_wpe_128wpe)dirtypec                   @   s   e Zd Zdd ZdS )z!tmp_tokenizer.<locals>._TmpASRBPEc                 S   s   |S Nr   )self_
vocab_pathr   r   r   register_artifactJ   s   z3tmp_tokenizer.<locals>._TmpASRBPE.register_artifactN)__name__
__module____qualname__rC   r   r   r   r   
_TmpASRBPEI   s    rG   )r   r   r   r   r   ASRBPEMixin_setup_tokenizer	tokenizer)r   cfgrG   asrbper   r   r   tmp_tokenizerD   s
   
rM   c                   @   s8  e Zd Zejjdd Zejjdd Zejjdd Zejjej	ddd	gej	d
dd	gdd Z
ejjdd Zejjej	ddd	gej	d
dd	gejjdd Zejjej	ddd	gej	d
dd	gej	ddd	gej	ddd	gej	dedejedejjej  dddgej	dedejedejjej  dddgdd Zejjej	d
dd	gej	ddd	gej	dedejedejjej  dddgej	dedejedejjej  dddgdd ZdS ) TestCTCDecodingc                 C   s(   t  }t }t||d}|d usJ d S Ndecoding_cfg
vocabularyr   r6   r   r@   rK   vocabdecodingr   r   r   test_constructorS   s   z TestCTCDecoding.test_constructorc                 C   s"   t  }t||d}|d usJ d S )NrQ   rJ   )r
   r	   )r@   rM   rK   rV   r   r   r   test_constructor_subwordZ   s   z(TestCTCDecoding.test_constructor_subwordc                 C   s   t dd}t }t||d}d\}}tt d }tj|||fd}tjd||gd}t % |j||dd	d
}	dd |	D }
|
D ]	}t	|t
sKJ qBW d    d S 1 sWw   Y  d S )NgreedystrategyrP            sizelowhighrb   TFfold_consecutivereturn_hypothesesc                 S      g | ]}|j qS r   text.0hypr   r   r   
<listcomp>q       zETestCTCDecoding.test_char_decoding_greedy_forward.<locals>.<listcomp>)r   r6   r   lentorchrandnrandintno_gradctc_decoder_predictions_tensor
isinstancestr)r@   rK   rU   rV   BTVinput_signallength
hypothesestextsrk   r   r   r   !test_char_decoding_greedy_forward`   s    

"z1TestCTCDecoding.test_char_decoding_greedy_forward
alignmentsFT
timestampsc                 C   s@  t d||d}t }t||d}d\}}tt d }tj|||fd}	tjd||gd}
t e |j|	|
ddd	}t	|D ]N\}}t
|tsJJ t|jsRJ t
|jtsZJ |r|jd uscJ t
|jtskJ t|jd
 |
| ksxJ t|jd |
| ksJ |rt|| q?W d    d S 1 sw   Y  d S )NrZ   r\   preserve_alignmentscompute_timestampsrP   r]   r`   ra   rc   Trf   r   )r   r6   r   rq   rr   rs   rt   ru   rv   	enumeraterw   r   	is_tensor
y_sequencerk   rx   r   tupler   check_char_timestamps)r@   r   r   rK   rU   rV   ry   rz   r{   r|   r}   hypsidxrn   r   r   r   ,test_char_decoding_greedy_forward_hypothesesv   s2   
"z<TestCTCDecoding.test_char_decoding_greedy_forward_hypothesesc                 C   s   t dd}t||d}d\}}|jjjd }tj|||fd}tjd||gd}t % |j||dd	d
}	dd |	D }
|
D ]	}t	|t
sHJ q?W d    d S 1 sTw   Y  d S )NrZ   r[   rX   r]   r`   ra   rc   TFrf   c                 S   ri   r   rj   rl   r   r   r   ro      rp   zHTestCTCDecoding.test_subword_decoding_greedy_forward.<locals>.<listcomp>)r
   r	   rJ   r   rr   rs   rt   ru   rv   rw   rx   )r@   rM   rK   rV   ry   rz   r{   r|   r}   r~   r   rk   r   r   r   $test_subword_decoding_greedy_forward   s   

"z4TestCTCDecoding.test_subword_decoding_greedy_forwardc                 C   s:  t d||d}t||d}d\}}|jjjd }tj|||fd}	tjd||gd}
t e |j|	|
ddd	}t	|D ]N\}}t
|tsGJ t|jsOJ t
|jtsWJ |r|jd us`J t
|jtshJ t|jd
 |
| ksuJ t|jd |
| ksJ |rt|| q<W d    d S 1 sw   Y  d S )NrZ   r   rX   r]   r`   ra   rc   Trf   r   )r
   r	   rJ   r   rr   rs   rt   ru   rv   r   rw   r   r   r   rk   rx   r   r   rq   r   check_subword_timestamps)r@   rM   r   r   rK   rV   ry   rz   r{   r|   r}   r   r   rn   r   r   r   /test_subword_decoding_greedy_forward_hypotheses   s0   
"z?TestCTCDecoding.test_subword_decoding_greedy_forward_hypothesespreserve_frame_confidencelength_is_nonelogprobs_devicer&   cudazCUDA required for test.reason)markslength_devicec                 C   s  t d||t|dd}t||d}	d|_t||d}
td d\}}|	jjjd }tj|||f|d}d	|d d d
|	jjjf< d	|d d d|	jjjf< |rQd }n
tj	d||g|d}t
 y |	j||ddd}|
j||ddd}t|t|  kr|ksJ  J t||D ]F\}}t|j|j dksJ t|j|jksJ |r|j|jksJ |rt|jd
 |jd
 ksJ t|jd |jd ksJ qW d    d S 1 sw   Y  d S )NrZ   )r   )r\   r   r   confidence_cfgrX   greedy_batchr`   r]   rb   devicei  r   rd   re   rb   r   Trf   h㈵>)r
   r   r	   r\   rr   manual_seedrJ   r   rs   rt   inference_moderv   rq   zipabsscoreallr   	timestampr   )r@   rM   r   r   r   r   r   r   rK   unbatched_decodingbatched_decodingry   rz   r{   r|   r}   r   batched_hypsrn   batched_hypr   r   r   test_batched_decoding_logprobs   sJ   )

$"z.TestCTCDecoding.test_batched_decoding_logprobslabels_devicec                 C   st  t d|d}t||d}d|_t||d}td d\}	}
|jjjd }tj||	|
f|d}|jjj|d d df< |jjj|d d df< |rJd }n
tjd|
|	g|d	}t X |j	||d
d
d}|j	||d
d
d}t
|t
|  krz|	ks}J  J t||D ]%\}}t|j|j dksJ t|j|jksJ |r|j|jksJ qW d    d S 1 sw   Y  d S )NrZ   )r\   r   rX   r   r`   r]   r   r   r   Trf   r   )r
   r	   r\   rr   r   rJ   r   rt   r   rv   rq   r   r   r   r   r   r   )r@   rM   r   r   r   r   rK   r   r   ry   rz   r{   input_labelsr}   r   r   rn   r   r   r   r   test_batched_decoding_labels!  s:   

$"z,TestCTCDecoding.test_batched_decoding_labelsN)rD   rE   rF   pytestmarkunitrW   rY   r   parametrizer   r   pleasefixmer   rr   r   paramskipifr   is_availabler   r   r   r   r   r   rN   R   s    





5

rN   c                       s`   e Zd ZdZedd Zedd Zedd Zej	j
 fdd	Zej	j
 fd
dZ  ZS )TestCTCTimestampszACTC-specific timestamp tests that inherit from BaseTimestampsTestc                 C   s   t  }t }t||d}|S rO   rS   rT   r   r   r   decoding_chare  s   zTestCTCTimestamps.decoding_charc                 C      t dd}t|| jd}|S NT)r   rX   )r
   r	   rM   r@   rK   rV   r   r   r   decoding_subword_wpel     
z&TestCTCTimestamps.decoding_subword_wpec                 C   r   r   )r
   r	   bpe_tokenizerr   r   r   r   decoding_subword_bper  r   z&TestCTCTimestamps.decoding_subword_bpec                       || _ t   d S r?   )rM   supertest_word_offsets_subword_wper@   rM   	__class__r   r   r   x     z/TestCTCTimestamps.test_word_offsets_subword_wpec                    r   r?   )rM   r   -test_word_offsets_subword_wpe_other_delimiterr   r   r   r   r   }  r   z?TestCTCTimestamps.test_word_offsets_subword_wpe_other_delimiter)rD   rE   rF   __doc__r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r   r   b  s    


r   c                   @   s<   e Zd Zejjejjejjej	
  dddd ZdS ) TestCTCGreedyDecodingWithNGPU_LMzTest is only GPU-based decodingr   c           
      C   sd  t d}||}|j|gd d}t|jj}t|jj}||j	d< d|j	d< |
| W d    n1 s9w   Y  |j|gd d}	|d j|	d jksRJ t|d j|	d j dksbJ t|jj}||j	d< d|j	d< W d    n1 s}w   Y  |
| |j|gd d}	|d j|	d jksJ t|d j|	d j dksJ |
| d S )	Nr   num_workersngram_lm_modelg        ngram_lm_alphar   MbP?g      $@)rr   r   to
transcribecopydeepcopyrK   rV   r   rZ   change_decoding_strategyrk   r   r   )
r@   r   r%   r-   r   r,   gt_hypdecoding_configrK   lm_hypr   r   r   test_ctc_decoding_gpulm  s*   
	


 

 z8TestCTCGreedyDecodingWithNGPU_LM.test_ctc_decoding_gpulmN)rD   rE   rF   r   r   with_downloadsr   r   rr   r   r   r   r   r   r   r   r     s
    r   c                   @   sP   e Zd ZdZejjejjej	
  ddejdg ddefddZdS )	TestCTCGreedyDecodingCudaGrpahszJ
    Tests CudaGraphs implementations from CTC models greedy decoding
    z!CUDA decoder can run only on CUDAr   
force_mode)	no_graphsno_while_loops
full_graphc                 C   s  |dkrt   td}||}t|jj}t|jj}||j	d< d|j	d< d|j	d< |
| W d   n1 s=w   Y  |j|gdd	}	d
d |	D }
dd |	D }dd |	D }d|jjd d< |
|jj |jjj|d |j|gdd	}dd |D }dd |D }dd |D }tt|
D ]f}t|
| t|| ksJ || tj|| ddksJ d| || || ksJ d| t|
| || }|dksJ dt|
| || D ]\}}||krtd| td| td| qq|
| dS )a  
        Compares pure Pytorch and with three modes of statefull implementations for double floating point precision.
            1. Pure pytorch, but statefull implementation: no_graphs
            2. With CudaGrpahs: no_while_loops and full_graph.
        r   r   r   g?r   Fallow_cuda_graphsNr   c                 S   ri   r   rj   rl   r   r   r   ro     rp   zITestCTCGreedyDecodingCudaGrpahs.test_stated_stateless.<locals>.<listcomp>c                 S   ri   r   r   rl   r   r   r   ro     rp   c                 S   ri   r   r   rl   r   r   r   ro     rp   TrZ   )modec                 S   ri   r   rj   rl   r   r   r   ro     rp   c                 S   ri   r   r   rl   r   r   r   ro     rp   c                 S   ri   r   r   rl   r   r   r   ro     rp   g{Gz?)r   zScores mismatch for batch_idx z"Timestamps mismatch for batch_idx r   zGCuda graph greedy decoder should match original decoder implementation.zErroneous samples in batch:zOriginal transcript:zNew transcript:)r   rr   r   r   r   r   rK   rV   r   rZ   r   r   force_cuda_graphs_moderangerq   r   approxjiwerwerr   print)r@   r   r%   r-   r   r   r,   r   rK   actual_hypothesesactual_transcriptsactual_scoresactual_timestampscudagraph_hypothesescudagraph_transcriptscudagraph_scorescudagraph_timestamps	batch_idxr   actualfastr   r   r   test_stated_stateless  sT   	









z5TestCTCGreedyDecodingCudaGrpahs.test_stated_statelessN)rD   rE   rF   r   r   r   r   r   rr   r   r   r   rx   r   r   r   r   r   r     s    r   )+r   r   	functoolsr   r   pathlibr   r   r   rr   	omegaconfr   r   nemo.collections.asr.modelsr   !nemo.collections.asr.parts.mixinsr   2nemo.collections.asr.parts.submodules.ctc_decodingr	   r
   r   r   ?nemo.collections.asr.parts.submodules.ngram_lm.ngram_lm_batchedr   5nemo.collections.asr.parts.utils.asr_confidence_utilsr   +nemo.collections.asr.parts.utils.rnnt_utilsr   !nemo.core.utils.cuda_python_utilsr   .tests.collections.asr.decoding.test_timestampsr   fixturer   r*   r%   r-   r6   rM   rN   r   r   r   r   r   r   r   <module>   sB   





  !$