o
    }oiYY                     @   s  d dl Z d dlZd dlmZmZ d dlmZ d dlmZ d dl	Z	d dl
Z
d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ e %e"p{e &e"Z'dd Z(e	) edddd Z*eddd.ddZ+eddd/ddZ,edddd Z-d d! Z.d"d# Z/d$d% Z0d0d&e1d'ee2eB  fd(d)Z3G d*d+ d+Z4G d,d- d-e$Z5dS )1    N)cached_property	lru_cache)Path)Optional)
DictConfig)ASRModel)RNNTDecoder	RNNTJoint)mixins)rnnt_beam_decoding)rnnt_greedy_decoding)tdt_beam_decoding)RNNTBPEDecodingRNNTDecodingRNNTDecodingConfig)
rnnt_utils)numba_utils)__NUMBA_MINIMUM_VERSION__)BaseTimestampsTestc                   C   s   g dS )N) abcdef. r   r   r   e/home/ubuntu/.local/lib/python3.10/site-packages/tests/collections/asr/decoding/test_rnnt_decoding.pychar_vocabulary*   s   r      )maxsizec                 C   sD   t tj| ddddd}G dd dtj}| }|| |jS )Nasr
tokenizersan4_wpe_128wpe)dirtypec                   @   s   e Zd Zdd ZdS )z!tmp_tokenizer.<locals>._TmpASRBPEc                 S   s   |S Nr   )self_
vocab_pathr   r   r   register_artifact4   s   z3tmp_tokenizer.<locals>._TmpASRBPE.register_artifactN)__name__
__module____qualname__r,   r   r   r   r   
_TmpASRBPE3   s    r0   )r   ospathjoinr
   ASRBPEMixin_setup_tokenizer	tokenizer)test_data_dircfgr0   asrbper   r   r   tmp_tokenizer.   s
   
r:         c                 C   s,   |dd}t d t|| d}|  |S )N   )pred_hiddenpred_rnn_layersr   )prednet
vocab_size)torchmanual_seedr   freeze)rA   decoder_output_sizeprednet_cfgdecoderr   r   r   get_rnnt_decoder<   s
   

rH   c                 C   s2   |||dd}t d t|| |d}|  |S )Nrelu)encoder_hiddenr>   joint_hidden
activationr   )
vocabulary)rB   rC   r	   rD   )rA   rM   encoder_output_sizerE   joint_output_shapejointnet_cfgjointr   r   r   get_rnnt_jointE   s   
rR   r=   c                 C   s   dd l }tj| ddddd}t E tj|dd}d	|jj	_
d|jj	_|  |j|d
dd\}}tj|tjdd}tjt|gtjd}|||d\}	}
W d    n1 s[w   Y  ||	|
fS )Nr   r"   testan4wavzcen3-fjlp-b.wavcpu)map_location        i>  T)r2   srmonodtype)input_signalinput_signal_length)librosar1   r2   r3   rB   no_gradr   from_pretrainedpreprocessor
featurizerditherpad_toevalloadtensorfloat32	unsqueezelenint32)data_dir
model_namer_   audio_filepathmodelaudiorY   r]   r^   encodedencoded_lenr   r   r   get_model_encoder_outputS   s   



rt   c                 C   s   | | }|S r(   )decode_hypothesis)hypsdecodingdecoded_hypsr   r   r   "decode_text_from_greedy_hypothesesj   s   
ry   c                 C   sB   g }g }| D ]}|j }||}||d  || q||fS )Nr   )n_best_hypothesesru   append)rv   rw   
hypothesesall_hypotheses	nbest_hypn_hypsrx   r   r   r   !decode_text_from_nbest_hypothesesp   s   
r   c                 C   s
  | dd}t| d\}}}| }t|d d }tj|j|jf|d|d|}|}	|}
t	 I ||	|
dd	 }t
||j\}}|d	 }td
|d  t|D ] \}}td|d d|j t|jd	ksiJ td|j t  qRW d    d S 1 s~w   Y  d S )N	beam_sizer=   nvidia/parakeet-tdt_ctc-110mmodel_defaultstdt_durationsF)r   return_best_hypothesis	durationsencoder_outputencoded_lengthsr   Beam search algorithm :search_type	Hyp indextext :	Timesteps)poprt   to_config_dictlistr   BeamTDTInferrG   rQ   rB   r`   r   rw   print	enumeratetextrk   	timestamp)r7   beam_configr   rp   rr   rs   model_configr   beamenc_outenc_lenrv   r*   all_hypsidxhyp_r   r   r   check_beam_decoding~   s8   	
"r   use_cuda_graph_decoderlm_pathc                 C   s  t | d\}}}| }tj|j|j|jjt|d d ddd||r&t|nd |r+dndd
}|}|}	t	
 C |||	d	d
 }
t|
|j}td t|D ]#\}}td|d  d|j  t|jd
kshJ td|j t  qNW d    d S 1 s}w   Y  d S )Nr   r   r   
   F      ?rX   )blank_indexr   max_symbols_per_steppreserve_alignmentspreserve_frame_confidencer   ngram_lm_modelngram_lm_alphar   r   zDecoding resultz
Hyp index r=   z
 | text : r   )rt   r   greedy_decodeGreedyBatchedTDTInferrG   rQ   	blank_idxr   strrB   r`   ry   rw   r   r   r   rk   r   )r7   r   r   rp   rr   rs   r   decoding_algor   r   rv   r   r   r   r   r   r   check_tdt_greedy_decoding   s6   

"r   c                   @   sx  e Zd Zejjdd Zejjdd Zejje	 ddejj
ejjdd Zejje	 ddejj
ejjejd	d
dgd	efddZejje	 ddejj
ejjejdddidddddddddddddddddddddgdd Zejje	 ddejj
ejjejdg d d!d" Zejje	 ddejj
ejjejdg d d#d$ Zejje	 ddejj
ejjejd%d
dgejd&d
dgd%ed&efd'd(Zejje	 ddejj
ejjejddddddddddddd)dgd*d+ Zejje	 ddejj
ejjejddddd)d,d-gd.d/ Zd0S )1TestRNNTDecodingc                 C   sH   t  }t }tt|d}tt|d}t||||d}|d us"J d S NrA   decoding_cfgrG   rQ   rM   r   r   rH   rk   rR   r   r)   r8   vocabrG   rQ   rw   r   r   r   test_constructor      z!TestRNNTDecoding.test_constructorc                 C   sH   t  }|j}tt|d}tt|d}t||||d}|d us"J d S Nr   r   rG   rQ   r6   )r   r   rH   rk   rR   r   )r)   r:   r8   r   rG   rQ   rw   r   r   r   test_constructor_subword   r   z)TestRNNTDecoding.test_constructor_subwordz>RNNTLoss has not been compiled with appropriate numba version.reasonc                 C   s*  t |d\}}}tj|j|j|jjd ddd}|}|}t k |||dd }t||j	}	|	d }	|	j
d us:J td|	j tt|	j
D ]8}
g }tt|	j
|
 D ] }|	j
|
 | \}}t|sfJ t|smJ |t| qTtd	|
 d
|  qGt  W d    d S 1 sw   Y  d S )N!stt_en_conformer_transducer_smallr=      Tr   r   r   r   r   TextTokens at timestamp  = )rt   r   GreedyRNNTInferrG   rQ   num_classes_with_blankrB   r`   ry   rw   
alignmentsr   r   rangerk   	is_tensorr{   int)r)   r7   rp   rr   rs   r   r   r   rv   hyptt_uulogplabelr   r   r   (test_greedy_decoding_preserve_alignments   s4   

"z9TestRNNTDecoding.test_greedy_decoding_preserve_alignmentsloop_labelsTFc              	   C   s  t |d\}}}tj|j|j|jjd dd|d}tj|j|j|jjd ddd}|}|}	t  |||	dd }
t	|
|j
d }|||	dd }t	||j
d }|jd	usYJ |jd	us`J |j|jkshJ t|jt|jkstJ tt|jD ]H}g }tt|j| D ]:}|j| | \}}t|sJ t|sJ |j| | \}}||ksJ tj||d
d
dsJ |t| qq{W d	   d	S 1 sw   Y  d	S )zFTest batched greedy decoding using non-batched decoding as a referencer   r=   r   T)r   r   r   r   r   r   r   Ng-C6?)atolrtol)rt   r   GreedyBatchedRNNTInferrG   rQ   r   r   rB   r`   ry   rw   r   r   rk   r   r   allcloser{   r   )r)   r7   r   rp   rr   rs   search_algoetalon_search_algor   r   rv   r   etalon_hyps
etalon_hypr   r   r   r   r   etalon_logpetalon_labelr   r   r   0test_batched_greedy_decoding_preserve_alignments   sX   	
	

"zATestRNNTDecoding.test_batched_greedy_decoding_preserve_alignmentsr   r   greedydefaultr;   )r   r   alsdr   )r   alsd_max_target_lenr   tsd   )r   tsd_max_sym_exp_per_stepr   maes)r   maes_num_stepsmaes_expansion_betar   r=   c                 C   s0  | dd}t|d\}}}tj|j|jf|ddd|}|}|}	tj|jjd tj	d}
t
  |||	dd	 }t||j\}}|d	 }|d	 }|jd usRJ |d
 dkrjt|t|d t|	d	  ksjJ td|d
  t|D ]\}}td|d d|j tt|j|	d	  dksJ tt|jD ]U}g }tt|j| D ] }|j| | \}}t|sJ t|sJ |t| qt|dkr|d |
ksJ |d d D ]}||
ksJ qtd| d|  qt  t|jd	ksJ td|j t  quW d    d S 1 sw   Y  d S )Nr   r=   r   FT)r   r   r   r[   r   r   r   r   r   r   r   r   r;   r   r   r   )r   rt   r   BeamRNNTInferrG   rQ   rB   rh   r   rl   r`   r   rw   r   rk   r   floatr   r   r   absr   r   r{   r   )r)   r7   r   r   rp   rr   rs   r   r   r   blank_idrv   r   r   r   r   r   r   r   r   r   tokenr   r   r   +test_rnnt_beam_decoding_preserve_alignments6  sZ   	
$$z<TestRNNTDecoding.test_rnnt_beam_decoding_preserve_alignmentszmodel_name, decoding_strategy))r   r   )r   greedy_batch)r   r   )r   r   )r   r   )r   r   c           
      C   s   t ||\}}}t|jj}||d< d|d< d|d< t||j|j|jd}|j||dd}	t	|	d t
r@t|	d d | d S t|	d | d S )NstrategyTr   compute_timestampsr   return_hypothesesr   )rt   r   r8   rw   r   rG   rQ   r6   rnnt_decoder_predictions_tensor
isinstancer   r   check_subword_timestamps)
r)   r7   decoding_strategyrn   rp   rr   rs   r8   rw   rv   r   r   r   (test_subword_decoding_compute_timestamps  s   z9TestRNNTDecoding.test_subword_decoding_compute_timestampsc                 C   s   t ||\}}}t|jj}||d< d|d< d|d< dd |jjD }t||j|j|d}	|	j	||dd}
t
|
d	 trHt|
d	 d	 |	 d S t|
d	 |	 d S )
Nr   Tr   r   c                 S   s   g | ]}|d  qS )r   r   ).0r   r   r   r   
<listcomp>  s    zJTestRNNTDecoding.test_char_decoding_compute_timestamps.<locals>.<listcomp>r   r   r   )rt   r   r8   rw   r6   r   r   rG   rQ   r   r   r   r   check_char_timestamps)r)   r7   r   rn   rp   rr   rs   r8   r   rw   rv   r   r   r   %test_char_decoding_compute_timestamps  s   z6TestRNNTDecoding.test_char_decoding_compute_timestampsr   use_lmc                 C   s&   t |d }t|||r|nd d d S )NzBasr/kenlm_ngram_lm/parakeet-tdt_ctc-110m-libri-1024.kenlm.tmp.arpa)r   r   )r   r   )r)   r7   r   r   kenlm_model_pathr   r   r   test_tdt_greedy_decoding  s   	
z)TestRNNTDecoding.test_tdt_greedy_decodingr<   c                 C   s   t || d S r(   )r   )r)   r7   r   r   r   r   test_tdt_beam_decoding  s   z'TestRNNTDecoding.test_tdt_beam_decodingg333333?)r   r   r   r   r   c                 C   s6   t jddd tj|ddd}||d< t|| d S )Nkenlmz/Skipping test because 'kenlm' is not installed.r   r"   kenlm_ngram_lmz/parakeet-tdt_ctc-110m-libri-1024.kenlm.tmp.arpar   )pytestimportorskipr1   r2   r3   r   )r)   r7   r   r   r   r   r   !test_tdt_beam_decoding_with_kenlm  s   z2TestRNNTDecoding.test_tdt_beam_decoding_with_kenlmN)r-   r.   r/   r  markunitr   r   skipifNUMBA_RNNT_LOSS_AVAILABLEwith_downloadsr   parametrizeboolr   r   r   r   r   r   r  r   r   r   r   r      s    

$2=r   c                       s   e Zd ZdZdd Ze fddZe fddZe fdd	Ze	d
d Z
e	dd Ze	dd Zejj fddZejj fddZ  ZS )TestRNNTTimestampszBRNNT-specific timestamp tests that inherit from BaseTimestampsTestc                 C   s&   t |}|D ]	}|d g|d< q|S )Nchar)copydeepcopy)r)   offsetsresultoffsetr   r   r   _convert_offsets  s   
z#TestRNNTTimestamps._convert_offsetsc                       |  t jS r(   )r  superchar_offsets_charsr)   	__class__r   r   r  "     z%TestRNNTTimestamps.char_offsets_charsc                    r  r(   )r  r  char_offsets_wper  r  r   r   r  &  r  z#TestRNNTTimestamps.char_offsets_wpec                    r  r(   )r  r  char_offsets_bper  r  r   r   r  *  r  z#TestRNNTTimestamps.char_offsets_bpec                 C   s<   t  }t }tt|d}tt|d}t||||d}|S r   r   r   r   r   r   decoding_char.  s   z TestRNNTTimestamps.decoding_charc                 C   s@   t  }| jj}tt|d}tt|d}t|||| jd}|S r   )r   r:   r   rH   rk   rR   r   r   r   r   r   decoding_subword_wpe7  s   z'TestRNNTTimestamps.decoding_subword_wpec                 C   s@   | j j}t }tt|d}tt|d}t|||| j d}|S r   )bpe_tokenizerr   r   rH   rk   rR   r   )r)   r   r8   rG   rQ   rw   r   r   r   decoding_subword_bpe@  s   z'TestRNNTTimestamps.decoding_subword_bpec                       || _ t   d S r(   )r:   r  test_word_offsets_subword_wper)   r:   r  r   r   r#  I     z0TestRNNTTimestamps.test_word_offsets_subword_wpec                    r"  r(   )r:   r  -test_word_offsets_subword_wpe_other_delimiterr$  r  r   r   r&  N  r%  z@TestRNNTTimestamps.test_word_offsets_subword_wpe_other_delimiter)r-   r.   r/   __doc__r  propertyr  r  r  r   r  r  r!  r  r  r  r#  r&  __classcell__r   r   r  r   r    s&    


r  )r<   )Nr<   r<   r<   r(   )6r  r1   	functoolsr   r   pathlibr   typingr   r  rB   	omegaconfr   nemo.collections.asr.modelsr   nemo.collections.asr.modulesr   r	   !nemo.collections.asr.parts.mixinsr
   %nemo.collections.asr.parts.submodulesr   r   r   r   3nemo.collections.asr.parts.submodules.rnnt_decodingr   r   r    nemo.collections.asr.parts.utilsr   nemo.core.utilsr   nemo.core.utils.numba_utilsr   .tests.collections.asr.decoding.test_timestampsr   numba_cpu_is_supportednumba_cuda_is_supportedr	  r   fixturer:   rH   rR   rt   ry   r   r   r  r   r   r   r  r   r   r   r   <module>   sT   
!!  [