o
    wiwF                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZ d dlZd dlZd dlmZ d dlmZmZmZmZ d dlmZmZ d dlmZmZmZmZ d d	lmZmZm Z m!Z!m"Z" d d
l#m$Z$ d dl%m&Z&m'Z'm(Z( d dl)m*Z* d dl+m,Z, d dl-m.Z. dee/ de,fddZ0G dd dZ1G dd dZ2G dd dZ3G dd dZ4G dd dZ5G dd dZ6G dd  d Z7G d!d" d"Z8G d#d$ d$Z9G d%d& d&Z:dS )'    N)deepcopy)List)Mockpatch)
DictConfig)BLEUBLEU_TOKENIZER_get_bleu_tokenizers_from_cuts_move_dimension_to_the_front)ConstraintParserMultiTaskMetric)WERword_error_rateword_error_rate_detailword_error_rate_per_utt)AbstractCTCDecodingCTCBPEDecodingCTCBPEDecodingConfigCTCDecodingCTCDecodingConfig)AbstractMultiTaskDecoding)AbstractRNNTDecodingRNNTBPEDecodingRNNTDecoding)
Hypothesis)CharTokenizer) assert_dataclass_signature_match
vocabularyreturnc              	   C   sf   t dttddd | D d td}W d    n1 s"w   Y  t|dt|jd |S )	Nzpathlib.Path.open
c                 S   s   g | ]}t |qS  )repr).0charr    r    c/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/tests/collections/asr/test_asr_metrics.py
<listcomp>2       z8build_char_tokenizer_with_vocabulary.<locals>.<listcomp>return_valuea_path_which_will_not_be_used	tokenizer)
vocab_size)r   r   ioStringIOjoinr   setattrr+   )r   char_tokenizerr    r    r$   $build_char_tokenizer_with_vocabulary1   s
   (
r1   c                	   @   sD  e Zd Zdgeej dg ZeeZd5de	de
de
dejfdd	Zde	de
dejfd
dZde	de	de
fddZejjdd Zejjejdddgejdddgdd Zejjejdddgdd Zejjejdddgdd Zejjejdddgejdddgdd Zejjejdddgejdddgdd  Zde	de	de
fd!d"Zd#d$ Zde	de	dede
fd%d&Zejjejdddgejdddgd'd( Zejjejdddgd)d* Zejjd+d, Z ejjd-d. Z!ejjd/d0 Z"ejjd1d. Z!ejjd2d3 Z#d4S )6TestWordErrorRate 'Ftxtuse_tokenizeras_logprobsr   c                    s   |rj j}j |}ntj}tfddttjD   fdd|D }g }d}|D ]}||kr=|| n
|| || |}q1t	|
d}	|sV|	S |	tj}	tjjj|	d |d}
|

d}
|
S )Nc                       g | ]	} j | |fqS r    r   r"   iselfr    r$   r%   E       z<TestWordErrorRate.__string_to_ctc_tensor.<locals>.<listcomp>c                       g | ]} | qS r    r    r"   cchar_to_indr    r$   r%   F   r&   r   )num_classes)r0   r+   text_to_idslenr   dictrangeappendtorchTensor	unsqueezetoint64nn
functionalone_hot)r=   r5   r6   r7   blank_idstring_in_id_formctc_listprev_idrA   tensor
new_tensorr    rC   r=   r$   __string_to_ctc_tensor>   s*   
 


z(TestWordErrorRate.__string_to_ctc_tensorc                    sT   |r	j |}ntfddttjD   fdd|D }t|dS )Nc                    r8   r    r9   r:   r<   r    r$   r%   `   r>   zBTestWordErrorRate.__reference_string_to_tensor.<locals>.<listcomp>c                    r?   r    r    r@   rB   r    r$   r%   a   r&   r   )	r0   rF   rH   rI   rG   r   rK   rL   rM   )r=   r5   r6   rT   r    rY   r$   __reference_string_to_tensor[   s
    z.TestWordErrorRate.__reference_string_to_tensor
prediction	referencec           	      C   sx   |  ||}| ||}|jdkr|dd |dd ||d |tt|gd | \}}}| 	 }|
 S )Nr      predictionspredictions_lengthstargetstargets_lengths)(_TestWordErrorRate__string_to_ctc_tensor._TestWordErrorRate__reference_string_to_tensorbatch_dim_index
transpose_rK   rW   rG   computedetachcpuitem)	r=   werr\   r]   r6   predictions_tensortargets_tensorres_r    r    r$   get_werd   s   
zTestWordErrorRate.get_werc                 C   sj  t dgdgddksJ t dgdgddksJ t dgdgddks$J t dgd	gddks0J t dgd
gddks<J t dgdgddksHJ tdgdgdd dksVJ tdgdgdd dksdJ tdgdgdd dksrJ tdgd	gdd dksJ tdgd
gdd dksJ tdgdgdd dksJ tdgdgdtddtdtdtdfksJ tddgddgddksJ tdgdgddksJ tdgdgddksJ tdgd
gdddksJ tdgdgddgdfksJ tddgddgdtddgdfksJ td
dgddgdddgdfksJ td
dgddgddddgdfks3J d S )Ncatcot
hypotheses
references      ?GPUzG P U      @ducati motorcycle
motorcycleducuti motorcycle      ?za B ca b cUUUUUU?r    infgpu)       @r^   rw   rw           )rw   r^   r   r   rw   )ry   r^   r   r   rw   T)ru   rv   use_cer)rw      r   rw   r   katr   gUUUUUU?g?UUUUUU?g333333?)r   r   floatr   r<   r    r    r$   test_wer_functionu   sF   
*


z#TestWordErrorRate.test_wer_functionrf   r   r^   test_wer_bpeTc                 C   s   |  dd|dksJ |  dd|dksJ |  dd|dks!J |  dd|dks,J |  dd	|d
ks7J t|  dd|d dk sFJ d S Nrr   rs   rw   r   zg p ury   rz   r{   r|   r}   za f cr~   r   ư>)get_wer_ctcabsr=   rf   r   r    r    r$   test_wer_metric_simple   s   "z(TestWordErrorRate.test_wer_metric_simplec                    sz    fdd}t dD ]0}tdd}tdd}||}||}| r:t j|||dt|g|gd dk s:J q
d	S )
<This test relies on correctness of word_error_rate function.c                       d  fddt| D S )Nr   c                 3   "    | ]}t d  jV  qdS r   Nrandomchoicer.   r   r"   rp   r<   r    r$   	<genexpr>        zXTestWordErrorRate.test_wer_metric_randomized.<locals>.__random_string.<locals>.<genexpr>r.   rI   lengthr<   r    r$   __random_string      zETestWordErrorRate.test_wer_metric_randomized.<locals>.__random_string   r^      )r\   r]   r   rt   r   N)rI   r   randintstripr   r   r   r=   r   !_TestWordErrorRate__random_stringtest_idn1n2s1s2r    r<   r$   test_wer_metric_randomized   s    z,TestWordErrorRate.test_wer_metric_randomizedc                 C   s   ddi}|rt || j}t|dd}nt|| j }t|dd}| jd|dd   	 }|g dks8J |j
|}|g d	ksFJ |j
|}|dksRJ d S )
NstrategygreedyFr   rr   )r6   r      r^      )rA   at)r   r0   r   r   r   copyrd   intnumpytolistdecodingdecode_ids_to_tokensdecode_ids_to_str)r=   r   decoding_configr   rl   tokenstokens_decodedstr_decodedr    r    r$   test_wer_metric_decode   s   z(TestWordErrorRate.test_wer_metric_decodec           	      C   P  d|d}t t|| jdd}| jd|dd }|dkr#|dd	 d|j_|jj|dd
}|d }t	|t
s:J |dkrB|d n
|d d dd d f }|jtg d  dks]J |jdksdJ |jdkskJ |jd |k svJ |jdks}J tj|jd	|  gtjd}|jj||dd}|d }t	|t
sJ |jdksJ d S Nr   )r   rf   Fr   rr   T)r7   r   r^   return_hypothesesr   r   dtype)decoder_lengthsr   )r   r   r   rd   r   rg   r   preserve_alignmentsctc_decoder_predictions_tensor
isinstancer   
y_sequencerK   rW   sumscoretext
alignmentsallr   shapelong	r=   rf   r   r   rl   rW   hypsampler   r    r    r$   !test_wer_metric_return_hypothesis   (   
& z3TestWordErrorRate.test_wer_metric_return_hypothesisc           	      C   r   r   )r   r   r0   rd   r   rg   r   r   r   r   r   r   rK   rW   r   r   r   r   r   r   r   r   r   r    r    r$   )test_wer_metric_subword_return_hypothesis   r   z;TestWordErrorRate.test_wer_metric_subword_return_hypothesisc                 C   s   t tdg |dgd}|r%t | jjjt| j|| jjtd}t|dd}nt t	| j
| j
 || jtd}t|dd}| ||}| ||}||d |tt	|gd | \}	}
}
|	  }	|	 S )	Nrw   r   r   r   r'   )rS   r*   r   r   specFr   )rS   
labels_mapr   r   r   r_   )r   r   r0   r*   r+   r   ids_to_textr   r   rG   r   r   (decode_token_to_str_with_vocabulary_mockr   rd   re   rK   rW   rh   ri   rj   rk   )r=   r\   r]   r   #ctc_decoder_predictions_tensor_mockr   rl   rm   rn   ro   rp   r    r    r$   r     s>   zTestWordErrorRate.get_wer_ctcc                    s   d  fdd|D S )Nr   c                       g | ]} j | qS r    r9   r"   id_r<   r    r$   r%   B      zNTestWordErrorRate.decode_token_to_str_with_vocabulary_mock.<locals>.<listcomp>r.   )r=   idsr    r<   r$   r   A  s   z:TestWordErrorRate.decode_token_to_str_with_vocabulary_mockc                 C   s   t tdg |dgd}|r&t | jjjt| j|| jjtd}t||dd}nt t	| j
| j
 || jtd}t||dd}| ||}|jdkrN|dd	 | ||}	|jdkr_|	dd	 ||	d |tt	|gd
 | \}
}}|
  }
|
 S )Nrw   r   r'   )rS   r*   rnnt_decoder_predictions_tensorr   r   F)rf   r   )rS   r   r   r   r   r   r^   r_   )r   r   r0   r*   r+   r   r   r   r   rG   r   r   r   r   re   rf   rg   rd   rK   rW   rh   ri   rj   rk   )r=   r\   r]   rf   r   $rnnt_decoder_predictions_tensor_mockr   rl   rn   rm   ro   rp   r    r    r$   get_wer_rnntD  sF   

zTestWordErrorRate.get_wer_rnntc                 C   s   |  dd||dksJ |  dd||dksJ |  dd||dks$J |  dd||dks0J |  dd	||d
ks<J t|  dd||d dk sLJ d S r   )r   r   r   r    r    r$   test_rnnt_wer_metric_simplen  s   $z-TestWordErrorRate.test_rnnt_wer_metric_simplec                    s|    fdd}t dD ]1}tdd}tdd}||}||}| r;t j||d|dt|g|gd d	k s;J q
d
S )r   c                    r   )Nr   c                 3   r   r   r   r   r<   r    r$   r     r   z]TestWordErrorRate.test_rnnt_wer_metric_randomized.<locals>.__random_string.<locals>.<genexpr>r   r   r<   r    r$   r   ~  r   zJTestWordErrorRate.test_rnnt_wer_metric_randomized.<locals>.__random_stringr   r^   r   r   )r\   r]   rf   r   rt   r   N)rI   r   r   r   r   r   r   r   r    r<   r$   test_rnnt_wer_metric_randomizedy  s    z1TestWordErrorRate.test_rnnt_wer_metric_randomizedc           	      C   sx  ddt | j}}}td tj|||tjd}tjd||gtjd}||td|dgd < t }t	|| jd}|j
||dd}|d }t|jtjsPJ |jtj|tjdks]J |jd	ksdJ t |jdksmJ |jd u stJ tddd
}t	|| jd}|j
||dd}|d }t|jtjsJ |jtj|tjdksJ |jd	ksJ t |jdksJ |jd usJ d S )Nr^      r   r   sizer   r9   Tr   r   r   compute_timestamps   )rG   r   rK   manual_seedrandnfloat32r   int32r   r   r   r   r   rL   r   rW   r   	timestampr   	r=   BTVdecoder_outputsdecoder_lensdecoding_cfgr   r   r    r    r$   test_char_decoding_logprobs  s.   
z-TestWordErrorRate.test_char_decoding_logprobsc           	      C   v  dd| j j}}}td tj|||tjd}tjd||gtjd}||td|dgd < t }t	|| j d}|j
||dd}|d }t|jtjsOJ |jtj|tjdks\J |jd	kscJ t|jdkslJ |jd u ssJ tddd
}t	|| j d}|j
||dd}|d }t|jtjsJ |jtj|tjdksJ |jd	ksJ t|jdksJ |jd usJ d S Nr^   r   r   r   r   r*   Tr   r   r   r   r0   r+   rK   r   r   r   r   r   r   r   r   r   r   rL   r   rW   r   rG   r   r   r   r    r    r$   test_subword_decoding_logprobs  .   
z0TestWordErrorRate.test_subword_decoding_logprobsc           
      C   s  ddt | j}}}td tjd|d ||gtjd}tjd||gtjd}||td|dgd < t }t|| jd}|j	||dd}|d }t
|jtjsTJ |jtj|tjdksaJ |jd	kshJ t |jdksqJ |jd u sxJ tddd
}t|| jd}tt |j	||dd}	W d    n1 sw   Y  tddd
}t|| jd}|j	||dd}|d }t
|jtjsJ |jtj|tjdksJ |jd	ksJ t |jdksJ |jd u sJ d S )Nr^   r   r   r   r9   Tr   r   r   r   Fr   )rG   r   rK   r   r   r   r   r   r   r   r   r   rL   r   rW   r   r   r   pytestraises
ValueError
r=   r   r   r   r   r   r   r   r   rp   r    r    r$   test_char_decoding_labels  s8   
z+TestWordErrorRate.test_char_decoding_labelsc           	      C   r   r   r   r   r    r    r$   r    r  c           
      C   s  dd| j j}}}td tjd|d ||gtjd}tjd||gtjd}||td|dgd < t }t|| j d}|j	||dd}|d }t
|jtjsSJ |jtj|tjdks`J |jd	ksgJ t|jdkspJ |jd u swJ tddd
}t|| j d}tt |j	||dd}	W d    n1 sw   Y  tddd
}t|| j d}|j	||dd}|d }t
|jtjsJ |jtj|tjdksJ |jd	ksJ t|jdksJ |jd u sJ d S )Nr^   r   r   r   r   Tr   r   r   r   Fr   )r0   r+   rK   r   r   r   r   r   r   r   r   r   rL   r   rW   r   rG   r   r   r  r  r  r  r    r    r$   test_subword_decoding_labels  s8   
z.TestWordErrorRate.test_subword_decoding_labelsN)F)$__name__
__module____qualname__liststringascii_lowercaser   r1   r0   strboolrK   rL   rd   re   rq   r  markunitr   parametrizer   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r    r    r    r$   r2   9   s\    	
0%*


&
r2   c                   @   s0   e Zd ZdZejjdd Zejjdd ZdS )TestBLEUHelperFunctionszTest BLEU helper functionsc                 C   s2   t  }i |_||_|g}t|}|dgksJ dS )z)Test handling cuts without BLEU tokenizerN)r   customfirst_non_padding_cutr	   )r=   cutcuts
tokenizersr    r    r$   4test_get_bleu_tokenizers_from_cuts_missing_tokenizer?  s   zLTestBLEUHelperFunctions.test_get_bleu_tokenizers_from_cuts_missing_tokenizerc                 C   sZ   t ddd}t|d}|jdksJ t|d}|jdksJ t|d}|jdks+J d	S )
zTest moving tensor dimensions   
      r^   )r  r  r  )r  r  r  r   )r  r  r  N)rK   r   r
   r   )r=   rW   movedr    r    r$    test_move_dimension_to_the_frontK  s   


z8TestBLEUHelperFunctions.test_move_dimension_to_the_frontN)	r	  r
  r  __doc__r  r  r  r  r  r    r    r    r$   r  <  s    
r  c                   @   s  e Zd ZdZdgeej dg g d ZeeZ	d4ddZ
ded	ejfd
dZejjejdddgejdg ddd Zejjdd Zejjdd Zejjejdddgdd Zejjdd Zejjdd Zejjdd Zejjdd  Zejjejd!g d"d#d$ Zejjd%d& Zejjd'd( Zejjd)d* Zejjejd!g d"d+d, Zejjd-d. Z ejjd/d& Zejjd0d( Zejjd1d* Zejjejd!g d"d2d, Zd3S )5TestBLEUMetriczTest BLEU metric functionalityr3   r4   u   你u   好u   世u   界u   朋u   友ctcc                    s   d}|dkr t td} fdd|_t tdg ddgd	|_|S |d
kr>t td} fdd|_t tdg ddgd	|_|S |dkr\t td} fdd|_t tdg ddgd	|_|S t	d| d)zCreate mock decoding instanceNr#  r   c                       d  fdd| D S )Nr   c                    r   r    r9   r   r<   r    r$   r%   i  r   ITestBLEUMetric.create_mock_decoding.<locals>.<lambda>.<locals>.<listcomp>r   r   r<   r    r$   <lambda>i      z5TestBLEUMetric.create_mock_decoding.<locals>.<lambda>rw   hello worldr   r'   rnntc                    r%  )Nr   c                    r   r    r9   r   r<   r    r$   r%   o  r   r&  r   r'  r<   r    r$   r(  o  r)  	multitaskc                    r%  )Nr   c                    r   r    r9   r   r<   r    r$   r%   u  r   r&  r   r'  r<   r    r$   r(  u  r)  z`decode_type:` z+ is invalid type for `create_mock_decoding')
r   r   r   r   r   r   r   r   decode_predictions_tensor	TypeError)r=   decode_typer   r    r<   r$   create_mock_decodingd  s.   


z#TestBLEUMetric.create_mock_decodingr5   r   c                    sB   t fddttjD   fdd|D }t|dS )z"Convert reference string to tensorc                    r8   r    r9   r:   r<   r    r$   r%     r>   z?TestBLEUMetric.__reference_string_to_tensor.<locals>.<listcomp>c                    r?   r    r    r@   rB   r    r$   r%     r&   r   )rH   rI   rG   r   rK   rW   rM   )r=   r5   rT   r    rY   r$   r[   }  s    z+TestBLEUMetric.__reference_string_to_tensorrf   r   r^   r/  )r#  r+  r,  c                 C   sX   |  |}t||dddd}|j|ksJ |j|ksJ |jdks#J |jdus*J dS )z7Test BLEU metric initialization with different decoders13ar   F)r   rf   bleu_tokenizern_gram	lowercaseN)r0  r   rf   r   r3  decode)r=   rf   r/  r   bleur    r    r$   test_bleu_initialization  s   

z'TestBLEUMetric.test_bleu_initializationc                 C   s   |  d}t|dd}d}t|dt| j}tddg}| d}||d	}td
d
g}t	dg ddt	dg ddg|j
_|j||||d |j
  dS )z$Test basic BLEU update functionalityr#  r1  r   r2  r  d   P   Z   r*  r^      rw   r   hello earthr_   N)r0  r   rK   r   rG   r   rW   +_TestBLEUMetric__reference_string_to_tensorrepeatr   r   r(   updateassert_called_once)r=   r   r6  
batch_sizer`   ra   rb   rc   r    r    r$   test_bleu_update_basic  s$   

z%TestBLEUMetric.test_bleu_update_basicc                 C   s|   |  d}t|dd}tddt| j}tjdtjd}tjddtjd}tjdtjd}|j||||d |j	  d	S )
z'Test BLEU update with empty predictionsr#  r1  r8  r   r9  r   2   r_   N)
r0  r   rK   emptyrG   r   r   r@  r   assert_not_calledr=   r   r6  r`   ra   rb   rc   r    r    r$   "test_bleu_update_empty_predictions  s   
z1TestBLEUMetric.test_bleu_update_empty_predictionsc                 C   s   |  d}t||dd}d}d}t| j}|dkr*t|||}td|||f}nt|||}td|||f}tddg}	td	dg}
td
g ddtd
g ddg|j	_
|j||	||
d |j	  dS )z7Test BLEU update with different batch dimension indicesr#  r1  )r   rf   r2  r  rD  r   (   -   #   rw   testr   r   r_   N)r0  r   rG   r   rK   r   r   rW   r   r   r(   r@  rA  )r=   rf   r   r6  rB  
time_stepsr+   r`   rb   ra   rc   r    r    r$   %test_bleu_update_different_batch_dims  s,   

z4TestBLEUMetric.test_bleu_update_different_batch_dimsc                 C   s   |  d}t|ddd}t }tdi|_||_t }tdi|_||_||g}d}t|dt| j	}t
dd	g}| d
}	|	|d}	t
ddg}
tdg d
dtdg ddg|j_|j|||	|
|d |j  dS )z-Test BLEU with different tokenizers from cutsr#  Tr1  )r   check_cuts_for_tokenizersr2  zhr  r9  r:  r;  rL  r^   r   rw   r   u   测试r`   ra   rb   rc   r  N)r0  r   r   r   r  r  rK   r   rG   r   rW   r>  r?  r   r   r(   r@  rA  )r=   r   r6  cut1cut2r  rB  r`   ra   rb   rc   r    r    r$   test_bleu_with_cuts_tokenizers  s4   



z-TestBLEUMetric.test_bleu_with_cuts_tokenizersc           	      C   s   |  d}t|dd}t g}d}t|dt| j}tddg}tdt| j|d	f}td
dg}t	j
tdd |j|||||d W d   dS 1 sRw   Y  dS )z-Test BLEU with mismatched cuts and batch sizer#  T)r   check_cuts_for_bleu_tokenizersr  r9  r:  r;  r   rD  rI  rJ  z/BLEU metrics configured for multiple tokenizers)matchrQ  N)r0  r   r   rK   r   rG   r   rW   r   r  r  AssertionErrorr@  )	r=   r   r6  r  rB  r`   ra   rb   rc   r    r    r$   test_bleu_cuts_length_mismatch  s"   
"z-TestBLEUMetric.test_bleu_cuts_length_mismatchc           	      C   s   |  d}t|ddd}d}tddt| j}tdg}| |}t|jd g	d}t
d	g |d
g|j_|j||||d |jdd}|d  d	ksRJ dS )z3Test BLEU calculation with perfect prediction matchr#  r1  r^   r   r2  r3  hellor9  r:  r   rw   r   r_   Freturn_all_metricsr6  N)r0  r   rK   r   rG   r   rW   r>  r   rM   r   r   r(   r@  rh   rk   )	r=   r   r6  perfect_textr`   ra   rb   rc   resultr    r    r$   test_bleu_perfect_match*  s"   

z&TestBLEUMetric.test_bleu_perfect_matchc                 C   s   |  d}t|dd}tddt| j}tdg}| d}tdg}td	g d
dg|j	_
|j||||d |jdd}|d  dksIJ dS )z,Test BLEU calculation with no matching wordsr#  r1  r8  r^   r9  r:  r*  r<  rw   zcat dogr   r_   Fr[  r6  r   Nr0  r   rK   r   rG   r   rW   r>  r   r   r(   r@  rh   rk   r=   r   r6  r`   ra   rb   rc   r^  r    r    r$   test_bleu_no_matchH  s   

z!TestBLEUMetric.test_bleu_no_matchr3  )r^   r  r   r   c                 C   s<  |  d}t|d|d}tddt| j}tdg}| d}t|jd g}t	dg d	d
g|j
_|j||||d |jddd }d\}	}
}}d}|dkrT|	}n0|dkr`t|	|
 }n$|dkrm|	|
 | d }n|dkr||	|
 | | d }ntd| dt| | dk sJ d|dd| ddS )z0Test BLEU calculation with partial word matches.r#  r1  rY  r^   r9  r:  the quick brown fox jumpsrw   zthe quick brown fox runsr   r_   Fr[  r6  )g?g      ?r   r}   rD   r  r   r   r   g      ?z`n_gram` value of z- is not supported by `test_bleu_partial_match皙?   Expected BLEU ≈ z.3f, got N)r0  r   rK   r   rG   r   rW   r>  r   r   r   r(   r@  rh   mathsqrtr  r   rk   )r=   r3  r   r6  r`   ra   rb   rc   r^  p1p2p3p4expected_bleur    r    r$   test_bleu_partial_matcha  s:   

	z&TestBLEUMetric.test_bleu_partial_matchc                 C      |  d}t|dd}tddt| j}tdg}| d}tdg}td	g d
dg|j	_
|j||||d | }|d  dksGJ dS z+Test BLEU calculation with empty predictionr#  r1  r8  r^   r9  r:  r*  r<  rw   r   r   r_   r6  r   Nr`  ra  r    r    r$   test_bleu_empty_prediction     

z)TestBLEUMetric.test_bleu_empty_predictionc                 C   ro  z*Test BLEU calculation with empty referencer#  r1  r8  r^   r9  r:  r   r   rw   r*  r   r_   r6  r   Nr`  ra  r    r    r$   test_bleu_empty_reference      

z(TestBLEUMetric.test_bleu_empty_referencec                 C     |  d}t|ddd}d}t|dt| j}tg d}| d}| d	}| d
}t|j	d |j	d |j	d }	tj
||	tjd}
|d |
dd|j	d f< |d |
dd|j	d f< |d |
dd|j	d f< t|j	d |j	d |j	d g}tdg ddtdg ddtdg ddg|j_|j|||
|d |jddd }d}d}t|| }t| | dk sJ d|dd| ddS z4Test BLEU calculation with multiple samples in batchr#  r1  r  rY  r   r9  )r:  U   r;  r*  z	test casezexample textr^   r   r   Nrw   r   ztest differentzcompletely wrongr_   Fr[  r6  r}   r   rd  re  z.2frf  r0  r   rK   r   rG   r   rW   r>  maxr   zerosr   r   r   r(   r@  rh   rg  rh  r   rk   r=   r   r6  rB  r`   ra   target1target2target3max_lenrb   rc   r^  ri  rj  rm  r    r    r$   test_bleu_multiple_samples  >   



"z)TestBLEUMetric.test_bleu_multiple_samplesc           	      C      |  d}t|d|d}tddt| j}tdg}| d}t|jd g}t	dg d	d
g|j
_|j||||d |jddd }d|   krRdksUJ  J |dkrc| dksaJ dS | dkskJ dS zATest that different n-gram settings produce different BLEU scoresr#  r1  rY  r^   r9  r:  rc  rw   zquick brown fox the jumpsr   r_   Fr[  r6  r   r}   Nr0  r   rK   r   rG   r   rW   r>  r   r   r   r(   r@  rh   rk   	r=   r3  r   r6  r`   ra   rb   rc   r^  r    r    r$   &test_bleu_different_ngram_calculations
  &   

 z5TestBLEUMetric.test_bleu_different_ngram_calculationsc                 C   s  |  d}t|dddd}|  d}t|dddd}d}t|dt| j}tdd	g}| d
}| d}	t|j	d |	j	d }
tj
||
tjd}|d |dd|j	d f< |	d |dd|	j	d f< t|j	d |	j	d g}t }tdi|_||_t }tdi|_||_||g}d}d}tdg |dtdg |dg|j_tdg |dtdg |dg|j_|j||||d |jddd }|j|||||d |jddd }td}t| |  dksJ dt| | dk sJ d|dd| ddS ) zFTest BLEU calculation with multiple tokenizers for different languagesr#  r1  Fr  )r   r2  rU  r3  Tr9  r:  rx  r*  u   你好世界r^   r   r   NrP  r=  u   你好朋友rw   r   r_   r[  r6  rQ  g      ?g{Gz?z7Multi-tokenization should produce different BLEU scoresrd  z'Multi-tokenization BLEU should be near z.4frf  )r0  r   rK   r   rG   r   rW   r>  rz  r   r{  r   r   r   r  r  r   r   r(   r@  rh   rg  rh  r   rk   )r=   decoding_singlebleu_singledecoding_multi
bleu_multirB  r`   ra   english_targetchinese_targetr  rb   rc   rR  rS  r  english_predictionchinese_predictionresult_singleresult_multiexpected_multi_bleur    r    r$   test_bleu_multi_tokenization/  sj   






z+TestBLEUMetric.test_bleu_multi_tokenizationc                 C   ro  rp  r`  ra  r    r    r$   rq    rr  c                 C   ro  rs  r`  ra  r    r    r$   rt    ru  c                 C   rv  rw  ry  r|  r    r    r$   r    r  c           	      C   r  r  r  r  r    r    r$   r    r  N)r#  )!r	  r
  r  r   r  r  r  r   r1   r0   r0  r  rK   rL   r>  r  r  r  r  r7  rC  rH  rN  rT  rX  r_  rb  rn  rq  rt  r  r  r  r    r    r    r$   r!  ^  s`    


!
&


3


?#
`


?r!  c                   @   sV   e Zd ZdZdgeej dg g d Zdd Ze	j
jdd Ze	j
jd	d
 ZdS )TestBLEUEdgeCasesz)Test BLEU edge cases and error conditionsr3   r4   r"  c                    s(   t td} fdd|_t g d|_|S )zCreate minimal mock decodingr$  c                    r%  )Nr   c                    r   r    r9   r   r<   r    r$   r%   2  r   zLTestBLEUEdgeCases.create_mock_decoding.<locals>.<lambda>.<locals>.<listcomp>r   r'  r<   r    r$   r(  2  r)  z8TestBLEUEdgeCases.create_mock_decoding.<locals>.<lambda>r'   )r   r   r   r   )r=   r   r    r<   r$   r0  /  s   
z&TestBLEUEdgeCases.create_mock_decodingc                 C   s   |   }tdg ddg|j_t|d}tddt| j}t	dg}t
dt| jd	}t	d
g}|j||||d |jdd}|d  dksLJ dS )zTest BLEU with empty hypothesesrw   r   r   r   r^   r9  r:  r   r^   rD  rI  r_   Fr[  r6  r   N)r0  r   r   r(   r   rK   r   rG   r   rW   r   r@  rh   rk   ra  r    r    r$   test_bleu_empty_hypotheses6  s    
z,TestBLEUEdgeCases.test_bleu_empty_hypothesesc                 C   s|   |   }t|d}tddt| j}tdg}tdt| jd}tdg}tdg dd	g|j	_
|j||||d
 dS )z"Test BLEU with zero-length targetsr  r^   r9  r:  r   r  rw   rL  r   r_   N)r0  r   rK   r   rG   r   rW   r   r   r   r(   r@  rG  r    r    r$   test_bleu_zero_length_targetsS  s   

z/TestBLEUEdgeCases.test_bleu_zero_length_targetsN)r	  r
  r  r   r  r  r  r   r0  r  r  r  r  r  r    r    r    r$   r  *  s    
r  c                   @   sX   e Zd ZdZdd Zejjdd Zejjdd Z	ejjdd	 Z
ejjd
d ZdS )&TestMultiTaskMetricConstraintFunctionsz4Test the constraint parsing and evaluation functionsc                 C      t  | _dS zSet up test fixturesNr   parserr<   r    r    r$   setUpl     z,TestMultiTaskMetricConstraintFunctions.setUpc                 C   sn   t  }ddd}|tjdd|}|du sJ |tjdd|}|du s&J |tjdd	|}|du s5J d
S )z-Test static constraint with equality operator
transcribeentasklangr  T	translateFmissingvalueN)r   _static_constraintoperatoreqr=   r  
propertiesr^  r    r    r$   test_static_constraint_equalityp  s   
zFTestMultiTaskMetricConstraintFunctions.test_static_constraint_equalityc                 C   sP   t  }ddd}|tjdd|}|du sJ |tjdd|}|du s&J dS )	z/Test static constraint with inequality operatorr  r  r  r  r  TFN)r   r  r  ner  r    r    r$   !test_static_constraint_inequality  s   
zHTestMultiTaskMetricConstraintFunctions.test_static_constraint_inequalityc                 C   sp   t  }dddd}|tjdd|}|du sJ |tjdd|}|du s'J |tjdd	|}|du s6J d
S )zTest comparing two propertiesr  	different)source_langtarget_langotherr  r  Tr  Fr  N)r   _compare_constraintr  r  r  r    r    r$   test_compare_constraint  s   z>TestMultiTaskMetricConstraintFunctions.test_compare_constraintc                 C   s   t  }ddd}dd }dd }||||}|du sJ ||||}|du s*J ||||}|du s7J |||}|du sCJ |||}|du sOJ d	S )
z$Test logical AND, OR, NOT operationsr  r  r  c                 S      |  ddkS )Nr  r  getpr    r    r$   r(        zPTestMultiTaskMetricConstraintFunctions.test_logical_operations.<locals>.<lambda>c                 S   r  )Nr  r  r  r  r    r    r$   r(    r  FTN)r   _logical_and_logical_or_logical_not)r=   r  r  true_constraintfalse_constraintr^  r    r    r$   test_logical_operations  s   
z>TestMultiTaskMetricConstraintFunctions.test_logical_operationsN)r	  r
  r  r   r  r  r  r  r  r  r  r  r    r    r    r$   r  i  s    


r  c                   @   s   e Zd ZdZdd Zejjdd Zejjdd Z	ejjdd	 Z
ejjd
d Zejjdd Zejjdd Zejjdd Zejjdd Zejjdd Zejjdd Zejjdd Zejjdd ZdS )$TestMultiTaskMetricConstraintParsingz0Test the constraint string parsing functionalityc                 C   r  r  r  r<   r    r    r$   r    r  z*TestMultiTaskMetricConstraintParsing.setUpc                 C   D   t  }|d}ddi}||du sJ ddi}||du s J dS )z(Test parsing simple equality constraints.task==transcriber  r  Tr  FNr   parse_constraintr=   r  constraint_fnr  r    r    r$   test_simple_equality_constraint     
zDTestMultiTaskMetricConstraintParsing.test_simple_equality_constraintc                 C   r  )z*Test parsing simple inequality constraintsz.task!=translater  r  Tr  FNr  r  r    r    r$   !test_simple_inequality_constraint  r  zFTestMultiTaskMetricConstraintParsing.test_simple_inequality_constraintc                 C   sH   t  }|d}ddd}||du sJ ddd}||du s"J dS )z-Test parsing property-to-property comparisonsz.source_lang==.target_langr  )r  r  TdeFNr  r  r    r    r$   #test_property_comparison_constraint  s   


zHTestMultiTaskMetricConstraintParsing.test_property_comparison_constraintc                 C   b   t  }|d}ddd}||du sJ ddd}||du s"J ddd}||du s/J d	S )
zTest parsing AND constraintsz.task==transcribe and .lang==enr  r  r  Tr  Fr  Nr  r  r    r    r$   test_and_constraint     



z8TestMultiTaskMetricConstraintParsing.test_and_constraintc                 C   s\   t  }|d}ddi}||du sJ ddi}||du s J ddi}||du s,J dS )	zTest parsing OR constraintsz%.task==transcribe or .task==translater  r  Tr  r  FNr  r  r    r    r$   test_or_constraint  s   
z7TestMultiTaskMetricConstraintParsing.test_or_constraintc                 C   r  )zTest parsing NOT constraintsznot .task==translater  r  Tr  FNr  r  r    r    r$   test_not_constraint  r  z8TestMultiTaskMetricConstraintParsing.test_not_constraintc                 C   sh   t  }|d}dddd}||du sJ dddd}||du s$J dddd}||du s2J d	S )
z'Test parsing complex nested constraintsz0.task==transcribe and .source_lang==.target_langr  r  )r  r  r  Tr  Fr  Nr  r  r    r    r$   test_complex_constraint  s   
z<TestMultiTaskMetricConstraintParsing.test_complex_constraintc                 C   s|   t  }|d}ddd}||du sJ ddd}||du s"J ddd}||du s/J d	dd}||du s<J d
S )z)Test parsing constraints with parenthesesz5(.task==transcribe or .task==translate) and .lang==enr  r  r  Tr  r  Fr  Nr  r  r    r    r$   test_parentheses_constraint!  s   




z@TestMultiTaskMetricConstraintParsing.test_parentheses_constraintc                 C   s|   t  }|d}ddd}||du sJ ddd}||du s"J ddd}||du s/J d	dd}||du s<J d
S )z0Test parsing constraints with nested parentheseszD(.task==transcribe and (.lang==en or .lang==de)) or .task==translater  r  r  Tr  frFr  Nr  r  r    r    r$   "test_nested_parentheses_constraint4  s   




zGTestMultiTaskMetricConstraintParsing.test_nested_parentheses_constraintc                 C   r  )
z:Test parsing constraints with parentheses and NOT operatorz%not (.task==transcribe and .lang==en)r  r  r  Fr  Tr  Nr  r  r    r    r$   $test_parentheses_with_not_constraintF  r  zITestMultiTaskMetricConstraintParsing.test_parentheses_with_not_constraintc                 C   s   t  }|d}ddddd}||du sJ dddd	d}||du s&J ddddd}||d
u s5J ddddd}||d
u sDJ dS )z:Test parsing complex constraints with multiple parenthesesz\(.task==transcribe or .task==translate) and (.source_lang!=.target_lang or .domain==special)r  r  r  general)r  r  r  domainTr  specialFr  Nr  r  r    r    r$   #test_complex_parentheses_constraintU  s   zHTestMultiTaskMetricConstraintParsing.test_complex_parentheses_constraintc                 C   s@   t  }tt |d W d   dS 1 sw   Y  dS )z*Test that invalid constraints raise errorszinvalid constraint formatN)r   r  r  SyntaxErrorr  r=   r  r    r    r$   test_invalid_constrainti  s   "z<TestMultiTaskMetricConstraintParsing.test_invalid_constraintN)r	  r
  r  r   r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r    r    r    r$   r    s6    










r  c                   @   sH   e Zd ZdZdd Zejjdd Zejjdd Z	ejjdd	 Z
d
S )TestMultiTaskMetricCutSplittingz$Test the cut splitting functionalityc                 C   s   t  }||_||_|S )z+Helper to create mock cuts with custom data)r   r  r  )r=   custom_datar  r    r    r$   create_mock_cutt  s   z/TestMultiTaskMetricCutSplitting.create_mock_cutc           
      C   s   |  ddd|  ddd|  dddg}t }t |_tdddd	d
dd	di}td3}t }t }||g|_t||}||\}}	|	d ddgksSJ |	d dgks\J W d   dS 1 sgw   Y  dS )zTest basic cut splittingr  r  r  r  r  metricsr   nemo.collections.asr.metrics.WER
constraint_target_.task==translate!nemo.collections.asr.metrics.BLEUrl   r6  Gnemo.collections.asr.metrics.multitask.MultiTaskMetric.from_config_dictrl   r   r  r6  r^   N)r  r   r   r   r   side_effectr   _split_cuts)
r=   r  
mock_modelcfgmock_from_configmock_wer	mock_bleumultitask_metric
cuts_split	idx_splitr    r    r$   test_split_cuts_simple|  s4   


"z6TestMultiTaskMetricCutSplitting.test_split_cuts_simplec           	      C   s   |  dddg}t }t |_tdddddii}td	#}t }||_t||}||\}}|d g ks:J W d
   d
S 1 sEw   Y  d
S )z(Test cut splitting with no matching cutsr  r  r  r  rl   r  r  r  r  N)r  r   r   r   r   r(   r   r  	r=   r  r  r  r  r  r  r  r  r    r    r$   test_split_cuts_no_matches  s(   

"z:TestMultiTaskMetricCutSplitting.test_split_cuts_no_matchesc           	      C   s   g }t  }t  |_tdddddii}td#}t  }||_t||}||\}}|d g ks3J W d   dS 1 s>w   Y  dS )z#Test cut splitting with empty inputr  rl   r  r  r  r  N)r   r   r   r   r(   r   r  r  r    r    r$   test_split_cuts_empty_input  s&   

"z;TestMultiTaskMetricCutSplitting.test_split_cuts_empty_inputN)r	  r
  r  r   r  r  r  r  r  r  r  r    r    r    r$   r  q  s    
+
r  c                   @   s>   e Zd ZdZejdd Zejjdd Z	ejjdd Z
dS )	TestMultiTaskMetricUpdatez$Test the metric update functionalityc                 C   s   t  }t  |_tddddddddi}td }t  }t  }||g|_t||}||_||_|W  d	   S 1 s=w   Y  d	S )
z1Create a MultiTaskMetric with mocked dependenciesr  r  r  r  r  r  r  r  N)r   r   r   r   r  r   	_mock_wer
_mock_bleur=   r  r  r  r  r  r  r    r    r$   mock_multitask_metric  s,   


$z/TestMultiTaskMetricUpdate.mock_multitask_metricc              	   C   s   t  t  g}ddi|d _|d |d _ddi|d _|d |d _d}t|dd}td	d
g}tj|dtjd}tdd|df}tddg}tdd|df}	|j	||||||	|d |j
j	  |j
j	j}
|
jd jd dkswJ |jj	  |jj	j}|jd jd dksJ dS )z,Test update with cuts that match constraintsr  r  r   r  r^   r  r9     r:  r;  r     rD  rI  rJ  <   r`   ra   predictions_maskrb   rc   	input_idsr  r`   Nr   r  r  rK   r   rW   onesr  r   r@  r  rA  	call_argskwargsr   r  r=   r  r  rB  r`   ra   r  rb   rc   r  wer_call_argsbleu_call_argsr    r    r$   test_update_with_matching_cuts  s6   

z8TestMultiTaskMetricUpdate.test_update_with_matching_cutsc              	   C   s   t  g}ddi|d _|d |d _d}t|dd}tdg}tj|dtjd}tdd	|d
f}tdg}tdd	|df}	|j	||||||	|d |j
j	  |j
j	j}
|
jd jd dkseJ |jj	  |jj	j}|jd jd dks|J dS )z5Test update when no cuts match a metric's constraintsr  r  r   r^   r9  r  r:  r   r   rD  rI  r  r  r`   Nr  r	  r    r    r$   test_update_with_empty_indices2  s2   

z8TestMultiTaskMetricUpdate.test_update_with_empty_indicesN)r	  r
  r  r   r  fixturer  r  r  r  r  r    r    r    r$   r    s    
!
&r  c                   @   @   e Zd ZdZejjdd Zejjdd Zejjdd Z	dS )	TestMultiTaskMetricComputez%Test the metric compute functionalityc                 C   s   t  }t  |_tdddddii}td/}t  }ddd	d
|j_||_t||}|jddd}ddd	d
}||ks=J W d   dS 1 sHw   Y  dS )zTest compute for WER metricr  rl   r  r  r  r  rd  g      $@g      Y@)val_werval_wer_numval_wer_denomTval_)r\  prefixNr   r   r   r   rh   r(   r   )r=   r  r  r  r  r  r^  expectedr    r    r$   test_compute_wer_metricZ  s.   


"z2TestMultiTaskMetricCompute.test_compute_wer_metricc                 C   s   t  }t  |_tdddddii}td*}t  }ddd	|j_||_t||}|jd
d}|ddd	ks8J W d   dS 1 sCw   Y  dS )zTest compute for BLEU metricr  r6  r  r  r  r  g     9@g     @@)r6  bleu_numtest_)r  Nr  )r=   r  r  r  r  r  r^  r    r    r$   test_compute_bleu_metric{  s&   

"z3TestMultiTaskMetricCompute.test_compute_bleu_metricc                 C   s   t  }t  |_tddddddddi}td'}t  }t  }||g|_t||}|  |j  |j  W d	   d	S 1 sDw   Y  d	S )
z(Test that reset is called on all metricsr  r  r  r  r  r  r  r  N)r   r   r   r   r  r   resetrA  r  r    r    r$   test_reset_metrics  s,   



"z-TestMultiTaskMetricCompute.test_reset_metricsN)
r	  r
  r  r   r  r  r  r  r  r  r    r    r    r$   r  W  s    
 
r  c                   @   r  )	TestMultiTaskMetricEdgeCasesz$Test edge cases and error conditionsc           	      C   s   t  }t  |_tdddddii}td-}t  }||_t||}t  }i |_||_||g\}}|d g ks;J W d   dS 1 sFw   Y  dS )z.Test handling of cuts without custom attributer  rl   r  r  r  r  N)	r   r   r   r   r(   r   r  r  r  )	r=   r  r  r  r  r  r  r  r  r    r    r$   test_missing_custom_attribute  s*   

"z:TestMultiTaskMetricEdgeCases.test_missing_custom_attributec                 C   sJ   t  }|d}|i }|du sJ |d}|ddi}|du s#J dS )z(Test complex constraints with edge casesz.missing_prop==valueFz.prop==valuepropNr  )r=   r  r  r^  r    r    r$   "test_complex_constraint_edge_cases  s   

z?TestMultiTaskMetricEdgeCases.test_complex_constraint_edge_casesc                 C   sN   t  }d|jv s
J d|jv sJ |jd tjksJ |jd tjks%J dS )z,Test that all operators are properly definedz==z!=N)r   
primitivesr  r  r  r  r    r    r$   test_operators_coverage  s
   z4TestMultiTaskMetricEdgeCases.test_operators_coverageN)
r	  r
  r  r   r  r  r  r  r!  r#  r    r    r    r$   r    s    
 
r  );r,   rg  r  r   r  r   r   typingr   unittest.mockr   r   r  rK   	omegaconfr   !nemo.collections.asr.metrics.bleur   r   r	   r
   &nemo.collections.asr.metrics.multitaskr   r    nemo.collections.asr.metrics.werr   r   r   r   2nemo.collections.asr.parts.submodules.ctc_decodingr   r   r   r   r   8nemo.collections.asr.parts.submodules.multitask_decodingr   3nemo.collections.asr.parts.submodules.rnnt_decodingr   r   r   +nemo.collections.asr.parts.utils.rnnt_utilsr   "nemo.collections.common.tokenizersr   nemo.utils.config_utilsr   r  r1   r2   r  r!  r  r  r  r  r  r  r  r    r    r    r$   <module>   sR       "     Q?T 5uqa