o
    ‚Ð¢i!  ã                   @   sÊ   d Z ddlZddlm  mZ ddlZddlm	Z	m
Z
mZ G dd„ dƒZG dd„ dƒZG dd	„ d	ƒZG d
d„ dƒZG dd„ dƒZG dd„ dƒZG dd„ dƒZG dd„ dƒZG dd„ dƒZG dd„ dƒZdS )zTests for the Tier 1 validator.é    N)Úvalidate_transcriptionÚvalidate_batchÚValidationResultc                   @   ó$   e Zd Zdd„ Zdd„ Zdd„ ZdS )ÚTestEmptyAndNoSpeechc           	      C   sL  t ddddœddƒ}|j}|s0ddt ¡ v st |¡r t |¡ndt |¡dœ }tt |¡ƒ‚d }|j	}d	}||k}|sqt 
d
|fd||f¡dt ¡ v sRt |¡rWt |¡ndt |¡t |¡dœ }dd|i }tt |¡ƒ‚d  } }}|j}| }|s ddt ¡ v s‹t |¡rt |¡ndt |¡dœ }tt |¡ƒ‚d  }}d S )NÚseg1Ú )ÚtranscriptionÚtaggedÚteç      @ú,assert %(py2)s
{%(py2)s = %(py0)s.is_empty
}Úresult©Úpy0Úpy2g        ©ú==)z5%(py2)s
{%(py2)s = %(py0)s.quality_score
} == %(py5)s©r   r   Úpy5úassert %(py7)sÚpy7ú4assert not %(py2)s
{%(py2)s = %(py0)s.asr_eligible
})r   Úis_emptyÚ@py_builtinsÚlocalsÚ
@pytest_arÚ_should_repr_global_nameÚ	_safereprÚAssertionErrorÚ_format_explanationÚquality_scoreÚ_call_reprcompareÚasr_eligible)	Úselfr   Ú@py_assert1Ú@py_format3Ú@py_assert4Ú@py_assert3Ú@py_format6Ú@py_format8Ú@py_format4© r,   ú0/home/ubuntu/transcripts/tests/test_validator.pyÚtest_empty_transcription   s   PŠ^z-TestEmptyAndNoSpeech.test_empty_transcriptionc                 C   sÈ   ddddœ}t d|ddƒ}|j}|s3ddt ¡ v st |¡r#t |¡ndt |¡dœ }tt |¡ƒ‚d }|j	}| }|s^d	dt ¡ v sIt |¡rNt |¡ndt |¡dœ }tt |¡ƒ‚d  }}d S )
Nz[NO_SPEECH]r   ©r	   r
   Údetected_languager   r   z0assert %(py2)s
{%(py2)s = %(py0)s.is_no_speech
}r   r   r   )
r   Úis_no_speechr   r   r   r   r   r   r    r#   )r$   Údatar   r%   r&   r(   r+   r,   r,   r-   Útest_no_speech   s   P^z#TestEmptyAndNoSpeech.test_no_speechc                 C   sb   t di ddƒ}|j}|s-ddt ¡ v st |¡rt |¡ndt |¡dœ }tt |¡ƒ‚d }d S )Nr   r   r   r   r   r   )	r   r   r   r   r   r   r   r   r    )r$   r   r%   r&   r,   r,   r-   Útest_none_transcription   s   Tz,TestEmptyAndNoSpeech.test_none_transcriptionN)Ú__name__Ú
__module__Ú__qualname__r.   r3   r4   r,   r,   r,   r-   r      s    r   c                   @   ó   e Zd Zdd„ Zdd„ ZdS )ÚTestLengthRatioc                 C   sr   d}||ddœ}t d|ddƒ}|j}|s5ddt ¡ v s t |¡r%t |¡ndt |¡dœ }tt |¡ƒ‚d }d S )	Nz8hello world this is a test transcription with some wordsÚenr/   r   r   z3assert %(py2)s
{%(py2)s = %(py0)s.length_ratio_ok
}r   r   ©	r   Úlength_ratio_okr   r   r   r   r   r   r    )r$   Útextr2   r   r%   r&   r,   r,   r-   Útest_normal_length   s   Tz"TestLengthRatio.test_normal_lengthc                 C   s|   d}||ddœ}t d|ddƒ}|j}| }|s8ddt ¡ v s#t |¡r(t |¡ndt |¡dœ }tt |¡ƒ‚d  }}d S )	NÁô  aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaar:   r/   r   g       @z7assert not %(py2)s
{%(py2)s = %(py0)s.length_ratio_ok
}r   r   r;   )r$   r=   r2   r   r%   r(   r+   r,   r,   r-   Útest_suspiciously_long   s   ^z&TestLengthRatio.test_suspiciously_longN)r5   r6   r7   r>   r@   r,   r,   r,   r-   r9      ó    r9   c                   @   r8   )ÚTestLanguageMismatchc                 C   óx   ddddœ}t d|ddƒ}|j}| }|s6ddt ¡ v s!t |¡r&t |¡ndt |¡dœ }tt |¡ƒ‚d  }}d S )	NÚtestr   r/   r   r   z5assert not %(py2)s
{%(py2)s = %(py0)s.lang_mismatch
}r   r   )	r   Úlang_mismatchr   r   r   r   r   r   r    ©r$   r2   r   r%   r(   r+   r,   r,   r-   Útest_matching_language'   ó   ^z+TestLanguageMismatch.test_matching_languagec                 C   sÜ   ddddœ}t d|ddƒ}|j}|s3ddt ¡ v st |¡r#t |¡ndt |¡d	œ }tt |¡ƒ‚d }d
d„ |j	D ƒ}t
|ƒ}|shddt ¡ v sOt t
¡rTt t
¡ndt |¡t |¡dœ }tt |¡ƒ‚d  }}d S )NrD   Úhir/   r   r   r   z1assert %(py2)s
{%(py2)s = %(py0)s.lang_mismatch
}r   r   c                 s   s    | ]}d |v V  qdS )rE   Nr,   )Ú.0Úfr,   r,   r-   Ú	<genexpr>0   s   € z@TestLanguageMismatch.test_mismatched_language.<locals>.<genexpr>z,assert %(py4)s
{%(py4)s = %(py0)s(%(py2)s)
}Úany)r   r   Úpy4)r   rE   r   r   r   r   r   r   r    ÚflagsrM   )r$   r2   r   r%   r&   r(   Ú@py_format5r,   r,   r-   Útest_mismatched_language,   s   Prz-TestLanguageMismatch.test_mismatched_languageN)r5   r6   r7   rG   rQ   r,   r,   r,   r-   rB   &   ó    rB   c                   @   r8   )ÚTestTagConsistencyc                 C   sv   d}d}||ddœ}t d|ddƒ}|j}|s7ddt ¡ v s"t |¡r't |¡ndt |¡d	œ }tt |¡ƒ‚d }d S )
Núhello worldz[laugh] hello worldr:   r/   r   r   z6assert %(py2)s
{%(py2)s = %(py0)s.tag_consistency_ok
}r   r   ©	r   Útag_consistency_okr   r   r   r   r   r   r    )r$   Útransr
   r2   r   r%   r&   r,   r,   r-   Útest_consistent_tags4   s
   Tz'TestTagConsistency.test_consistent_tagsc                 C   s€   d}d}||ddœ}t d|ddƒ}|j}| }|s:ddt ¡ v s%t |¡r*t |¡ndt |¡d	œ }tt |¡ƒ‚d  }}d S )
NrT   zcompletely different textr:   r/   r   r   z:assert not %(py2)s
{%(py2)s = %(py0)s.tag_consistency_ok
}r   r   rU   )r$   rW   r
   r2   r   r%   r(   r+   r,   r,   r-   Útest_inconsistent_tags;   s
   ^z)TestTagConsistency.test_inconsistent_tagsN)r5   r6   r7   rX   rY   r,   r,   r,   r-   rS   3   s    rS   c                   @   r8   )ÚTestUNKDensityc                 C   s2  ddddœ}t d|ddƒ}|j}d}||k}|sLt d|fd||f¡d	t ¡ v s-t |¡r2t |¡nd	t |¡t |¡d
œ }dd|i }tt 	|¡ƒ‚d  } }}|j
}d}||k}|s‘t d|fd||f¡d	t ¡ v srt |¡rwt |¡nd	t |¡t |¡d
œ }dd|i }tt 	|¡ƒ‚d  } }}d S )Nzclean text herer:   r/   r   r   r   r   ©z/%(py2)s
{%(py2)s = %(py0)s.num_unk
} == %(py5)sr   r   r   r   )z5%(py2)s
{%(py2)s = %(py0)s.num_inaudible
} == %(py5)s)r   Únum_unkr   r"   r   r   r   r   r   r    Únum_inaudible©r$   r2   r   r%   r'   r(   r)   r*   r,   r,   r-   Útest_no_unkD   s   ŠŽzTestUNKDensity.test_no_unkc                 C   s¨   ddddœ}t d|ddƒ}|j}d}||k}|sLt d|fd||f¡d	t ¡ v s-t |¡r2t |¡nd	t |¡t |¡d
œ }dd|i }tt 	|¡ƒ‚d  } }}d S )Nz[UNK] [UNK] [UNK] word [UNK]r:   r/   r   r   é   r   r[   r   r   r   r   )
r   r\   r   r"   r   r   r   r   r   r    r^   r,   r,   r-   Útest_high_unkJ   ó   ŽzTestUNKDensity.test_high_unkN)r5   r6   r7   r_   ra   r,   r,   r,   r-   rZ   C   rA   rZ   c                   @   ó   e Zd Zdd„ ZdS )ÚTestEventTagCountingc           	      C   s¬   d}d|ddœ}t d|ddƒ}|j}d}||k}|sNt d|fd	||f¡d
t ¡ v s/t |¡r4t |¡nd
t |¡t |¡dœ }dd|i }tt 	|¡ƒ‚d  } }}d S )Nz#[laugh] hello [noise] world [cough]rT   r:   r/   r   r   é   r   )z6%(py2)s
{%(py2)s = %(py0)s.num_event_tags
} == %(py5)sr   r   r   r   )
r   Únum_event_tagsr   r"   r   r   r   r   r   r    )	r$   r
   r2   r   r%   r'   r(   r)   r*   r,   r,   r-   Útest_counts_event_tagsQ   s   Žz+TestEventTagCounting.test_counts_event_tagsN)r5   r6   r7   rg   r,   r,   r,   r-   rd   P   ó    rd   c                   @   r8   )ÚTestQualityScorec                 C   s¨   ddddœ}t d|ddƒ}|j}d}||k}|sLt d|fd||f¡d	t ¡ v s-t |¡r2t |¡nd	t |¡t |¡d
œ }dd|i }tt 	|¡ƒ‚d  } }}d S )Nz
clean textr:   r/   r   r   gÍÌÌÌÌÌì?)ú>=)z5%(py2)s
{%(py2)s = %(py0)s.quality_score
} >= %(py5)sr   r   r   r   ©
r   r!   r   r"   r   r   r   r   r   r    r^   r,   r,   r-   Útest_perfect_qualityY   rb   z%TestQualityScore.test_perfect_qualityc                 C   s¨   ddddœ}t d|ddƒ}|j}d}||k }|sLt d	|fd
||f¡dt ¡ v s-t |¡r2t |¡ndt |¡t |¡dœ }dd|i }tt 	|¡ƒ‚d  } }}d S )Nz[UNK] [UNK] xzdifferent textrI   r/   r   r:   r   gš™™™™™é?©ú<)z4%(py2)s
{%(py2)s = %(py0)s.quality_score
} < %(py5)sr   r   r   r   rk   r^   r,   r,   r-   Ú!test_degraded_quality_with_issues^   rb   z2TestQualityScore.test_degraded_quality_with_issuesN)r5   r6   r7   rl   ro   r,   r,   r,   r-   ri   X   rR   ri   c                   @   r   )ÚTestLaneFlagsc                 C   ón   ddddœ}t d|ddƒ}|j}|s3ddt ¡ v st |¡r#t |¡ndt |¡dœ }tt |¡ƒ‚d }d S )	Nz	good textr:   r/   r   r   z0assert %(py2)s
{%(py2)s = %(py0)s.asr_eligible
}r   r   )	r   r#   r   r   r   r   r   r   r    ©r$   r2   r   r%   r&   r,   r,   r-   Útest_asr_eligiblee   ó   TzTestLaneFlags.test_asr_eligiblec                 C   rq   )	Nzgood text herer:   r/   r   r   z6assert %(py2)s
{%(py2)s = %(py0)s.tts_clean_eligible
}r   r   ©	r   Útts_clean_eligibler   r   r   r   r   r   r    rr   r,   r,   r-   Útest_tts_clean_eligiblej   rt   z%TestLaneFlags.test_tts_clean_eligiblec                 C   rC   )	Nz
[UNK] textr:   r/   r   r   z:assert not %(py2)s
{%(py2)s = %(py0)s.tts_clean_eligible
}r   r   ru   rF   r,   r,   r-   Útest_tts_not_eligible_with_unko   rH   z,TestLaneFlags.test_tts_not_eligible_with_unkN)r5   r6   r7   rs   rw   rx   r,   r,   r,   r-   rp   d   s    rp   c                   @   r8   )ÚTestBoundaryScorec           	      C   s´   ddddœ}dddœ}t d|dd|ƒ}|j}d	}||k }|sRt d
|fd||f¡dt ¡ v s3t |¡r8t |¡ndt |¡t |¡dœ }dd|i }tt 	|¡ƒ‚d  } }}d S )Nr=   r:   r/   TF©Úabrupt_startÚ
abrupt_endr   r   g      ð?rm   )z5%(py2)s
{%(py2)s = %(py0)s.boundary_score
} < %(py5)sr   r   r   r   ©
r   Úboundary_scorer   r"   r   r   r   r   r   r    ©	r$   r2   Útrimr   r%   r'   r(   r)   r*   r,   r,   r-   Útest_abrupt_start_penaltyv   ó   
Žz+TestBoundaryScore.test_abrupt_start_penaltyc           	      C   s´   ddddœ}dddœ}t d|dd|ƒ}|j}d}||k}|sRt d	|fd
||f¡dt ¡ v s3t |¡r8t |¡ndt |¡t |¡dœ }dd|i }tt 	|¡ƒ‚d  } }}d S )Nr=   r:   r/   Trz   r   r   g…ëQ¸…ã?)ú<=)z6%(py2)s
{%(py2)s = %(py0)s.boundary_score
} <= %(py5)sr   r   r   r   r}   r   r,   r,   r-   Útest_both_abrupt|   r‚   z"TestBoundaryScore.test_both_abruptN)r5   r6   r7   r   r„   r,   r,   r,   r-   ry   u   rA   ry   c                   @   rc   )ÚTestBatchValidationc                 C   s|  dddddœdœdddddœdœg}t |ddd	d
œi ƒ}t|ƒ}d}||k}|smt d|fd||f¡dt ¡ v s=t t¡rBt t¡nddt ¡ v sNt |¡rSt |¡ndt |¡t |¡dœ }dd|i }tt 	|¡ƒ‚d  } }}|d }|j
}|d }|j
}	||	k}|s²t d|fd||	f¡t |¡t |¡t |¡t |	¡dœ }dd|i }
tt 	|
¡ƒ‚d  } } } }}	d S )NÚs1Úhellor:   r/   )Ú
segment_idÚtranscription_dataÚs2r   r   g      @)r†   rŠ   é   r   )z0%(py3)s
{%(py3)s = %(py0)s(%(py1)s)
} == %(py6)sÚlenÚresults)r   Úpy1Úpy3Úpy6zassert %(py8)sÚpy8r   é   )ú>)zW%(py3)s
{%(py3)s = %(py1)s.quality_score
} > %(py8)s
{%(py8)s = %(py6)s.quality_score
})rŽ   r   r   r‘   zassert %(py10)sÚpy10)r   rŒ   r   r"   r   r   r   r   r   r    r!   )r$   Ú	responsesr   Ú@py_assert2Ú@py_assert5r'   Ú@py_format7Ú@py_format9Ú@py_assert0Ú@py_assert7Ú@py_format11r,   r,   r-   Útest_validate_batch„   s   þ®–z'TestBatchValidation.test_validate_batchN)r5   r6   r7   r   r,   r,   r,   r-   r…   ƒ   rh   r…   )Ú__doc__Úbuiltinsr   Ú_pytest.assertion.rewriteÚ	assertionÚrewriter   ÚpytestÚsrc.validatorr   r   r   r   r9   rB   rS   rZ   rd   ri   rp   ry   r…   r,   r,   r,   r-   Ú<module>   s    "