o
    7wi                     @   s   d Z ddlZddlmZ ejfddZejddddfd	d
ZejfddZejfddZddejfddZ	ddejfddZ
ejfddZdS )zWER print functions.

The functions here are used to print the computed statistics
with human-readable formatting.
They have a file argument, but you can also just use
contextlib.redirect_stdout, which may give a nicer syntax.

Authors
 * Aku Rouhe 2020
    N)edit_distancec                 C   sn   t djdi | |dd t | d | d k rdnd|d t djdi | |d t d	jdi | |d d
S )ar  Prints out WER summary details in human-readable format.

    This function essentially mirrors the Kaldi compute-wer output format.

    Arguments
    ---------
    wer_details : dict
        Dict of wer summary details,
        see ``speechbrain.utils.edit_distance.wer_summary``
        for format.
    file : stream
        Where to write. (default: sys.stdout)
    zl%WER {WER:.2f} [ {num_edits} / {num_scored_tokens}, {insertions} ins, {deletions} del, {substitutions} sub ] )fileendnum_scored_sentsnum_ref_sentsz
 [PARTIAL]r   z=%SER {SER:.2f} [ {num_erroneous_sents} / {num_scored_sents} ]zKScored {num_scored_sents} sentences, {num_absent_sents} not present in hyp.N printformatwer_detailsr   r	   r	   S/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/speechbrain/dataio/wer.pyprint_wer_summary   s2   
r   z<eps>z ; Tc              	   C   sh   |r	t |||d | D ]&}|d r1|rt||d t|d |d |d |||d |r1t||d qdS )a  Print WER summary and alignments.

    Arguments
    ---------
    details_by_utterance : list
        List of wer details by utterance,
        see ``speechbrain.utils.edit_distance.wer_details_by_utterance``
        for format. Has to have alignments included.
    file : stream
        Where to write. (default: sys.stdout)
    empty_symbol : str
        Symbol to use when aligning to nothing.
    separator : str
        String that separates each token in the output. Note the spaces in the
        default.
    print_header: bool
        Whether to print headers
    sample_separator: str
        A separator to put between samples (optional)
    r   empty_symbol	separatorscoredr   	alignment
ref_tokens
hyp_tokensN)_print_alignments_global_header_print_alignment_header_print_alignmentr   )details_by_utterancer   r   r   print_headersample_separatordetsr	   r	   r   print_alignments<   s(   r   c                 C   s   t d|d t d|d | r&t d|d | D ]}t djd	i ||d qnt d|d |rGt d|d |D ]}t djd	i ||d q6d S t d|d d S )
NP================================================================================r   zUTTERANCES WITH HIGHEST WERzANon-empty hypotheses -- utterances for which output was produced:z{key} %WER {WER:.2f}z(No utterances which had produced output!z@Empty hypotheses -- utterances for which no output was produced:z,No utterances which had not produced output!r	   r
   )top_non_empty	top_emptyr   r   r	   r	   r   _print_top_wer_uttsn   s(   r#   c                 C   s>   t d|d t d|d | D ]}t djdi ||d qd S )Nr    r   zSPEAKERS WITH HIGHEST WERz{speaker} %WER {WER:.2f}r	   r
   )spks_by_werr   r   r	   r	   r   _print_top_wer_spks   s
   r%   c                 C   s   g }g }g }| D ]E\}	}
}t |	}|
d urt ||
 n|}|d ur't || n|}tt|t|t|}||| ||| ||| qt|||d t|||d t|||d d S )Nr   )strmaxlenappendcenterr   join)r   abr   r   r   a_paddedb_padded
ops_paddedopij	op_stringa_stringb_string
pad_lengthr	   r	   r   r      s   r   c                 C   s   t d|d t d|d t d|d t d|d t d|d g d}g d}tjd	 d d
ftjd d
dftjd ddftjd ddftjd ddftjd dd fg}t||||| |d d S )Nr    r   
ALIGNMENTSr   zFormat:z<utterance-id>, WER DETAILS)	referenceonthefirstline)and
hypothesisr:   r;   thirdinsr   sub   eq         delr   )r   r   EDIT_SYMBOLSr   )r   r   r   r,   r-   r   r	   r	   r   r      s,   
r   c                 C   s(   t d|d t djdi | |d d S )Nr    r   zp{key}, %WER {WER:.2f} [ {num_edits} / {num_ref_tokens}, {insertions} ins, {deletions} del, {substitutions} sub ]r	   r
   r   r	   r	   r   r      s   
r   )__doc__sysspeechbrain.utilsr   stdoutr   r   r#   r%   r   r   r   r	   r	   r	   r   <module>   s"    -
2

