o
    }oi-                     @   s  d dl Z d dlZd dlmZmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ ee	dZd d	lmZ d5dedefddZd6dee dee defddZdedefddZd7dedeeeef  defddZd8dededefddZd9ded edefd!d"Z			#	$		$		$	$		$d:d%ed&ed'ed(ed ed)ed*ed+ed,edee d-edeeeef fd.d/Z			#		$	$		0		$d;d%ed&ed'ed*ed+ed,edee d1ed2ee d-efd3d4ZdS )<    N)OptionalTupleUnion)SacreBLEUScore)
ROUGEScore)word_error_rate_detail)logging)LogMode)bleurouge)
DictConfig .
configreturnc              	   C   st   g }|   D ].\}}|r| | | n|}t|tr*|t||||d| q|| d|  q||S )a]  
    Flatten a DictConfig object into a string of parameter names and their values.

    Args:
        config (DictConfig): The input DictConfig object.
        parent_key (str): The parent key for nested configurations.
        sep (str): Separator between keys.

    Returns:
        str: Flattened string of parameter names and their values.
    )sepjoin=)items
isinstancer   extendflatten_dict_configsplitappendr   )r   
parent_keyr   r   r   kvnew_key r   _/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/asr/parts/utils/eval_utils.pyr   "   s   

r   exclude_keysc                    sL   | sdS d}t | |d|} r fdd|D }ddd |D }|S )z
    Flatten a DictConfig object into a string of hydra overrides for commandline, for example:
    >>> config = OmegaConf.create({"foo": {"bar": 1, "baz": 2}})
    >>> get_hydra_override_from_config(config)
    "++foo.bar=1 ++foo.baz=2"
    r   r   )r   c                    s&   g | ] t  fd dD s qS )c                    s   g | ]}|  d d kqS )r   r   )r   ).0yxr   r    
<listcomp>D   s    z=get_hydra_override_from_config.<locals>.<listcomp>.<listcomp>)any)r"   r!   r$   r    r&   D   s   & z2get_hydra_override_from_config.<locals>.<listcomp> c                 S   s   g | ]}d | qS )z++r   )r"   r%   r   r   r    r&   E   s    )r   r   r   )r   r!   r   	overrides	param_strr   r(   r    get_hydra_override_from_config8   s   r,   textc                 C   s   t dd| }|S )zS
    Remove spaces before punctuations, e.g. "hello , world" -> "hello, world"
    z(\w)\s+([.,;!?])z\1\2)resub)r-   resultr   r   r     strip_spaces_before_punctuationsI   s   r1   punctuationsc                 C   s,   |s	dd dD }|D ]}|  |d} q| S )z+
    Remove punctuations from a string
    c                 S      g | ]}|qS r   r   r"   charr   r   r    r&   V       z'remove_punctuations.<locals>.<listcomp>z!"#$%&()*+,-./:;<=>?@[\]^_`{|}~r   )replace)r-   r2   punctuationr   r   r    remove_punctuationsQ   s
   r9   Ten_strnum_to_wordsc                 C   s   dd dD }dd dD }dd dD }|   } |  } |D ]}| |d} q|D ]}| |d	} q*|D ]}| |d
} q5|rS|dkrKt| dd} ntjdtjd d	| 	 }|S )zY
    Remove unauthorized characters in a string, lower it and remove unneeded spaces
    c                 S   r3   r   r   r4   r   r   r    r&   a   r6   zclean_label.<locals>.<listcomp>u7   /?*",.:=?_{|}~¨«·»¡¿„…‧‹›≪≫!:;ː→c                 S   r3   r   r   r4   r   r   r    r&   b   r6   u$   `¨´‘’“”`ʻ‘’“"‘”c                 S   r3   r   r   r4   r   r   r    r&   c   r6   u   ‘’ʻ‘’‘r   r)   'r:   langidzyCurrently support basic num_to_words in English only. Please use Text Normalization to convert other languages! Skipping!mode)
striplowerr7   convert_num_to_wordsr   warningr	   ONCEr   r   )r;   r<   r?   replace_with_spacereplace_with_blankreplace_with_aposiretr   r   r    clean_label]   s(   rL   r?   c                 C   s   |dkreg d}|   } |  }d}g }|D ]H}| rXt|}|rW|d }|| }	||	 t|d }|sUd}
|ddd }|D ]}|
|d 7 }
qB||
d 7 }|  |s"q||d 7 }q|  }|S tjdtj	d	 |S )
z}
    Convert digits to corresponding words. Note this is a naive approach and could be replaced with text normalization.
    r:   )
zeroonetwothreefourfivesixseveneightniner   
   Nr)   zoCurrently support basic num_to_words in English only. Please use Text Normalization to convert other languages!r@   )
rB   r   isdigitintr   clearr   rE   r	   rF   )r;   r?   r<   wordsout_strnum_wordwordnumdigit
digit_wordnum_streler   r   r    rD   y   s>   
rD   	pred_textFpred_manifestgt_text_attr_namepred_text_attr_nameclean_groundtruth_textuse_ceroutput_filenameignore_capitalizationignore_punctuationstrip_punc_spacec           !      C   s  g }g }g }|r
dnd}t | d}| D ]}| }|sqt|}||vrFd|v r/d}n	td| d dd|f  W  d   S ||  }||  }|rZt||d}|rit||	d	}t||	d	}n
|
rst	|}t	|}|r}|
 }|
 }t|g|g|d
\}}}}}|||< ||d< ||d< ||d< ||d< || || || qW d   n1 sw   Y  t|||d
\}}}}}|s| }n|}t |d}|D ]}t|| |d |  qW d   n1 sw   Y  dt|d|||d|d|d|i} || |fS )a  
    Calculate wer, inserion, deletion and substitution rate based on groundtruth text and pred_text_attr_name (pred_text)
    We use WER in function name as a convention, but Error Rate (ER) currently support Word Error Rate (WER) and Character Error Rate (CER)
    cerwerrr-   ground-truth text attribute z= is not present in manifest! Cannot calculate WER. Returning!Nr>   r2   )
hypotheses
referencesrj   tokensins_ratedel_ratesub_ratewr   samples)open	readlinesrB   jsonloadsr   inforL   r9   r1   rC   r   r   dumpwriteflushlen)!rf   rg   rh   ri   r?   rj   rk   rl   rm   r2   rn   r{   hypsrefseval_metricfplinesamplehyprefrp   rv   rw   rx   ry   	total_wertotal_tokenstotal_ins_ratetotal_del_ratetotal_sub_rateoutput_manifest_w_werfout	total_resr   r   r    cal_write_wer   s   




.


r   r
   metricmetric_argsc
              	   C   s6  g }
g }g }|t vrtd| dt   |r!t | di |nt |  }t| d}| D ]}| }|s9q0t|}||vrcd|v rId}nt	d| d| d d d |f  W  d    S ||  }||  }|r~t
||d}t
||d}n
|	rt|}t|}|r| }| }|d	kr||g|gg }n||| }|||< |
| || || q0W d    n1 sw   Y  |d	krd
d |D }||| }|s| }n|}t|d}|
D ]}t|| |d |  qW d    n	1 s	w   Y  dt|
||i}|||fS )Nzmetric z& is not supported! Please choose from rq   r-   rr   z. is not present in manifest! Cannot calculate z. Returning!rs   r
   c                 S   s   g | ]}|gqS r   r   )r"   r   r   r   r    r&   8  s    z)cal_write_text_metric.<locals>.<listcomp>rz   r   r{   r   )TEXT_METRICS_MAPPING
ValueErrorkeysr|   r}   rB   r~   r   r   r   r9   r1   rC   itemr   r   r   r   r   )rf   rg   rh   rk   rl   rm   r2   r   r   rn   r{   r   r   metric_calculatorr   r   r   r   r   scoretotal_scorer   r   r   r   r   r    cal_write_text_metric   sr    


(


r   )r   r   r   )NN)N)Tr:   )r:   )Nr-   re   Fr:   FNFFNF)
Nr-   re   NFFNr
   NF)r~   r.   typingr   r   r   torchmetrics.textr   torchmetrics.text.rouger    nemo.collections.asr.metrics.werr   
nemo.utilsr   nemo.utils.nemo_loggingr	   r   	omegaconfr   strr   listr,   r1   r9   boolrL   rD   dictr   r   r   r   r   r    <module>   s    $%	

_	
