o
    si@                     @   s   d dl Z d dlZd dlZd dlZd dlmZ d dlmZ d dlZ	d dl
Zd dlmZmZ d dlmZ ddlmZ g dZ				
			dddZG dd dZG dd dZG dd dZG dd dejZdS )    N)List)Counter)InputMetricsOutputMetrics   )average_arrays_in_dic)si_sdrsdrsirsarstoipesq>  allTFc	                 C   s  |dkrt }t|tr|g}t| |d|d}	t||d||d}
i }|	df|
dffD ]P\}}|D ]I}|| }z|| ||< W q/ tyx } z/|ratd| d|pPd	 d
| t t	
  d||< ntd| d|pid	 |W Y d}~q/d}~ww q)|rt|S |S )a  Get speech separation/enhancement metrics from mix/clean/estimate.

    Args:
        mix (np.array): mixture array.
        clean (np.array): reference array.
        estimate (np.array): estimate array.
        sample_rate (int): sampling rate of the audio clips.
        metrics_list (Union[List[str], str): List of metrics to compute.
            Defaults to 'all' (['si_sdr', 'sdr', 'sir', 'sar', 'stoi', 'pesq']).
        average (bool): Return dict([float]) if True, else dict([array]).
        compute_permutation (bool): Whether to compute the permutation on
            estimate sources for the output metrics (default False)
        ignore_metrics_errors (bool): Whether to ignore errors that occur in
            computing the metrics. A warning will be printed instead.
        filename (str, optional): If computing a metric fails, print this
            filename along with the exception/warning message for debugging purposes.

    Shape:
        - mix: :math:`(D, N)` or `(N, )`.
        - clean: :math:`(K\_source, N)` or `(N, )`.
        - estimate: :math:`(K\_target, N)` or `(N, )`.

    Returns:
        dict: Dictionary with all requested metrics, with `'input_'` prefix
        for metrics at the input (mixture against clean), no prefix at the
        output (estimate against clean). Output format depends on average.

    Examples
        >>> import numpy as np
        >>> import pprint
        >>> from asteroid.metrics import get_metrics
        >>> mix = np.random.randn(1, 16000)
        >>> clean = np.random.randn(2, 16000)
        >>> est = np.random.randn(2, 16000)
        >>> metrics_dict = get_metrics(mix, clean, est, sample_rate=8000,
        ...                            metrics_list='all')
        >>> pprint.pprint(metrics_dict)
        {'input_pesq': 1.924380898475647,
         'input_sar': -11.67667585294225,
         'input_sdr': -14.88667106190552,
         'input_si_sdr': -52.43849784881705,
         'input_sir': -0.10419427290163795,
         'input_stoi': 0.015112115177091223,
         'pesq': 1.7713886499404907,
         'sar': -11.610963379923195,
         'sdr': -14.527246041125844,
         'si_sdr': -46.26557128489802,
         'sir': 0.4799929272243427,
         'stoi': 0.022023073540350643}

    r   T)observationspeech_sourceenable_si_sdrsample_rate)speech_predictionr   r   r   compute_permutationinput_ zError computing z for z<unknown file>z, ignoring. Error was: N)ALL_METRICS
isinstancestrr   r   	ExceptionwarningswarnRuntimeWarning	tracebackprint_stackRuntimeErrorr   )mixcleanestimater   metrics_listaverager   ignore_metrics_errorsfilenameinput_metricsoutput_metricsutt_metricssrcprefixmetrickeyerr r1   D/home/ubuntu/.local/lib/python3.10/site-packages/asteroid/metrics.pyget_metrics   sT   >

r3   c                   @   s`   e Zd ZdZeedddfddZdddejd	ejd
ejfddZ	dd Z
ddefddZdS )MetricTrackerat  Metric tracker, subject to change.

    Args:
        sample_rate (int): sampling rate of the audio clips.
        metrics_list (Union[List[str], str): List of metrics to compute.
            Defaults to 'all' (['si_sdr', 'sdr', 'sir', 'sar', 'stoi', 'pesq']).
        average (bool): Return dict([float]) if True, else dict([array]).
        compute_permutation (bool): Whether to compute the permutation on
            estimate sources for the output metrics (default False)
        ignore_metrics_errors (bool): Whether to ignore errors that occur in
            computing the metrics. A warning will be printed instead.
    TFc                 C   s8   || _ || _|| _|| _|| _g | _d| _t | _	d S Nr   )
r   r%   r&   r   r'   series_list_len_last_savedpd	DataFrame_all_metrics)selfr   r%   r&   r   r'   r1   r1   r2   __init__   s   zMetricTracker.__init__N)r(   r"   r#   r$   c                K   sD   t |||| j| j| j| j| j|d	}|| | jt	
| dS )a3  Compute metrics for mix/clean/estimate and log it to the class.

        Args:
            mix (np.array): mixture array.
            clean (np.array): reference array.
            estimate (np.array): estimate array.
            sample_rate (int): sampling rate of the audio clips.
            filename (str, optional): If computing a metric fails, print this
                filename along with the exception/warning message for debugging purposes.
            **kwargs: Any key, value pair to log in the utterance metric (filename, speaker ID, etc...)
        )r   r%   r&   r   r'   r(   N)r3   r   r%   r&   r   r'   updater6   appendr8   Series)r;   r"   r#   r$   r(   kwargsr+   r1   r1   r2   __call__   s   
zMetricTracker.__call__c                 C   s<   | j t| jkr| jS t| j| _ t| j| _t| jS )z1Return dataframe containing the results (cached).)r7   lenr6   r:   r8   r9   r;   r1   r1   r2   as_df   s
   zMetricTracker.as_df	dump_pathc                 C   s   i }|   }| jD ]}d| }|| ||  }||  ||< | ||d < q	|durU|ds5|d n|}t|d}tj||dd W d   |S 1 sPw   Y  |S )zHReturn dict of average metrics. Dump to JSON if `dump_path` is not None.r   _impNz.jsonwr   )indent)rD   r%   meanendswithopenjsondump)r;   rE   final_results
metrics_dfmetric_nameinput_metric_nameldffr1   r1   r2   final_report   s   

zMetricTracker.final_reportN)__name__
__module____qualname____doc__tupler   r<   npndarrayrA   rD   r   rT   r1   r1   r1   r2   r4   x   s"    

r4   c                   @   s$   e Zd Zdd Zdd Zdd ZdS )MockWERTrackerc                 O   s   d S rU   r1   r;   argsr@   r1   r1   r2   r<         zMockWERTracker.__init__c                 O   s   t  S rU   )dictr^   r1   r1   r2   rA      s   zMockWERTracker.__call__c                 C   s   dS )Nr   r1   rC   r1   r1   r2   final_report_as_markdown   r`   z'MockWERTracker.final_report_as_markdownN)rV   rW   rX   r<   rA   rb   r1   r1   r1   r2   r]      s    r]   c                   @   s   e Zd ZdZdddZdejdejdejded	ee	 f
d
dZ
edddZedd Zdd ZeddddejfddZedd Zdd Zdd ZdS ) 
WERTrackera,  Word Error Rate Tracker. Subject to change.

    Args:
        model_name (str): Name of the petrained model to use.
        trans_df (dataframe): Containing field `utt_id` and `text`.
            See librimix/ConvTasNet recipe.
        use_gpu (bool): Whether to use GPU for forward caculation.
    Tc              	   C   s   ddl m} ddlm} dd l}|| _|rdnd| _| }|d	i ||d| ji| _g | _	g | _
g | _g | _g | _t|j|jd |k d | _|| _| || _t | _t | _t | _|| | | | | | g| _d S )
Nr   )Speech2Text)ModelDownloadercudacpudevicenamefsr1   ) espnet2.bin.asr_inferencerd   espnet_model_zoo.downloaderre   jiwer
model_namerh   download_and_unpack	asr_modelinput_txt_listclean_txt_listoutput_txt_listtranscriptionstrue_txt_listint
data_framer   trans_df_df_to_dict	trans_dicr   mix_counterclean_counterest_counterComposeToLowerCaseRemovePunctuationRemoveMultipleSpacesStripSentencesToListOfWordsRemoveEmptyStringstransformation)r;   rn   rx   use_gpurd   re   rm   dr1   r1   r2   r<      s6    
zWERTracker.__init__r"   r#   r$   r   wav_idc                C   sj  || j kr| j||||| j d\}}}t }t }t }| |}	ti i i i d}
|	|
d< t|D ]&\}}||
d d| < | j| |
d d| < | jt|| j| d q1|D ]%}t| j	| j| |	| j
d}|  j|7  _||7 }| jt||	d qZtt||D ]@\}\}}| |}	t| j	| j| |	| j
d}|  j|7  _||7 }| jt||	d ||
d	 d| < |	|
d	 d| < qtt||D ]@\}\}}| |}	t| j	| j| |	| j
d}|  j|7  _||7 }| jt||	d ||
d
 d| < |	|
d
 d| < q| j|
 t| jdi t|| jdi t|| jdi t|dS )zCCompute and store best hypothesis for the mixture and the estimatesfs_fromfs_to)mixture_txtr#   	estimatestruthr   r   utt_id_txt_)utt_idtext)r   
hypothesisr   r#   r   )	input_wer	clean_werwerNr1   )r   resampler   predict_hypothesisra   	enumeraterz   ru   r>   hsdir   r{   rq   zipr|   rr   r}   rs   rt   wer_from_hsdi)r;   r"   r#   r$   r   r   local_mix_counterlocal_clean_counterlocal_est_countertxt
trans_dictitmp_id	out_countwavestr1   r1   r2   rA     sj   




zWERTracker.__call__r   c                 C   s   || | | | |  }|S rU   r1   )hitssubstitutions	deletions
insertionsr   r1   r1   r2   r   J  s   zWERTracker.wer_from_hsdic                    s:   ddl m} g d || |||d } fdd|D S )Nr   )compute_measuresr   r   r   r   )r   r   truth_transformhypothesis_transformc                    s   i | ]\}}| v r||qS r1   r1   .0kvkeepr1   r2   
<dictcomp>Z  s    z#WERTracker.hsdi.<locals>.<dictcomp>)rm   r   items)r   r   r   r   outr1   r   r2   r   O  s   zWERTracker.hsdic                 C   s,   t || j}| |}|d ^}}|S r5   )torch
from_numpytorh   rp   )r;   r   nbestsr   _r1   r1   r2   r   \  s   
zWERTracker.predict_hypothesisNr   wavsc                    s"   ddl m   fdd|D S )Nr   )r   c                    s   g | ]	} |d qS ))sr_origsr_newr1   )r   rG   	_resampler   r   r1   r2   
<listcomp>f  s    z'WERTracker.resample.<locals>.<listcomp>)resampyr   )r   r   r   r1   r   r2   r   b  s   zWERTracker.resamplec                 C   s$   dd t | d  | d  D S )Nc                 S   s   i | ]\}}||qS r1   r1   r   r1   r1   r2   r   j  s    z*WERTracker._df_to_dict.<locals>.<dictcomp>r   r   )r   to_list)dfr1   r1   r2   ry   h  s   $zWERTracker._df_to_dictc                    s:  t  fdddD }t  fdddD }t  fdddD } jdi t j} jdi t j} jdi t j} fdddD } fd	ddD } fd
ddD }	t j|g| |dg }
t j|g| |dg }t j|g|	 |dg }dg|
 dg| dg| g}tj|g dd}|S )z-Generate a MarkDown table, as done by ESPNet.c                 3       | ]} j | V  qd S rU   r{   r   r   rC   r1   r2   	<genexpr>n      z&WERTracker.final_df.<locals>.<genexpr>)r   r   r   c                 3   r   rU   r|   r   rC   r1   r2   r   o  r   c                 3   r   rU   r}   r   rC   r1   r2   r   p  r   c                       g | ]} j | qS r1   r   r   rC   r1   r2   r   u      
z'WERTracker.final_df.<locals>.<listcomp>r   c                    r   r1   r   r   rC   r1   r2   r   x  r   c                    r   r1   r   r   rC   r1   r2   r   {  r   -ztest_clean / mixtureztest_clean / cleanztest_clean / separated)	datasetSntWrdCorrSubDelInsErrzS.Err)columnsNr1   )	sumr   ra   r{   r|   r}   rB   r8   r9   )r;   
mix_n_wordclean_n_word
est_n_wordmix_werr   est_wermix_hsdi
clean_hsdiest_hsdifor_mix	for_cleanfor_esttabler   r1   rC   r2   final_dfl  s4   


zWERTracker.final_dfc                 C   s   |   jdddS )NFgithub)indextablefmt)r   to_markdownrC   r1   r1   r2   rb     s   z#WERTracker.final_report_as_markdown)T)r   r   r   r   )rV   rW   rX   rY   r<   r[   r\   rv   r   r   rA   staticmethodr   r   r   r   ry   r   rb   r1   r1   r1   r2   rc      s2    
	 
H

!rc   c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )	F1TrackerzF1 score tracker.Hz>c                    s   t    || _d S rU   )superr<   epsilon)r;   r   	__class__r1   r2   r<     s   

zF1Tracker.__init__c                 C   s   t t ||}t t t |t |}t t t |||}t t t |||}|| || | |  }||| | j  }||| | j  }	d||	  ||	 | j  }
|
j| jd| j d}
t|t|t|	t|
dS )N   r   )minmax)accuracy	precisionrecallf1_score)r   r   logical_andlogical_notlogical_xorr   clampfloat)r;   y_predy_truetptnfpfnr   r   r   f1r1   r1   r2   forward  s   zF1Tracker.forward)r   )rV   rW   rX   rY   r<   r  __classcell__r1   r1   r   r2   r     s    r   )r   r   TFFN)rL   r   r   r   typingr   collectionsr   pandasr8   numpyr[   pb_bss_evalr   r   torch.nnnnutilsr   r   r3   r4   r]   rc   Moduler   r1   r1   r1   r2   <module>   s0    
gU :