o
    i5(                  
   @   sV  d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	 G dd dej
Zdd Zdd	 Zd
d Zdd Zdd ZedkrejddZejdeddd ejdddd ejddddd ejdddd ejd ed!d"d# ejd$dd%d e Ze Zejdd ZeD ]Zee e  qejej d&d' ej!" #d(Z$ee$ej%ej&ej'Z(i Z)e$D ]Z*ee*Z*e(e* d) Z+e,e(e* d* Z-e(e* d+ Z.e,e(e* d, Z/e-dkre0d-1e* qe/dkre0d.1e* qe2d/1e* e2d01e+d1 e- e2d21e.d1  ej34ej5d3e*6d4d5 d6 Z7e8e7d7d8d9qZ9d:Z:e:d;krdZ;dZ<e(e* d* = D ]\Z>Z?e?e:k rKe;d<7 Z;q<e<d<7 Z<q<dZ@e(e* d, = D ]\Z>Z?e?e:krhe@d<7 Z@q[e<e- ZAeBe@d=Z@e@e.d1  ZCe@e/ ZDe9Ed>1e:eAeDeC e:ejF7 Z:e:d;ks0W d   n	1 sw   Y  e7e)e*< qdS dS )?zk This implementation is adapted from https://github.com/wenet-e2e/wekws/blob/main/wekws/bin/compute_det.py.    N)split_mixed_labelc                       s.   e Zd Zd fdd	Zdd Zdd Z  ZS )	thread_wrapper c                    s$   t t|   || _|| _g | _d S N)superr   __init__funcargsresult)selfr   r	   	__class__r   P/home/ubuntu/.local/lib/python3.10/site-packages/funasr/utils/compute_det_ctc.pyr      s   
zthread_wrapper.__init__c                 C   s   | j | j | _d S r   )r   r	   r
   r   r   r   r   run   s   zthread_wrapper.runc                 C   s   z| j W S  ty   Y d S w r   )r
   	Exceptionr   r   r   r   
get_result   s
   zthread_wrapper.get_result)r   )__name__
__module____qualname__r   r   r   __classcell__r   r   r   r   r      s    r   c                 C   s$   t | }ddd |D }| S )N c                 s   s    | ]}| d V  qdS ) Nr   ).0subr   r   r   	<genexpr>!   s    z$space_mixed_label.<locals>.<genexpr>)r   joinstrip)	input_strsplits	space_strr   r   r   space_mixed_label   s   r!   c                 C   s\   g }t | ddd}|D ]}| dkr||  qW d    |S 1 s'w   Y  |S )Nrutf8encodingr   )openr   append)	list_filelistsfinliner   r   r   
read_lists%   s   
r,   c              	   C   s   t d i }|D ]+}| dd }t|dk r&t d|  q	||d d ||d < q	g }| D ]6}| dd }t|dkre|d |v re|t	|d ||d  |d d	d
 q9t d|d  q9|S )Nzmake pair for wav-trans list	r      zinvalid line in trans file: {}r   r      i>  )keytxtwavsample_ratez*can't find corresponding trans for key: {})
logginginfor   replacesplitlendebugformatr'   dict)	wav_liststrans_liststrans_tabler+   arrr)   r   r   r   	make_pair.   s4   

r@   c                 C   s   g }|D ]Z}d|v sJ d|v sJ d|v sJ |d }|d }|d }z"t |\}}tj|tjd}|d}t|d }	|	t| }
W n   t	d|  d}
Y |
|d< |
| q|S )	Nr0   r2   r1   )dtyper   zload file failed:         duration)kaldiioload_mattorchtensorfloat32	unsqueezer8   floatr4   r5   r'   )tid
data_listsresultsobjr0   wav_filer1   ratewaveformframesrC   r   r   r   count_durationL   s(   
rS   c              	   C   sN  i }t |dddD}|D ]9}|  }|d }|d }	|	dkr7||vr6||t|d t|d d	i q||vrE||d
dd	i qW d    n1 sPw   Y  t|}
t|}t|
|}t	dt
|  d}d}tt
|| }g }t|D ].}||d krtt|||||  f}ntt|||d  f}|  || ||7 }q~g }|D ]}|  || 7 }qt	dt
|  i }| D ]"}t|}i ||< i || d< d|| d< i || d< d|| d< q|D ]}d|v sJ d|v sJ d|v sJ d|v sJ |d }|d }|d }t|}d| d }|d }||v s.J | D ]s}t|}d| d }||dkrt||| d kr]|| d ||| d i n|| d |di || d  |7  < q0||| d kr|| d ||| d i n|| d |di || d  |7  < q0q|S )Nr"   r#   r$   r   r/   detectedr.      )kwconfiunknowng      zorigin list samples:    zafter list samples: keyword_tablerB   keyword_durationfiller_tablefiller_durationr0   r2   r1   rC   r   rV   rW   )r&   r   r7   updater!   rJ   r,   r@   r4   r5   r8   intranger   rS   startr'   r   r   find)keywords_list	data_file
trans_file
score_filescore_tabler*   r+   r?   r0   is_detectedr<   r=   rL   num_workersrb   steptasksidxtaskduration_listskeyword_filler_tablekeywordrN   rO   r1   txt_regstr_lrblkrC   keyword_regstr_lrblkr   r   r   load_data_and_scoreg   s   




rt   __main__zcompute det curve)descriptionz
--keywordsTz&preset keyword str, input all keywords)typerequiredhelpz--test_dataztest data file)rx   ry   z--trans_datar   ztranscription of test data)rx   defaultry   z--score_filez
score filez--stepgMbP?zthreshold step)rw   rz   ry   z--stats_dirzto save det stats filesz%%(asctime)s %(levelname)s %(message)s)levelr:   ,r[   rZ   r]   r\   z0Can't compute det for {} without positive samplez0Can't compute det for {} without negative samplezComputing det for {}z-  Keyword duration: {} Hours, wave number: {}g      @z  Filler duration: {} Hourszstats.r   _z.txtwr#   r$   rB   g      ?r/   gư>z{:.3f} {:.6f} {:.6f} {:.6f}
)G__doc__osjsonr4   argparse	threadingrD   rF   funasr.utils.kws_utilsr   Threadr   r!   r,   r@   rS   rt   r   ArgumentParserparseradd_argumentstrrJ   
parse_argsr	   	getLoggerroot_loggerhandlershandlerremoveHandlerclosebasicConfigDEBUGkeywordsr   r7   rd   	test_data
trans_datarg   rp   stats_filesrq   keyword_durr8   keyword_num
filler_dur
filler_numprintr:   r5   pathr   	stats_dirr6   
stats_filer&   fout	thresholdnum_false_rejectnum_true_detectitemsr0   rW   num_false_alarmtrue_detect_ratemaxfalse_alarm_per_hourfalse_alarm_ratewriterk   r   r   r   r   <module>   s    	
_

 








!$