o
    ॵibV                     @   sV  d dl Z d dlZd dlZd dlZd dlmZ d dlmZmZm	Z	 d dl
Zd dlZd dlmZ d dlm  mZ d dlmZ d dlmZ d dlmZ e Zdd Zd	d
 Zdd Zdd Z	d%dejdejdejdejdef
ddZ dejdejdejdejfddZ!			d&dejdejde"de#de#d e	eee#  ejf fd!d"Z$G d#d$ d$Z%dS )'    N)defaultdict)ListOptionalTuple)ReduceOp)clip_grad_norm_)
get_loggerc                 C   s  |    |dd}|dd}|dd}|dd}	|dd}
|d	d
}|dd
}td|}t|D ]\}}|d
krIt|tj	 |dkrP dS |\}}}}}||}||}||}|duro||}|
d}|dkryq:| |\}}t||||\}}|| }|| }|  |d
 | dkrt|  |}t|r|  |  || dkrtd||	|
||||  q:|d
 |d
krt|tj	 dS dS )z Train one epoch
    	grad_clipg      I@log_interval
   epochr   rank
local_rank
world_size   
grad_accumNz3RANK {}/{}/{} TRAIN Batch {}/{} size {} loss {:.6f})traingettorchtensorto	enumeratedist
all_reducer   SUMsizectc_lossbackwardr   
parametersisfinitestep	zero_gradloggerinfoformatitemfill_)model	optimizerdata_loaderdevicewriterargsclipr
   r   r   r   r   accum_batchsiterator_stop	batch_idxbatchkeyfeatstargetfeats_lengthstarget_lengthsnum_uttslogits_lossacc	grad_norm r=   c/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/audio/kws_utils/batch_utils.pyexecutor_train$   sZ   






r?   c                 C   sr  |    |dd}|dd}d}d}d}td|}	tjd|d}
|d	d}|d
d}|dd}t  t|D ]\}}|dkrOt	|	t
j |	dkrU n|\}}}}}||}||}||}|durt||}|d}|dkr~q@| |\}}t||||d\}}t|r||7 }|| 7 }|| 7 }|
d  | 7  < |
d  || 7  < |
d  |7  < |
d  | 7  < || dkrtd||||||| | ||| 	 q@|	d |dkrt	|	t
j W d   n	1 sw   Y  |dkrt	|
t
j td|
d  |
d}
|
d  |
d   |
d  |
d   fS )z Cross validation on
    r
   r   r   r   r           )   )r*   r   r   r   NT      zORANK {}/{}/{} CV Batch {}/{} size {} loss {:.6f} acc {:.2f} history loss {:.6f}zTotal utts number is {}cpu)evalr   r   r   r   zerosno_gradr   r   r   r   r   r   r   r   sumr%   r"   r#   r$   r&   )r'   r)   r*   r,   r
   r   num_seen_uttsnum_seen_tokens
total_lossr/   counterr   r   r   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r=   r=   r>   executor_cv\   sx   









*
 
rM   c           %      C   s  | dddusJ dtj|d d}| dd}|   d}d}	t 0 t|dd	d
}
t|D ]\}}t	j	
 }|\}}}}}||}||}|durY||}|d}|dkrcq6| |\}}|d}| }t	j	
 }tt|D ]}|| }|| d||  }t||| |}d}d}|D ]M}|d }|d }t|t|ksJ | D ])} ||  d }!t||!}"|"dkr| }t|"|"t|! D ]
}#|||# d 9 }q nq|durt|} nq|dur|
d||| q}|
d| q}t	j	
 }$|||  7 }|	|$|  7 }	|| dkr*td| tj  q6td|d |	d  W d   n1 sCw   Y  W d   |S W d   |S 1 s\w   Y  |S )z Test model with decoder
    test_dirNz2Please config param: test_dir, to store score filez	score.txtr
   r   r@   wutf8)encodingr   rB         ?token_idprobz{} detected {} {:.3f}
z{} rejected
zProgress batch {}z5Total infer cost {:.2f} mins, decode cost {:.2f} minsg      N@)r   ospathjoinrE   r   rG   openr   datetimenowr   r   softmaxrD   rangelenctc_prefix_beam_searchkeys
is_sublistmathsqrtwriter$   total_secondsr"   r#   sysstdoutflush)%r'   r)   r*   keywords_tokenkeywords_idxsetr,   score_abs_pathr
   infer_secondsdecode_secondsfoutr0   r1   batch_start_timer`   r3   r4   r5   r6   r7   r8   r9   infer_end_timeir2   scorehypshit_keyword	hit_scoreone_hyp
prefix_idsprefix_nodeswordlaboffsetidxdecode_end_timer=   r=   r>   executor_test   s   











*DDDr~   c                 C   s   t | t |k r
dS t | t |kr| |krdS dS tt | t | D ]#}| | |d krGtt |D ]}| ||  || krB nq4|  S q$dS )NrT   r   )r^   r]   )	main_list
check_listrq   jr=   r=   r>   ra      s   ra   Fr8   r4   logits_lengthsr6   need_accc                 C   sH   d}|rt | |||}| dd} | d} tj| |||dd}||fS )z CTC Loss
    Args:
        logits: (B, D), D is the number of keywords plus 1 (non-keyword)
        target: (B)
        logits_lengths: (B)
        target_lengths: (B)
    Returns:
        (float): loss of current batch
    r@   r   r   rB   rH   )	reduction)acc_utterance	transposelog_softmaxFr   )r8   r4   r   r6   r   r;   r:   r=   r=   r>   r     s   

r   logits_lengthtarget_lengthc                 C   s$  | d u rdS |  d} |  } | }d}d}d}d}t }t| dD ]^}	| |	 d ||	  }
t|
||	 d dd}dd ||	 d ||	   D }g }t|dkr_dd |d d D }|||}|d dkr||d 7 }||d	 7 }||d
 7 }||d 7 }q%t	|| | | d | S )Nr   rB   rC      c                 S      g | ]}t |qS r=   str.0r%   r=   r=   r>   
<listcomp>5      z!acc_utterance.<locals>.<listcomp>c                 S   r   r=   r   r   r=   r=   r>   r   8  r   allinssubdelg      Y@)
r\   rD   
Calculatorr]   r   r_   tolistr^   	calculatefloat)r8   r4   r   r   
total_word	total_ins	total_sub	total_del
calculatorrq   rr   rs   rz   recresultr=   r=   r>   r   $  s>   
"r   rC      keywords_tokensetscore_beam_sizepath_beam_sizereturnc              
   C   s  |  d}| }t ddg ffg}td|D ]R}|| }	tdd }
|	|\}}g }g }t| | D ])\}}|durQ|dkrP||v rP|| || q6|dkr_|| || q6t|dkrgq|D ]}|	| 	 }|D ]\}\}}}t|dkr|d nd}|dkr|
| \}}}|||  ||  }|
 }|||f|
|< qs||krtj|dd	d
s|
| \}}}|||  }|
 }||d d kr||d d< ||d d< |||f|
|< tj|dd	d
s||f }|
| \}}}|||  }|
 }|t|||d |||f|
|< qs||f }|
| \}}}|r6||d d kr5||d d< ||d d< n|
 }|t|||d |||  ||  }|||f|
|< qsqit|
 dd dd}
|
d| }qdd |D }|S )ay   CTC prefix beam search inner implementation

    Args:
        logits (torch.Tensor): (1, max_len, vocab_size)
        logits_lengths (torch.Tensor): (1, )
        keywords_tokenset (set): token set for filtering score
        score_beam_size (int): beam size for score
        path_beam_size (int): beam size for path

    Returns:
        List[List[int]]: nbest results
    r   rR   r@   c                   S   s
   ddg fS )Nr@   r=   r=   r=   r=   r>   <lambda>b  s   
 z(ctc_prefix_beam_search.<locals>.<lambda>Ng?rT   gư>)abs_tolrU   frame)tokenr   rU   c                 S   s   | d d | d d  S )Nr   r   r=   )xr=   r=   r>   r     s    T)r2   reversec                 S   s6   g | ]}|d  |d d  |d d  |d d fqS )r   r   rB   r=   )r   yr=   r=   r>   r     s   6 z*ctc_prefix_beam_search.<locals>.<listcomp>)r   tupler]   r   topkzipr   appendr^   r%   copyrb   isclosedictsorteditems)r8   r   r   r   r   maxlen	ctc_probscur_hypstprobs	next_hypstop_k_probstop_k_indexfilter_probsfilter_indexrU   r|   spsprefixpbpnb	cur_nodeslastn_pbn_pnbnodesn_prefixrs   r=   r=   r>   r_   E  s   









*r_   c                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )r   c                 C   s>   i | _ g | _i | _d| jd< d| jd< d| jd< d| jd< d S )Nr   corr   r   r   r   )dataspacecostselfr=   r=   r>   __init__  s   


zCalculator.__init__c                 C   s  | dd | dd t| jt|k r$| jg  t| jt|k s| jD ]'}|D ]
}d|d< d|d< q+t|t|k rN|ddd t|t|k s>q'tt|D ]}|| j| d d< d| j| d d< qUtt|D ]}|| jd | d< d| jd | d< qpd| jd d d< |D ]}|| jvrt|dkrdddddd	| j|< q|D ]}|| jvrt|dkrdddddd	| j|< qt|D ]\}}t|D ]\}}	|dks|dkrqtj}
d
}| j|d  | d | j	d  }d}||
k r|}
|}| j| |d  d | j	d  }d}||
k r|}
|}||	kr1| j|d  |d  d | j	d  }d}n| j|d  |d  d | j	d  }d}||
k rN|}
|}|
| j| | d< || j| | d< qqg g dddddd}t|d }t|d }	 | j| | d dkrt|| dkr| j||  d d | j||  d< | j||  d d | j||  d< |d d |d< |d d |d< |d  d||  |d  d||  |d }|d }n,| j| | d dkrFt|| dkr)| j||  d d | j||  d< | j||  d d | j||  d< |d d |d< |d d |d< |d  d||  |d  d||  |d }|d }n| j| | d dkrt|| dkr| j||  d d | j||  d< | j||  d d | j||  d< |d d |d< |d d |d< |d  d||  |d  dd |d }nf| j| | d dkrt|| dkr| j||  d d | j||  d< |d d |d< |d  dd |d  d||  |d }n | j| | d dkr	 |S t
dj||| j| | d d qy)Nr    r   nonerror)r   r   r   r   r   r   r   r   r   noner   r   r   )rz   r   r   r   r   r   r   Tr   rz   r   z<this should not happen , i = {i} , j = {j} , error = {error})rq   r   r   )insertr^   r   r   r]   r   r   rf   maxsizer   printr$   )r   rz   r   rowelementrq   r   r   	lab_token	rec_tokenmin_dist	min_errorr   r   r   r=   r=   r>   r     s   

  

$$
	$$$$
$$
$
zCalculator.calculatec                 C   s   dddddd}| j D ]C}|d | j | d  |d< |d | j | d  |d< |d | j | d  |d< |d | j | d  |d< |d | j | d  |d< q|S Nr   r   r   r   r   r   r   r   )r   r   r   r=   r=   r>   overall3  s   
zCalculator.overallc                 C   s   dddddd}|D ]H}|| j v rR|d | j | d  |d< |d | j | d  |d< |d | j | d  |d< |d | j | d  |d< |d | j | d  |d< q
|S r   r   )r   r   r   r   r=   r=   r>   cluster=  s   
zCalculator.clusterc                 C   s   t | j S )N)listr   r`   r   r=   r=   r>   r`   H  s   zCalculator.keysN)__name__
__module____qualname__r   r   r   r   r`   r=   r=   r=   r>   r     s    	z
r   )F)NrC   r   )&rZ   rb   rV   rf   collectionsr   typingr   r   r   numpynpr   torch.distributeddistributedr   torch.nn.functionalnn
functionalr   r   torch.nn.utilsr   modelscope.utils.loggerr   r"   r?   rM   r~   ra   Tensorboolr   r   setintr_   r   r=   r=   r=   r>   <module>   sd   8FS


$
i