o
    i                  
   @   s   d Z ddlZddlmZ ddlZddlZddlZddl	m
  mZ ddlm  mZ ddlm
Z
 ddlmZ defddZd	eejef fd
dZd!dedefddZdd Zdededejdejdef
ddZdd Zdd Zd"ddZdd  ZdS )#zH Some implementations are adapted from https://github.com/yuyq96/D-TDNN
    N)Union)nn)Fileaudioc                 C   s   d}t t| D ]W}| | }|d |d ksJ dt|d tjs&J dt|d d t|d d  |d jd ksAJ d|dkrU|d | |d  d ksUJ d||d |d  7 }q|S )Nr      z$modelscope error: Wrong time stamps.   z"modelscope error: Wrong data type.>  zFmodelscope error: audio data in list is inconsistent with time length.)rangelen
isinstancenpndarrayintshape)r   	audio_duriseg r   N/home/ubuntu/.local/lib/python3.10/site-packages/funasr/utils/speaker_utils.pycheck_audio_list   s   0 r   inputsc                 C   s   g }t t| D ]s}t| | trAt| | }tjt	|dd\}}t|j
dkr3|d d df }t|d}|d}n5t| | tjrrt| | j
dksVJ d| | }|jdv rg|d d}n|d}t|}ntd	|| q|S )
Nfloat32)dtyper   r   r   z.modelscope error: Input array should be [N, T])int16int32int64i   zOmodelscope error: The input type is restricted to audio address and nump array.)r	   r
   r   strr   readsfloadioBytesIOr   torch
from_numpy	unsqueezesqueezer   r   r   astype
ValueErrorappend)r   outputr   
file_bytesdatafsr   r   r   sv_preprocess"   s*   

r-   r   vad_segmentsreturnc                    s@   ddd  fdd}g }t | D ]\}}||| q|S )Ng      ?g      ?)seg_dur	seg_shiftc           
         s   | d }| d }t  d  }t  d  }d}g }td|jd |D ]G}t|| |jd }||kr8 |S |}td|| }||| }	|	jd |k r\t|	d||	jd  fd}	|| | | | |	g q%|S )Nr   r   r0   r1   constant)r   r	   r   minmaxr   padr(   )
seg_dataseg_str+   	chunk_lenchunk_shiftlast_chunk_edseg_reschunk_stchunk_ed
chunk_dataconfigr,   r   r   	seg_chunkB   s$   "zsv_chunk.<locals>.seg_chunk)	enumerateextend)r.   r,   rA   segsr   sr   r?   r   sv_chunk<   s   rF   c                 C   sR   g }| D ]}t j|ddd}||jddd }||d qt|}|S )Nr   P   )num_mel_binsT)dimkeepdim)Kaldifbankr$   meanr(   r"   cat)r   featuresaufeaturer   r   r   extract_feature\   s   
rR   segmentslabels
embeddingsc           
      C   s   t | t |ks
J t|}g }tt | D ]}|| | d | | d || g qt|}g }t| d D ]}|||k d}|| q9t|}dd }tdt |D ]/}|||d  d || d r|| d ||d  d  d }	|	|| d< |	||d  d< qZt	|}|S )Nr   r   c                 S   s   | |d krdS dS )Ng-C6?TFr   )t1t2r   r   r   is_overlappedx   s   z"postprocess.<locals>.is_overlappedr   )
r
   correct_labelsr	   r(   merge_sequer4   rM   r   stacksmooth)
rS   r.   rT   rU   distribute_resr   spk_embsspk_embrX   pr   r   r   postprocessf   s(   &
 ra   c                 C   sF   d}i }g }| D ]}||vr|||< |d7 }| ||  qt|S )Nr   r   )r(   r   array)rT   	labels_idid2id
new_labelsr   r   r   r   rY      s   
rY   c                 C   sv   | d g}t dt| D ],}| | d |d d ks&| | d |d d kr.|| |  q| | d |d d< q|S )Nr   r   r   )r	   r
   r(   )r]   resr   r   r   r   rZ      s   
0rZ   r   c                 C   s$  t t| D ]}t| | d d| | d< t| | d d| | d< | | d | | d  |k r|dkrA| |d  d | | d< q|t| d krV| |d  d | | d< q| | d | |d  d  | |d  d | | d  kr| |d  d | | d< q| |d  d | | d< qt| } | S )Nr   r   r   )r	   r
   roundrZ   )rg   mindurr   r   r   r   r\      s   8r\   c                 C   s   g }| D ]F}|d d d }|d d d }d}d}|D ]$}|\}	}
}|	d }	|
d }
t t||
t ||	 d}||kr@|}|}q||d< || q|S )Nts_listr   rf   r   i  spk)r4   r3   r(   )sentence_listsd_time_listsd_sentence_listdsentence_startsentence_endsentence_spkmax_overlapsd_timespk_stspk_edrk   overlapr   r   r   distribute_spk   s$   
rx   )r   )r   )__doc__r    typingr   librosar   numpyr   r"   torch.nn.functionalr   
functionalFtorchaudio.compliance.kaldi
compliancekaldirK   funasr.utils.modelscope_filer   listr   r   r-   rF   rR   ra   rY   rZ   r\   rx   r   r   r   r   <module>   s:    

$

