o
    i#|                     @   s`  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z
d dlmZ d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl$m&Z& dddZ'zd dl(m)Z)m*Z*m+Z+ d dl,m-Z- W n   Y dddZ.G dd dZ/dS )    N)tqdm)
DictConfig
ListConfig)deep_update)tables)
load_bytes)download_from_url)timestamp_sentence)timestamp_sentence_en)download_model)slice_padding_audio_samples)	merge_vad)load_audio_text_image_video)set_all_random_seed)load_pretrained_model)export_utils)misc   c              	   C   s>   |  d|}zt|}W n ttfy   |}Y nw t|dS )z?Return a positive integer representing CPU threads from config.ncpu   )getint	TypeError
ValueErrormax)configfallbackvalue r   J/home/ubuntu/.local/lib/python3.10/site-packages/funasr/auto/auto_model.py_resolve_ncpu"   s   
r    )sv_chunkpostprocessdistribute_spk)ClusterBackendc                    s  g }g }g d}t jt j  t| tr!| ds| dr!t| } t| trtj	| rtj
| \}}| }||v rt| dd`}	|	D ]U}
dd fdd	td
D  }| drnt|
 }|d }|d|}n"|
 jdd}t|dkr|d n|d }t|dkr|d n|}|| || qEW d   n1 sw   Y  ||fS |du rt| }| g}|g}||fS t| ttfr:|durt|ttfrg }t| |D ]\}}t||d\}}|| qg }t| D ]}|| q||fS | }g }| D ]0}t|trtj	|rt|}n|du r/dd fdd	td
D  }|| q||fS t| trDt| } |du rYdd fdd	td
D  }| g}|g}||fS ) )z.scpz.txtz.json.jsonlz.textzhttp://zhttps://zutf-8)encoding	rand_key_ c                 3       | ]}t  V  qd S Nrandomchoice.0_charsr   r   	<genexpr>E       z(prepare_data_iterator.<locals>.<genexpr>   r&   sourcekeyr   )maxsplitr   N)data_in	data_typec                 3   r*   r+   r,   r/   r2   r   r   r4   k   r5   c                 3   r*   r+   r,   r/   r2   r   r   r4   r   r5   )stringascii_lettersdigits
isinstancestr
startswithr   ospathexistssplitextloweropenjoinrangeendswithjsonloadsstripr   splitlenappendr   "extract_filename_without_extensionlisttuplezipprepare_data_iteratorbytesr   )r:   	input_lenr;   r8   	data_listkey_listfilelistr1   file_extensionfinlinelinesdatadata_list_tmp	data_in_idata_type_idata_list_iitemdata_ir   r2   r   rU   2   sz   
 

3
 

 

 rU   c                   @   sj   e Zd Zdd Zedd Zdd Zddd	Z					dd
dZdddZ	dddZ
dd Zdd ZdS )	AutoModelc                 K   s  zddl m} ||ddd W n   Y tt|dd }tj|d | jd!i |\}}|d	d }|d
i d u rAi n|d
i }|d urvtd ||d< |dd|d< |d |d< |	d|dd | jd!i |\}}|dd }|di d u ri n|di }|d urtd ||d< |dd|d< |d |d< |	d|dd | jd!i |\}}|dd }	|di d u ri n|di }
|
di d u ri n|
di }|	d ur1td |	|
d< |dd|
d< |d |
d< |
	d|dd | jd!i |
\}	}
t
d!i ||d | _|dd}|dvr.td || _|| _|| _|| _|| _|| _|| _|	| _|
| _|d | _|   d S )"Nr   )check_for_updatedisable_updateF)disable	log_levelINFO)level	vad_model
vad_kwargszBuilding VAD model.modelvad_model_revisionmastermodel_revisiondevicer   r   
punc_modelpunc_kwargszBuilding punc model.punc_model_revision	spk_model
spk_kwargs	cb_kwargszBuilding SPK model.spk_model_revisionspk_modepunc_segment)defaultvad_segmentr|   z@spk_mode should be one of default, vad_segment and punc_segment.
model_pathr   )funasr.utils.version_checkerrg   r   getattrloggingupperbasicConfigbuild_modelinfo
setdefaultr$   tocb_modelerrorr{   kwargsro   rm   rn   rt   ru   rw   rx   r   _store_base_configs)selfr   rg   rj   ro   rm   rn   rt   ru   rw   rx   ry   r{   r   r   r   __init__{   sh    
 
 



zAutoModel.__init__c               
   K   s  d| v sJ d| vrt d| dd td/i | } t| dd | dd	}|d	kr4tj rO|d
kr=tj	 rO|dkrGtj
j rO| dddkrUd}d| d< || d< t| d}|| d< t |krmt| | dd }|| d< d| d< |d urBt|tr|dn|}| di }g }g }g }| dg }	| dg }
t|tttfs|gt| }t|D ]k\}}tj|}|| }t|	dkr|	| |d< t|
dkr|
| |d< |d/i |}|| t|dr|jnd }t|dr| n|}d}|d ur	t|}|dkrt|dr| }|| || qt|dkr6|d }|d }|d }|| d< || d< || d< | dd }d | d< |d urptj|}|d/i | di }t|d rl| nd | d< || d< tj | d }|d usJ | d  d!i }t!|| di  t!||  |d/i |}| d"d }|d urt"j#$|rt d#|  t%||| d$d%| d&d | d'g | d(d d) nt&d*|  | d+d,r|'tj( n| d-d,r|'tj) |'| | d.d%st&  || fS )0Nro   
model_confz"download models from model hub: {}hubmsseedr   rs   cudaxpumpsngpur   cpu
batch_sizer   r   	tokenizer
vocab_size,tokenizer_conftoken_lists	seg_dicts
token_listseg_dict	get_vocabget_vocab_sizefrontend
input_sizefrontend_confoutput_sizez is not registered
init_paramzLoading pretrained params from ignore_init_mismatchT
oss_bucket	scope_mapexcludes)ro   rC   r   r   r   r   z#error, init_param does not exist!: fp16Fbf16disable_logr   )*r   r   formatr   r   r   torchr   is_availabler   backendsr   r    get_num_threadsset_num_threadsr?   r@   rN   rR   rS   r   rO   	enumerater   tokenizer_classesrP   hasattrr   r   r   frontend_classesr   model_classesr   rB   rC   rD   r   printr   float16bfloat16)r   rs   r   r   
tokenizerstokenizers_conftokenizers_buildvocab_sizesr   token_list_filesr   itokenizer_classr   r   r   r   frontend_classmodel_classr   ro   r   r   r   r   r      s   












	
zAutoModel.build_modelc                 O   s(   | j }t|| | jg ||R  }|S r+   )r   r   ro   )r   argscfgr   resr   r   r   __call__;  s   
zAutoModel.__call__Nc                 K   sB   |    | jd u r| j|f||d|S | j|f||d|S )N)rW   progress_callback)_reset_runtime_configsrm   	inferenceinference_with_vad)r   inputrW   r   r   r   r   r   generateA  s    
zAutoModel.generatec           !      K   s  |d u r|    |d u r| jn|}d|v r|d t|| |d u r&| jn|}|  |dd}t|||dd |d\}	}
i }g }t|
}| jdd}|sXt	d|d	d
nd }d}d}t
d||D ]}t||| }|
|| }|	|| }||d}|| dkr|dd dkr|d |d< ||d< t }t 4 |jdi ||}t|ttfrt|dkr|d nddig}t|dkr|d ni }W d    n1 sw   Y  t }|| |dd}|| }|dd|d< |dd|d< |d|d< t| |d< || d|d< | d}|r&|||  || |rLz||| W n tyK } ztd|  W Y d }~nd }~ww ||7 }||7 }qd|rc|d|| d t| j} | jdkrtj|  tj  W d    |S 1 sw   Y  |S )Ncacher   r   r;   rW   r;   r8   disable_pbarFblueTcolourtotaldynamic_ncolsg        r   )r:   r8   fbankr:   data_lengthstextr)   batch_data_timer   	load_dataextract_feat0.3fforwardrtfz, zprogress_callback error: 	rtf_avg: r   r   ) r   r   popr   ro   evalr   rU   rO   r   rI   mintimeperf_counterr   no_gradr   r?   rR   rS   extendupdateset_description	Exceptionr   r   next
parametersrs   typer   empty_cache)!r   r   rW   ro   r   r8   r   r   r   rY   rX   speed_statsasr_result_listnum_samplesr   pbartime_speech_totaltime_escape_totalbeg_idxend_idx
data_batch	key_batchbatchtime1r   results	meta_datatime2r   time_escapedescriptioners   r   r   r   r   M  s   










zAutoModel.inferencec           >   
      s  |    | j}t| j| t }| j|f|| j| jd|}t }|ddrFtt	|D ]}t
|| d |ddd || d< q0| j}	t|| tt|dd	d d
}
t|ddd }|
|d< t|||dd d\}}g }d}t }|ddstdt	|ddnd }tt	|D ]}|| d }|| d  || }t|d dr|d jnd}t|||ddd}t	|}t	 } fddt|D }t|dd d}g }t	|s||dg d  td!| qt	|d"krt	|d" d"krt|
|d" d" d
 |d" d" d"  }
|d# d$kr d"}
d"}t }|d }||7 }g }d"}d
} ttd"|D ]\}!}"||! d" d
 ||! d" d"  }#t||#|!d
 |  }$|!|d
 k rv|#|k rv|$|
k rvt||#}| d
7 } q;t|||||  \}%}&| j|%fd |	|d|}'| jd urtt	|%D ]N}(|||  |( d" d" d% |||  |( d" d
 d% t|%|( gg})t|)}*||* d&d |*D }+| j|+fd | j|d|},|,d" d' |'|( d'< q| }| d
7 } |#}t	|'d
k rq;||' q;t	||kr||dg d  td(| qd"g| }-t|D ]}!||! d
 }.||! |-|.< q$i }/t|D ]}!|-|!  D ]\}0}1|0 d)r|0|/vrUg |/|0< |-|! |0 D ]}2|2d"   |! d" 7  < |2d
   |! d" 7  < q[|/|0 |-|! |0  qB|0d'kr|0|/vr|-|! |0 |/|0< qBt!j"|/|0 |-|! |0 gd"d*|/|0< qBd+|0v r|0|/vr|-|! |0 |/|0< qB|/|0  d,|-|! |0  7  < qB|0|/vr|-|! |0 |/|0< qB|/|0  |-|! |0 7  < qBq:t	|/d+ # sq|d-d}3d }4| j$d ur2t| j%| | j|/d+ f| j$| j%d.|}5t&&|/d+ }4|3r*|4|/d/< |5d" d+ |/d+< | jd ur|d0dr|4d u rIt'd1 t|d2d d}|/d' }6| j(|6) |d3d d4}7t*|d |7|6) }8| j+d5krg }9t,|- D ]!\}:};d)|:vrt'd6 |9|;d" |;d
 |:d+ |:d) d7 qxn4| j+d8krd)|/vrt'd9 |d:drt-|5d" d; |/d) |4|3d<}9nt.|5d" d; |/d) |4|3d<}9t/|9|8 |9|/d=< n;|d>drt	|/d+ # sg }9n$|d:drt-|5d" d; |/d) |4|3d<}9nt.|5d" d; |/d) |4|3d<}9|9|/d=< d'|/v r|/d'= ||/d< ||/ t }<|<| }=|rH|0d
 |1d?|=| d@dA|dBdC|=d@ q|S )DN)rW   ro   r   r   Fr   merge_length_s   i  batch_size_si,  r   batch_size_threshold_s<   r   r;   )rW   r;   gư>r   redTr   r8   r   fsi>  )r	  audio_fsc                    s   g | ]} | |fqS r   r   r0   r   vadsegmentsr   r   
<listcomp>      z0AutoModel.inference_with_vad.<locals>.<listcomp>c                 S   s   | d d | d d  S )Nr   r   r   xr   r   r   <lambda>  r  z.AutoModel.inference_with_vad.<locals>.<lambda>)r8   r)   )r8   r   	timestampzdecoding, utt: {}, empty speechr   rs   r   g     @@c                 S   s   g | ]}|d  qS )   r   r  r   r   r   r  	  s    spk_embeddingzdecoding, utt: {}, empty resultr  )dimr   r%   return_raw_text)ro   r   raw_textreturn_spk_resz3Missing punc_model, which is required by spk_model.c                 S   s   | d S )Nr   r   r  r   r   r   r  T  s    preset_spk_num)
oracle_numr~   aC  Only 'iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch'                                            and 'iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'                                           can predict timestamp, and speaker diarization relies on timestamps.)startendsentencer  r|   a;  Only 'iic/speech_paraformer-large-vad-punc_asr_nat-zh-cn-16k-common-vocab8404-pytorch'                                        and 'iic/speech_seaco_paraformer_large_asr_nat-zh-cn-16k-common-vocab8404-pytorch'                                       can predict timestamp, and speaker diarization relies on timestamps.en_post_proc
punc_array)r  sentence_infosentence_timestampr   r   z, time_speech: z 0.3fz, time_escape: )2r   r   r   rn   r   r   rm   r   rI   rO   r   ro   r   r   rU   r   r   r	  r   sortedrP   r   r   r   r   r   rw   nparrayr!   r   itemsrA   r   catrM   rt   ru   copyr   r   r   r"   r{   rT   r
   r	   r#   r   r   )>r   r   rW   r   r   beg_vadr   end_vadr   ro   r   batch_size_threshold_msrY   rX   results_ret_listtime_speech_total_all_samples	beg_total
pbar_totalr8   input_ir	  speechspeech_lengthsndata_with_indexsorted_dataresults_sortedr   beg_asr_totaltime_speech_total_per_sampleall_segmentsmax_len_in_batchr   jr1   sample_lengthpotential_batch_lengthspeech_jspeech_lengths_jr   _bvad_segmentssegmentsspeech_bspk_resrestored_dataindexresultkvtr  r  punc_resr  labels	sv_outputsentence_listrest
vadsegmentend_asr_totaltime_escape_total_per_sampler   r  r   r     s  



 & 



	



&

 













zAutoModel.inference_with_vadc           
      K   s   | dd}| jj|d}| j}t|| ||d< |d= |  | dd}t|d| dddd	\}}t  t	j
d||d
|}	W d   |	S 1 sOw   Y  |	S )z

        :param input:
        :param type:
        :param quantize:
        :param fallback_num:
        :param calib_num:
        :param opset_version:
        :param cfg:
        :return:
        rs   r   )rs   ro   r   onnxNr;   r   )ro   r:   r   )r   ro   r   r   r   r   rU   r   r   r   export)
r   r   r   rs   ro   r   r   rY   rX   
export_dirr   r   r   rT    s"   



zAutoModel.exportc                 C   s\   i }t | D ]}|dsqt| |d}t|tr t|||< qt| j|d< || _dS )zHSnapshot base kwargs for all submodules to allow reset before inference.r   N)	dirrJ   r   r?   dictr(  deepcopyr   _base_kwargs_map)r   baselinenamer   r   r   r   r     s   


zAutoModel._store_base_configsc                 C   s   t | dd}|s
dS | D ]\}}t|}t| || qt| jd}|| jd< | D ]\}}|dkr6q-t | |d}t|trG|	d| q-t
 |krUt
| dS dS )zBEnsure runtime kwargs reset to baseline defaults before inference.rY  Nr   r   r   )r   r&  r(  rX  setattrr    r   r?   rW  r   r   r   r   )r   base_mapr[  baserestoredr   r   r   r   r   r   r     s$   


z AutoModel._reset_runtime_configs)NN)NNNNNr+   )__name__
__module____qualname__r   staticmethodr   r   r   r   r   rT  r   r   r   r   r   r   rf   y   s$    B
}


V  
 rf   )r   )NNN)0rK   r   r(  r   r-   r<   r   os.pathrB   numpyr$  r   	omegaconfr   r   funasr.utils.miscr   funasr.registerr   funasr.utils.load_utilsr   funasr.download.filer   funasr.utils.timestamp_toolsr	   r
   'funasr.download.download_model_from_hubr   funasr.utils.vad_utilsr   r   r   &funasr.train_utils.set_all_random_seedr   (funasr.train_utils.load_pretrained_modelr   funasr.utilsr   r   r    funasr.models.campplus.utilsr!   r"   r#   &funasr.models.campplus.cluster_backendr$   rU   rf   r   r   r   r   <module>   sB   
	
G