o
    i                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZ	d dl
Z
d dlmZ d dlmZ 	ddefddZdded	efd
dZejddddefddZ	 edkr[e  dS dS )    N)
DictConfig	OmegaConf)tqdmsourcetargetjsonl_file_outc              
      s"  z
t  }t  }W n   d}d}Y t pd}td|  |dkri }t|| D ]}\}i |< t|dh}	|	  t	 d | d t	 |krP|nd}
|
dkrt
jj|d( fddt|
D }t
j|D ]}| |  qrW d    n1 sw   Y  nt }| | W d    n1 sw   Y  q,t|d=}	||d   D ],}d	|i}|D ]}||| v r||| |  qtj|d
d}|	|d  |	  qW d    n1 sw   Y  tdt	||d   d n	 |dkrt   d S d S )Nr      z%convert wav.scp text to jsonl, ncpu: r)max_workersc              	      s0   g | ]} t | |d    |qS )r	   )submitparse_context_length).0idata_file_lists	data_typeexecutorlines_for_each_th \/home/ubuntu/.local/lib/python3.10/site-packages/funasr/datasets/audio_datasets/scp2jsonl.py
<listcomp>&   s    	z0gen_jsonl_from_wav_text_list.<locals>.<listcomp>wkeyF)ensure_ascii
z
processed z samples)distget_rankget_world_sizeos	cpu_countprintzipopen	readlineslen
concurrentfuturesThreadPoolExecutorrangeas_completedupdateresultr   keysjsondumpswriteflushbarrier)pathdata_type_listr   kwargsrank
world_size	cpu_cores	json_dict	data_fileftask_numr'   futureresr   
jsonl_liner   r   r   gen_jsonl_from_wav_text_list   s`   
	

	
r@   	data_listr   c                 C   s  t t| dd}i }t| D ]p\}}|d |d|  | jdd}|d }t|dkr5|d nd}| }|dkretj	|r]t
j|d	d
\}	}
t|	}t|d d	 d }ntd| qd|v rot| nt|}||| d|i||< q|S )NT)totaldynamic_ncolsr	   zcpu: )maxsplitr    r   i>  )sri  
   zsource file not found: {} _len)r   r%   	enumerater+   set_descriptionstripsplitr   r3   existslibrosaloadintr!   format)rA   r   idpbarr>   r   linelinesr   waveform_
sample_numcontext_lenr   r   r   r   K   s&   
r   )config_nameversion_basecfgc                 C   s^   t j| dd}t| |dd}t|trt|}|dd}|dd}t|||d	 d S )
NT)resolvescp_file_list)z)/Users/zhifu/funasr1.0/test_local/wav.scpz*/Users/zhifu/funasr1.0/test_local/text.txtr4   r   r   z6/Users/zhifu/funasr1.0/test_local/audio_datasets.jsonl)r4   r   )r   to_containerr!   get
isinstancestrevalr@   )r]   r5   r_   r4   r   r   r   r   
main_hydrac   s   

re   __main__)r   N)r   )r   r.   torchlogginghydra	omegaconfr   r   concurrent.futuresr&   rO   torch.distributeddistributedr   r   rc   r@   listr   mainre   __name__r   r   r   r   <module>   s*    
>
