o
    i                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlmZmZ d dlZ	d dl
Z
d dlmZ d dlZd dlmZ d dlmZ dd Zdd Zdd
dZejddddefddZ	 edkr_e  dS dS )    N)
DictConfig	OmegaConf)tqdm)ThreadPoolExecutorc           
   	      s   t |d}t | ddb}| t}|dkrEt|d  fddtt|D }tj|D ]}q2W d    n1 s?w   Y  nt|D ]}t	| qIt
d D ]}	||	d	  |  qXW d    n1 spw   Y  |  d S )
Nwzutf-8)encoding   )max_workersc                    s   h | ]	}  t|qS  )submitupdate_data).0iexecutorlinesr
   _/home/ubuntu/.local/lib/python3.10/site-packages/funasr/datasets/audio_datasets/update_jsonl.py	<setcomp>   s    z%gen_scp_from_jsonl.<locals>.<setcomp>z(All audio durations have been processed.
)open	readlineslenr   r   range
concurrentfuturesas_completedr   logginginfowriteflushclose)

jsonl_filejsonl_file_outncpujsonl_file_out_ffin	num_totalr   futurer   liner
   r   r   gen_scp_from_jsonl   s*   
	

r)   c                 C   s   | | }t | }|d dd}tj|rItj|dd\}}t	|}t
|d d d }|d }	||d< ||d< t j|d	d
}
|
| |< d S d S )Nsourcez/cpfs01z/cpfs_speech/datai>  )sri  
   
source_lenF)ensure_ascii)jsonloadsstripreplaceospathexistslibrosaloadr   intdumps)r   r   r(   datawav_pathwaveform_
sample_numr-   source_len_old
jsonl_liner
   r
   r   r   ,   s   r   r   c           	      C   s   t j|dd t| d?}| }t|D ]-\}}t j| }t j||}t	
| dt| d| d|  t| || qW d    d S 1 sNw   Y  d S )NT)exist_okr/z	, jsonl: z, )r3   makedirsr   r   	enumerater4   basenamer1   joinr   r   r   r)   )	jsonl_file_list_injsonl_file_out_dirr#   fdata_file_listsr   jsonlfilename_with_extensionr"   r
   r
   r   update_wav_len>   s   $"rN   )config_nameversion_basecfgc                 C   sL   t j| dd}t| |dd}|dd}|dd}t||| d S )	NT)resolverH   z0/Users/zhifu/funasr1.0/data/list/data_jsonl.listrI   z/Users/zhifu/funasr1.0/data_tmpr#   r   )r   to_containerr   r   getrN   )rQ   kwargsrH   rI   r#   r
   r
   r   
main_hydraL   s   
rV   __main__)r   )r3   r/   torchr   hydra	omegaconfr   r   concurrent.futuresr   r6   torch.distributeddistributeddist	threadingr   r   r)   r   rN   mainrV   __name__r
   r
   r
   r   <module>   s*    

