o
    i*                     @   sr  d Z ddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 eje  ddlZddlZddlZddlmZ ddlmZ ddlZddlZddlmZ ddlmZ ddlmZ ed	d
ZdZe de! d Z"dZ#dZ$da%dd Z&e
dd Z'dd Z(efddZ)d)ddZ*d*ddZ+dd Z,dd Z-d+d e.d!e/fd"d#Z0d$d% Z1d&d' Z2e3d(kre2  dS dS ),a  
Usage:
    python prepare_csv_wavs.py /path/to/metadata.csv /output/dataset/path [--pretrain] [--workers N]

CSV format (header required, "|" delimiter):
    audio_file|text
    /path/to/wavs/audio_0001.wav|Yo! Hello? Hello?
    /path/to/wavs/audio_0002.wav|Hi, how are you doing today? I want to go shopping and buy me some lemons.

Notes:
    - audio_file must be an absolute path.
    N)contextmanager)files)Path)ArrowWriter)tqdm)convert_char_to_pinyinf5_ttsz(../../data/Emilia_ZH_EN_pinyin/vocab.txtd      AudioProcessorc                 C   s"   t |  }| o|j dkS )Nz.csv)r   
expanduseris_filesuffixlower)
input_pathfpath r   Z/home/ubuntu/.local/lib/python3.10/site-packages/f5_tts/train/datasets/prepare_csv_wavs.pyis_csv_wavs_format2   s   r   c               	   c   sd    dd } t  t j|  t  t j|  zdV  W tdur$tjdd dS dS tdur1tjdd w w )z0Context manager for graceful shutdown on signalsc                 S   s4   t d td urt d tjddd td d S )Nz-
Received signal to terminate. Cleaning up...zShutting down executor...FTwaitcancel_futuresr
   )printexecutorshutdownsysexit)signumframer   r   r   signal_handler;   s
   z%graceful_exit.<locals>.signal_handlerNF)r   )signalSIGINTSIGTERMr   r   )r   r   r   r   graceful_exit7   s   r#   c              
   C   s   t |  std|  d dS zt| }|dkr!td| d| ||fW S  tyD } ztd|  d| d	 W Y d}~dS d}~ww )
zNProcess a single audio file by checking its existence and extracting duration.zaudio z not found, skippingNr   z	Duration z is non-positive.zWarning: Failed to process z due to error: z. Skipping corrupt file.)r   existsr   get_audio_duration
ValueError	Exception)
audio_pathtext	polyphoneaudio_durationer   r   r   process_audio_fileM   s   r-   c                 C   s^   g }t tdt| |t| | d | ddD ]}| |||  }t||d}|| q|S )z-Convert a list of texts to pinyin in batches.r   r
   zConverting texts to pinyintotaldescr*   )r   rangelenr   extend)textsr*   
batch_sizeconverted_textsibatchconverted_batchr   r   r   batch_convert_texts\   s   
r;   c                    s&  t | std|  tt|   }d t|}|dkr#td|d ur)|ntt	|}t
d| d| d t  tjj|tdn}|ag }tdt|tD ]W}|||t  } fd	d
|D }	t|	t|d|t d  d|t d t  dD ]*}
z|
 }|d ur|| W q~ ty } zt
d|  W Y d }~q~d }~ww qRd aW d    n1 sw   Y  W d    n1 sw   Y  dd
 |D }|stddd
 |D }t| td}g }g }t }t||D ]\\}}}}||||d || |t| q|||fS )Nzinput must be a .csv file: Tr   zNo valid rows found in CSV.z
Processing z audio files using z workers...)max_workersthread_name_prefixc                    s$   g | ]}t t|d  |d  qS )r   r
   )r   submitr-   ).0pairr1   r   r   
<listcomp>   s   $ z(prepare_csv_wavs_dir.<locals>.<listcomp>zProcessing chunk r
   /r.   zError processing file: c                 S   s   g | ]}|d ur|qS Nr   )r?   resr   r   r   rA      s    z$No valid audio files were processed!c                 S   s   g | ]}|d  qS )r
   r   )r?   itemr   r   r   rA      s    )r6   )r(   r)   duration)r   r&   read_audio_text_pairsr   r   as_posixr3   RuntimeErrorminMAX_WORKERSr   r#   
concurrentfuturesThreadPoolExecutorTHREAD_NAME_PREFIXr   r2   
CHUNK_SIZEr   resultappendr'   r;   
BATCH_SIZEsetzipupdatelist)r   num_workersaudio_path_text_pairstotal_filesworker_countexecresultsr8   chunkchunk_futuresfuturerQ   r,   	processed	raw_textsr7   
sub_result	durations	vocab_setr(   _rF   	conv_textr   r1   r   prepare_csv_wavs_dirj   sf   "



rh      c              
   C   s6  zt | jW S  ty$ } ztd|  d| d W Y d}~nd}~ww z'ddddd	d
d| g}tj|tjtjdd|d}|j	 }|rIt
|W S td tjtjtfyn } ztd|  d| d W Y d}~nd}~ww zt| }|jdkr|j|j W S td ty } z
td|  d| d}~ww )z<Get the duration of an audio file in seconds with fallbacks.zWarning: soundfile failed for z with error: z. Falling back to ffprobe.Nffprobez-verrorz-show_entrieszformat=durationz-ofz"default=noprint_wrappers=1:nokey=1T)stdoutstderrr)   checktimeoutz#Empty duration string from ffprobe.zWarning: ffprobe failed for z". Falling back to torchaudio.info.r   z)Invalid sample_rate from torchaudio.info.zfailed to get duration for z: )sfinforF   r'   r   
subprocessrunPIPErl   stripfloatr&   TimeoutExpiredSubprocessError
torchaudiosample_rate
num_framesrI   )r(   ro   r,   cmdrQ   duration_strrq   r   r   r   r%      sH   "


"

r%   c                 C   s0  g }t |   }t| ddddz}tj|dd}t|d }|d u r.|W  d    S t|dk sD|d 	 d	ksD|d
 	 dkrHt
dt|ddD ]7\}}t|dk rYqN|d 	 }|d
 	 }	|shqNt | }
|
 s|t
d| d| ||
 |	f qNW d    |S 1 sw   Y  |S )Nr z	utf-8-sig)modenewlineencoding|)	delimiter   r   
audio_filer
   r)   z#CSV header must be: audio_file|text)startz)audio_file must be an absolute path (row z): )r   r   absoluteopenrH   csvreadernextr3   ru   r&   	enumerateis_absoluterR   )csv_file_pathaudio_text_pairscsv_pathcsvfiler   headerrow_idxrowr   r)   r(   r   r   r   rG      s4   
,
rG   c                 C   s  t | } | jddd td|  d | d }t| d}t|ddD ]}|| q%|  W d    n1 s;w   Y  | d	 }t| d
dd}	t	j
d|i|	dd W d    n1 sbw   Y  | d }
|rxt }t||
 n%t|
 d
}	t|D ]	}|	|d  qW d    n1 sw   Y  | j}td| dt|  td| dt|  td| dt|d dd d S )NT)exist_okparentsz
Saving to z ...z	raw.arrow)pathzWriting to raw.arrow ...)r0   zduration.jsonwzutf-8)r   rF   F)ensure_asciiz	vocab.txt
z
For z, sample count: zFor z, vocab size is: z, total i  z.2fz hours)r   mkdirr   r   rH   r   writefinalizer   jsondumpPRETRAINED_VOCAB_PATHshutilcopy2sortedstemr3   sum)out_dirrQ   duration_listtext_vocab_setis_finetuneraw_arrow_pathwriterlinedur_json_pathfvoca_out_pathfile_vocab_finetunevocabdataset_namer   r   r   save_prepped_dataset   s4   
$r   Tr   rX   c                 C   s@   |rt  sJ dt  t| |d\}}}t||||| d S )Nz pretrained vocab.txt not found: )rX   )r   r$   rh   r   )inp_dirr   r   rX   rc   rd   re   r   r   r   prepare_and_save_set  s   r   c                  C   s\   t jdd} | jdtdd | jdtdd | jdd	d
d | jdtdt dd |  S )NzPrepare and save dataset.)descriptionr   z?Input CSV with header 'audio_file|text' and absolute wav paths.)typehelpr   z+Output directory to save the prepared data.z
--pretrain
store_truez1Enable for new pretrain, otherwise is a fine-tune)actionr   z	--workersz#Number of worker threads (default: ))argparseArgumentParseradd_argumentstrintrK   
parse_args)parserr   r   r   get_args  s   r   c                  C   sd   zt  } t| j| j| j | jd W d S  ty1   td td ur)tj	ddd t
d Y d S w )N)r   rX   z,
Operation cancelled by user. Cleaning up...FTr   r
   )r   r   r   r   pretrainworkersKeyboardInterruptr   r   r   r   r   )argsr   r   r   cli  s    r   __main__rC   )ri   )TN)4__doc__concurrent.futuresrL   multiprocessingosr   r    rr   r   
contextlibr   r   rR   getcwdr   r   r   importlib.resourcesr   pathlibr   	soundfilerp   ry   datasets.arrow_writerr   r   f5_tts.model.utilsr   joinpathr   rS   max	cpu_countrK   rO   rP   r   r   r#   r-   r;   rh   r%   rG   r   boolr   r   r   r   __name__r   r   r   r   <module>   sR    


B% 
