o
    'ki39                     @   sj  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
mZ ddlZddlZddlZddlZedZedZed Zed Zed	 Zed
 Zed ZdZdZdZdZdZdZdZdd Z dd Z!dd Z"dd Z#dedefddZ$dedefdd Z%da&da'd!d" Z(d#ed$efd%d&Z)d'd( Z*ded)e+fd*d+Z,d,d- Z-d.d/ Z.e/d0kre.  dS dS )1uG  
Modi Voice Data Pipeline for VibeVoice Finetuning
==================================================
Demucs (music removal) → Resample 24kHz mono → Silence removal → Silero VAD → Smart segmentation (5-30s)

Fully resumable: tracks progress per file in progress.json.
Output: modi_processed/segments/ with manifest.csv
    N)Path)ProcessPoolExecutoras_completedz/home/ubuntu/modiz/home/ubuntu/modi_processedsegmentsdemucs_vocalsresampled_24kzprogress.jsonzmanifest.csvi]  g      @g      >@       @g333333?      c                   C   s   t  rtt  S i S )N)PROGRESS_FILEexistsjsonloads	read_text r   r   '/home/ubuntu/modi_processed/pipeline.pyload_progress-   s   r   c                 C   s   t tj| dd d S )Nr	   )indent)r   
write_textr   dumpsprogressr   r   r   save_progress2   s   r   c                 C   s   |  |i  ddS )Nstagependingget)r   namer   r   r   
file_stage5   s   r   c                 K   sH   || vri | |< || | d< t d| | d< | | | t|  d S )Nr   z%Y-%m-%d %H:%M:%Supdated)timestrftimeupdater   )r   r   r   extrar   r   r   
mark_stage8   s   r$   src_wavout_wavc                 C   s  |  r| jdkrdS |jd| j  }|jddd zztjddddd	d
ddt|t| g
dddd}|j	dkrTt
d|jdd   W W tjt|dd dS ddl}|t|d d d }|st|jt|d d dd}|st
d|  W W tjt|dd dS t|d t| W W tjt|dd dS  tjy   t
d Y W tjt|dd dS  ty } zt
d|  W Y d}~W tjt|dd dS d}~ww tjt|dd w )zRun htdemucs to extract vocals.  T_tmp_parentsexist_okpython3z-mdemucsz--two-stemsvocalsz-nhtdemucsz-o  capture_outputtexttimeoutr   z    Demucs error: iN)ignore_errorsF*z
vocals.wavz**)	recursivez    No vocals.wav found in z    Demucs timeoutz    Demucs exception: )r   statst_sizeparentstemmkdir
subprocessrunstr
returncodeprintstderrshutilrmtreeglobmoveTimeoutExpired	Exception)r%   r&   tmp_dirresultrE   r.   er   r   r   
run_demucsB   sP   
rL   c                 C   s   |  r| jdkrdS z tjdddt| dttddd	d
t|gdddd}|jdkW S  tyG } zt	d|  W Y d}~dS d}~ww )z$Resample to 24kHz mono using ffmpeg.r'   Tffmpegz-yz-iz-arz-ac1z-acodec	pcm_s16le,  r1   r   z    Resample error: NF)
r   r8   r9   r=   r>   r?   	TARGET_SRr@   rH   rA   )r%   r&   rJ   rK   r   r   r   resample_24kl   s"   rR   c                  C   s.   t d u rtjjdddd\} }| a |at tfS )Nzsnakers4/silero-vad
silero_vadT)repo_or_dirmodel
trust_repo)
_vad_modeltorchhubload
_vad_utils)rU   utilsr   r   r   get_vad   s   
r]   wav_pathout_dirc           6   
   C   s  |j ddd t \}}|d }tt| \}}|jd dkr'|jddd}|d}|dkr9tj	||d}n|}|||ddd	d
ddd}|sKg S t
d }	g }
|D ]}t|d |	 }t|d |	 }|
||f qStjt| dd\}}t|}|
d g}ttt
 }|
dd D ]\}}|d \}}|| |k r||f|d< q|||f qttt
 }g }|D ]\}}|| t
 }|tk r|r||d d  t
 tk r|d }||d  t
 }|tkr|d |f|d< q|dkr|||f q|tkr|||f q|}||k r|| t
 }|tkr|||f nf|ttt
  }|ttd t
  }t|tdt
  |} |}!|| k rv| |krvtdt
 }"td}#t|| |"D ]}$t|$|" |}%t||$|% d }&|&|#k rt|&}#|$}!qW|||!f |!}||k sqg }'|D ]m}(|'r|'d }|d |d  t
 })|(d |(d  t
 }*|(d |d  t
 }+|(d |d  t
 },|*tk r|,tkr|+dk r|d |(d f|'d< q|)tk r|,tkr|+dk r|d |(d f|'d< q|'|( qdd |'D }'g }-t|'D ]_\}.\}}td|| }/t||| }0||/|0 }1tt|1}2|2dkr1|1d|2  }1d|.dd}3||3 }4tt|4|1t
 t|1t
 }5|-|3t|5dt|/t
 dt|0t
 dd  q|-S )!zFRun Silero VAD, merge nearby speech chunks, split into 5-30s segments.Tr)   r      )dimkeepdimi>  gffffff?rP      d   F)	thresholdmin_speech_duration_msmin_silence_duration_msspeech_pad_msreturn_secondssampling_rateg     @@startendfloat32dtypeNg      ?g      @g?infr	   c                 S   s(   g | ]}|d  |d  t  dkr|qS )r`   r   r   )rQ   .0sr   r   r   
<listcomp>	  s   ( zvad_segment.<locals>.<listcomp>g{Gz?g?seg_04d.wav)filenameduration	start_secend_sec)r<   r]   
torchaudiorZ   r?   shapemeansqueeze
functionalresamplerQ   intappendsfreadlenMERGE_THRESHOLD_SECSILENCE_PAD_SECMIN_SEGMENT_SECMAX_SEGMENT_SECminfloatrangenp	enumeratemaxabswriteround)6r^   r_   rU   r\   get_speech_timestampsaudiosr	audio_16kspeech_timestampsratiochunkstsrk   rl   	audio_24k_total_samplesmerged	merge_gap
prev_startprev_endpadr   chunk_start	chunk_end	chunk_durprevnew_durpos	remaining
target_endsearch_start
search_end
best_splitwindow
min_energyspend_spenergyfinal_segmentssegprev_durthis_durgapcombined_durwrittenirt   rK   segment_audiopeakseg_nameseg_pathrz   r   r   r   vad_segment   s   









r   c                  C   s<   g } t tdD ]}d|jv sd|jv rq	| | q	| S )z@Find all raw Modi WAVs (exclude metadata, demucs outputs, etc.).z*.wav_demucs_16k)sortedSRC_DIRrE   r   r   )filesfr   r   r   get_source_files'  s   r   r   c              
   C   s  | j }t||}t| d }t| d }t| }|dv rrtd| d t }t| |}t | }	|sKt||d td| d|	dd d	S |	 j
d
 }
t||dt|	t|
dd td| d|
dd|	dd d}|dkrtd| d t||}|st||d td| d d	S t||d td| d|	 j
d
 dd d}|dkrtd| d t }t||}t | }	tdd |D }t||dt|t|dt|	d td| dt| d|dd|	dd	 |S |dkr	td| d  d	S d	S )!z4Process a single Modi WAV through the full pipeline.rx   )r   demucs_failed  [z] Running Demucs...r   z] Demucs FAILED (.0fzs)Ni   demucs_doner`   )demucs_time	demucs_mbz] Demucs done: .1fzMB in rt   z] Resampling to 24kHz mono...resample_failedz] Resample FAILED	resampledz] Resampled: MBz] Running VAD + segmentation...c                 s       | ]}|d  V  qdS rz   Nr   rr   r   r   r   	<genexpr>Z      z#process_one_file.<locals>.<genexpr>	segmented)num_segmentstotal_durationvad_time]  segments, z	s total (z] Already segmented, skipping)r;   r   
DEMUCS_DIRRESAMPLED_DIRSEGMENTS_DIRrA   r    rL   r$   r8   r9   r   rR   r   sumr   )r%   r   r   r   
demucs_wavresampled_wavseg_dirt0okelapsedsize_mbseg_info	total_durr   r   r   process_one_file0  sZ   

 
"
*
r   c              
   C   s>  g }t |  D ]L\}}|ddkrqt| }| sqt |dD ]/}ztjt|dd\}}t	|| }W n t
yD   d}Y nw ||t|t|dd q$qttd	d
d}	tj|	g dd}
|
  |
| W d   n1 syw   Y  tdd |D }tdt	| d|dd|d ddt  |S )z,Build manifest.csv from all segmented files.r   r   z	seg_*.wavrm   rn   r   r	   )sourcery   rz   w )newline)
fieldnamesNc                 s   r   r   r   )rs   rr   r   r   r     r   z!build_manifest.<locals>.<genexpr>z
Manifest: r   r   zs (r0   r   zhrs) -> )r   itemsr   r   r   rE   r   r   r?   r   rH   r   r   openMANIFEST_FILEcsv
DictWriterwriteheader	writerowsr   rA   )r   rowsr   infor   wavr   r   durr   writerr   r   r   r   build_manifestj  s8   
,r   c                     s  t ttfD ]	} | jddd qt  t }tdd  td tdd  tdt dt| d td	t	  t
 fd
d|D }td| dt| d tdd  t }t|dD ]Q\}}|j}t |}|dkrqq`td| dt| d| d| d	 zt|  W q` ty } ztd| d|  t |dt|d W Y d }~q`d }~ww tdd  td t }	t | }
t
dd   D }t
dd   D }td| d| d |
d d!d" d S )#NTr)   =<   zModi Voice PipelinezSource: z (z files)zOutput: c                 3   s$    | ]}t  |jd krdV  qdS )r   r`   N)r   r;   )rs   r   r   r   r   r     s   " zmain.<locals>.<genexpr>z
Progress: /z already doner`   r   z
[r   z	 (stage: )r   z] EXCEPTION: error)r  
z<============================================================zBuilding manifest...c                 s   s"    | ]}| d dkrdV  qdS )r   r   r`   Nr   rs   vr   r   r   r     s     c                 s   s2    | ]}d | ddv s| ddkrdV  qdS )failr   r   r  r`   Nr   r  r   r   r   r     s   0 z
Pipeline complete: z done, z	 failed, r   z	min total)r   r   r   r<   r   r   rA   r   r   OUT_DIRr   r    r   r;   r   r   rH   r$   r?   r   values)dsource_filesalready_donet_startr   srcr   r   rK   r   r   
done_countfailed_countr   r   r   main  sB   
& &r  __main__)0__doc__r   osrC   r=   r    r   pathlibr   concurrent.futuresr   r   numpyr   	soundfiler   rX   r}   r   r  r   r   r   r   r   rQ   r   r   r   r   DEMUCS_WORKERS	VAD_BATCHr   r   r   r$   rL   rR   rW   r[   r]   r   r   dictr   r   r  __name__r   r   r   r   <module>   sZ   	
* 	:,
