o
    i|                    @   sV  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlZd dlmZ d dlmZ d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dlmZ d dlmZ d dl m!Z!m"Z" d dl#m$Z$ d d	l%m&Z& d d
l'm(Z( d dl)m*Z* da+e, Z,e
j-pdZ.da/da0da1da2e3ed4dZ5e3ed4dZ6e3ed4dZ7ej89 rdnej:9 rdn	ej;j<9 rdndZ=dd Z>dd Z?dd Z@G dd dZAd
dd ZBd!d" ZCd#d$ ZDd%d& ZEd'd( ZFd)d* ZGd+eH fd,d-ZId.d/ ZJ	0	1dd2d3ZKeH fd4d5ZLd6d7 ZMd8d9 ZNd:e3d;e3d<eOd=eOd>e3f
d?d@ZPddBdCZQdDdE ZRdFdG ZSdHdI ZTdJdK ZUdLdM ZVdNdO ZWdPdQ ZXdRdS ZYd
dTdUZZd
dVdWZ[dXdY Z\dZd[ Z]d\d] Z^d^d_ Z_e` \aaebd` ec # eF \ZdZeejfdag dbdcddZgejhdedfdgZiejdhZkW d   n	1 sw   Y  ec  ejledeediddjdkZmejjdldmdnZnW d   n	1 sw   Y  ekjeGeieggemgdo eo  epdp ebdq ejqdrd+dgZrejbdsd+dtZsejtdudvdwdxZuejhdydzdgZvejd{ ZwZkejhd|ddgZxewjeIemeuevergexgdo erjyeMergeuesgdo ejd}Zzec  ejhd~dZ{ej|ddvdZ}W d   n	1 s]w   Y  ezjeVemge{e}gdo W d   n	1 sxw   Y  epd| ebd ejdZ~ejhd|ddgZebd ejfdg ddddZec  ejhddddjdZejhdddmdZW d   n	1 sw   Y  ejdZejhd|ddgZejyeRegegd e~jeTemeggeegdo ejeSemeegegdo W d   n	1 sw   Y  epdm ebd ebd ejqdd+d+dZejd ZZkejhd|ddgZejhdddgZejeLemegeegdo ejd}Zec  ejhddZej|ddvdZW d   n	1 sbw   Y  ejeUemgeegdo W d   n	1 s}w   Y  epd ebd ec  ejfdg ddZejhddZejhddZW d   n	1 sw   Y  ec  ejqdd ZZkejddZejd ZZkW d   n	1 sw   Y  ec ! ejddZejdddZejddZejddZW d   n	1 sw   Y  ec ' ejfdddgddZejdddZejdddZejdddZW d   n	1 s?w   Y  ec ( ejddddZejddmd dddZejddddZejfdd W d   n	1 stw   Y  ec A ejqddZejfdg ddZejfdg ddZe  ejdZejjdd+dÍZW d   n	1 sw   Y  W d   n	1 sw   Y  eedure?ee\ZZZZZZZZZZZZZZZZZZZee_ee_ee_ee_ee_ee_ee_ee_ee_ee_ee_ee_ee_ee_eeg_ee_ee_ee_ee_ejqdddgZejhd|ddgZe[eed+\ZZeZeZedurBed7 Zed7 Zec . ejleedddjddȍZejjdldmdnZeje[emgegdo emjye[emgegdo W d   n	1 sxw   Y  ec  ej|ddvedʍZej|ddvedʍZW d   n	1 sw   Y  ejye_egeegdo ejeDemeeeeeeeeeeeeeeegeeeeegeeegdo ejeEeeegd̍ ejeNemeeeeeeegeeeeeegdo ejyeYegeeeggd dd΄ ZeÃ Zemjye?emgedo enje?emgedo W d   n	1 sw   Y  epdϡ ebdС ejfdg ddddZeZeed+\ZZec ' ejdddgZejdddddd؍ZejddddڍZejqddZW d   n	1 smw   Y  ec  ejqddddލZejleedddZejdlZW d   n	1 sw   Y  ejd}ZejhddZej|ddvdZejhddZejeWemgeeegdo ec  ejhdddZejhddZejdZW d   n	1 sw   Y  ej|ddvdZejeXemeeeeeeeeeegeeegdo ejeZemgegdo emjyeZemgegdo W d   n	1 s$w   Y  epdU ebd ejhddZejhddZec  ejqdddgZejqdddgZW d   n	1 s^w   Y  ejhd|ddgZejdZejePeeeegegdo W d   n	1 sw   Y  epd+ ejhdddZdd ZejdZejeed̍ dd Zejeg edo W d   n	1 sw   Y  W d   n	1 sw   Y  W d   n	1 sw   Y  e ejdddeddejdddddejd dd+dddejdddddddd Zed	kr)e  dS dS (      N)glob)files)cached_path)Dataset)ArrowWriter)	load_file	save_file)wavfile)F5TTS)
transcribe)convert_char_to_pinyinpython f5_ttsz
../../dataz../../ckptsztrain/finetune_cli.pycudaxpumpscpuc                 C   s   t jt| }t j|dd t j|d}i d|d|d|d|d|d	|d
|d|d|	d|
d|d|d|d|d|d|d|||d}t|d}tj||dd W d    dS 1 shw   Y  dS )NTexist_oksetting.jsonexp_namelearning_ratebatch_size_per_gpubatch_size_typemax_samplesgrad_accumulation_stepsmax_grad_normepochsnum_warmup_updatessave_per_updateskeep_last_n_checkpointslast_per_updatesfinetunefile_checkpoint_traintokenizer_typetokenizer_filemixed_precisionloggerbnb_optimizerw   )indentzSettings saved!)ospathjoinpath_project_ckptsmakedirsopenjsondump)project_namer   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r)   ch_8bit_adampath_projectfile_settingsettingsf r<   P/home/ubuntu/.local/lib/python3.10/site-packages/f5_tts/train/finetune_gradio.pysave_settings=   s^   	

r>   c                 C   st  |  dd dd} tjt| }tj|d}i ddddd	d
ddddddddddddddddddddddddd dd!d"d#d$d%}td&krZd#|d!< tj|rt|d'}t	|}W d    n1 suw   Y  |
| |d |d |d	 |d |d |d |d |d |d |d |d |d |d |d |d |d  |d! |d( |d) fS )*N_pinyinr   _charr   r   F5TTS_v1_Baser   h㈵>r   i  r   framer   @   r      r         ?r   d   r   r    i  r!   r"   r#   Tr$   r%   pinyinr&   r'   fp16noneFr(   r   rr)   r*   )replacer.   r/   r0   r1   deviceisfiler3   r4   loadupdate)r6   r8   r9   default_settingsr;   file_settingsr<   r<   r=   load_settingsr   s   	

rT   c                 C   s   t | \}}|jd | S )z-Calculate the duration mono of an audio file.rE   )
torchaudiorP   shape)
audio_pathaudiosample_rater<   r<   r=   get_audio_duration   s   rZ   c                   @   sJ   e Zd Z					ddededed	ed
edefddZdd Zdd ZdS )Slicer      D N  ,       sr	threshold
min_lengthmin_intervalhop_sizemax_sil_keptc                 C   s   ||  kr|kst d t d||kst d|| d }d|d  | _t|| d | _tt|d| j | _t|| d | j | _t|| j | _t|| d | j | _d S )NzQThe following condition must be satisfied: min_length >= min_interval >= hop_sizezCThe following condition must be satisfied: max_sil_kept >= hop_size  
   g      4@r,   )	
ValueErrorrb   roundre   minwin_sizerc   rd   rf   )selfra   rb   rc   rd   re   rf   r<   r<   r=   __init__   s   	zSlicer.__init__c                 C   s^   t |jdkr|d d || j t|jd || j f S ||| j t|jd || j  S )NrE   r   )lenrV   re   rk   )rm   waveformbeginendr<   r<   r=   _apply_slice   s   ,$zSlicer._apply_slicec              
   C   s  t |jdkr|jdd}n|}|jd | jkr|gS tjj|| j| jd	d}g }d }d}t
|D ]\}}|| jk rC|d u rB|}q3|d u rHq3|dkoP|| jk}	|| | jko^|| | jk}
|	sf|
sfd }q3|| | jkr|||d   | }|dkr|d|f n|||f |}n|| | jd kr||| j || j d   }||| j 7 }|||| j d   | }||| j |d   | | j }|dkr|d|f |}nJ|t||t||f t||}n7|||| j d   | }||| j |d   | | j }|dkr|d|f n|||f |}d }q3|jd }|d urZ|| | jkrZt||| j }|||d   | }|||d f t |dkrl|dt|| j ggS g }|d d dkr|| |d|d d dt|d d | j g tt |d D ].}|| ||| d ||d  d t|| d | j t||d  d | j g q|d d |k r|| ||d d |t|d d | j t|| j g |S )NrE   r   )axis)yframe_length
hop_length   rH   )ro   rV   meanrc   librosafeaturermsrl   re   squeeze	enumeraterb   rf   rd   argminappendrk   maxintrs   range)rm   rp   samplesrms_listsil_tagssilence_start
clip_startir|   is_leading_silenceneed_slice_middlepospos_lpos_rtotal_framessilence_endchunksr<   r<   r=   slice   s   
 $$

4 zSlicer.sliceN)r\   r]   r^   r_   r`   )__name__
__module____qualname__r   floatrn   rs   r   r<   r<   r<   r=   r[      s*    
r[   Tc              	   C   s   zt | }W n t jy   Y d S w |jdd}|D ]}z
t|jtj W q t	y0   Y qw |rIzt|jtj W d S  t	yH   Y d S w d S )NT)	recursive)
psutilProcessNoSuchProcesschildrenr.   killpidsignalSIGTERMOSError)r   including_parentparentr   childr<   r<   r=   terminate_process_tree*  s&   r   c                 C   s,   t dkrd|  }t | d S t|  d S )NWindowsztaskkill /t /f /pid )systemr.   r   )r   cmdr<   r<   r=   terminate_process>  s   
r   c           ,      c   s   t d urt d ur
b t  tj  d a tjt	| }tj
|s5d|  tjddtjddfV  d S tj|d}tj|sUd| tjddtjddfV  d S td urfdtjddtjddfS dtjddtjddfV  |d	kr| d
rd}n
| drd}nd}| d
d	dd	} |dkrd| }nd	}d| dt d| d| d| d| d| d| d| d| d|	 d|
 d| d| d|  }|r|d 7 }|d	kr|d!| d"7 }|d	kr|d#| 7 }|d$| 7 }|dkr|d%| 7 }|d&7 }|r	|d'7 }td(| d)  t| |||||||||	|
||||||||| z|sMtj|dd*atd+ d,tjddtjddfV  t  npd-d. }tj }d/|d0< tj|dtjtjdd1|d2ad3tjddtjddfV  t }t }tj|tj |fd4}tj|tj!|fd4}d|_"d|_"|#  |#  da$	 t$rt%  td5 t& d u rt'  d6tjddtjddfV  nt& }zj	 |( } t| d	d7 t)*d8| }!|!r#|!+d1}"|!+d9}#|!+d:}$|!+d;}%|!+d+}&|!+d<}'d=|" d>|# d?|$ d@|% dA|& dB|' }(|(tjddtjddfV  n| , r6| tjddtjddfV  q tj-yB   Y nw z#	 |( })t|)d	d7 |), re|),  tjddtjddfV  qE tj-yq   Y nw |d ur|. r|. r|dCkrdD| dEtjddtjddfV  ndFtjddtjddfV  ntdG qtj /  tj!/  t  td1 td u rdH}*ndI}*W n t0y }+ zdJt1|+ }*W Y d }+~+nd }+~+ww d a|*tjddtjddfV  d S )KNzThere is not project with name TinteractiveF	raw.arrowzThere is no file zTrain run already!zstart trainr   r?   rI   r@   charcustomrK   z--mixed_precision=zaccelerate launch z "z" --exp_name z --learning_rate z --batch_size_per_gpu z --batch_size_type z --max_samples z --grad_accumulation_steps z --max_grad_norm z
 --epochs z --num_warmup_updates z --save_per_updates z --keep_last_n_checkpoints z --last_per_updates z --dataset_name z --finetunez --pretrain ""z --tokenizer_path z --tokenizer z
 --logger z --log_samplesz --bnb_optimizerzrun command : 

)shell   ztrain startc              
   S   s   z;zt | jdD ]}|| qW n ty- } z|dt|  W Y d }~nd }~ww W |   d S W |   d S |   w )Nr   zError reading pipe: )iterreadlineput	Exceptionstrclose)pipeoutput_queuelineer<   r<   r=   stream_output  s    z%start_training.<locals>.stream_output1PYTHONUNBUFFEREDrE   )r   stdoutstderrtextbufsizeenvzTraining started ...)targetargs      ?zTraining stopped by user.)rr   zKEpoch (\d+)/(\d+):\s+(\d+)%\|.*\[(\d+:\d+)<.*?loss=(\d+\.\d+), update=(\d+)rx      r,      zEpoch: /z, Progress: z%, Elapsed Time: z, Loss: z
, Update: r   zProcess crashed with exit code !zTraining complete or paused ...皙?Train stopped !zTrain complete at end !An error occurred: )2tts_apigccollecttorchr   empty_cacher.   r/   r0   	path_dataisdirgrrQ   rO   training_processendswithrM   
file_trainprintr>   
subprocessPopentimesleepwaitenvironcopyPIPEqueueQueue	threadingThreadr   r   daemonstartstop_signal	terminatepollr   
get_nowaitresearchgroupstripEmptyemptyr   r   r   ),dataset_namer   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   streamr)   r7   r8   file_rawrJ   r   r   r   stdout_queuestderr_queuestdout_threadstderr_threadprocess_statusoutputmatchcurrent_epochtotal_epochspercent_completeelapsed_timelosscurrent_updatemessageerror_output	text_infor   r<   r<   r=   start_trainingF  s  


"

	


	








"







D


 r  c                   C   sJ   t d u rdtjddtjddfS tt j dadtjddtjddfS )NzTrain not running !Tr   Fr   )r   r   rQ   r   r   r   r<   r<   r<   r=   stop_trainingH  s
   
r  c                  C   sd   g } t tD ]}t jt|}t j|sq| }|dkr q| | q| s*d n| d }| |fS )Nemilia_zh_en_pinyinrH   )r.   listdirr   r/   r0   r   lowerr   )project_listfolderpath_folderprojects_selelectr<   r<   r=   get_list_projectsS  s   r  c                 C   sV   | d| 7 } t jt jt| dd t jt jt| ddd t \}}tj|| dS )N_Tr   datasetchoicesvalue)r.   r2   r/   r0   r   r  r   rQ   )namer%   r
  r  r<   r<   r=   create_data_projectc  s
   
r  Fc              	      sD  t jt| }t j|d t j|d}t j|d}|s$|d u r$dS t j|r/t| t j|r:t | t j	|dd |rS fddd	D }|g krRd
S n|}d}	d}
t
d}d}d}d}|j|dt|dD ]}tj|ddd\}}||}|j|t|ddD ]d\}}}t jd| }t j|| d}t| }|dkr|| }|| |
|	  d|	 |  }t|d|d tj zt||}| }|| d| d7 }|d7 }W q   |d7 }Y qqmt|ddd}|| W d    n	1 sw   Y  |g krd| }nd}d| d | | S )!Nr  wavsmetadata.csvzYou need to load an audio file.Tr   c                    s(   g | ]}t tj |D ]}|qqS r<   )r   r.   r/   r0   ).0formatfilepath_datasetr<   r=   
<listcomp>~  s    z"transcribe_all.<locals>.<listcomp>)*.wavz*.oggz*.opusz*.mp3z*.flacz'No audio file was found in the dataset.r   rF   ]  r   r   ztranscribe files)desctotal)ra   monozslicer filesr!  r   segment_.wavrE   i  |r   r+   	utf-8-sigencodingz
error files : ztranscribe complete samples : z
path : )r.   r/   r0   r   r   shutilrmtreerO   remover2   r[   tqdmro   rz   rP   r   npabsr   r	   writeastypeint16r   r   r3   )name_projectaudio_fileslanguageuserprogressr8   path_project_wavsfile_metadatafile_audiosalpha_maxslicernum	error_numdata
file_audiorX   r  list_slicerchunkr   rr   name_segmentfile_segmenttmp_maxr   r;   
error_textr<   r  r=   transcribe_allk  sh   





rH  c                 C   s6   t | d }t | d d }| d } d||t | S )Ni  <   z{:02d}:{:02d}:{:02d})r   r  )secondshoursminutesr<   r<   r=   format_seconds_to_hms  s   rM  r  
wavmp3aacflacm4aalacoggaiffwmaamrc                    s   d } fdd}t j| r|| r| }|S || r)t j| s)t j|| }|S || s]t j| s] D ]}t j||  d| }t j|rN|} |S q5t j||  d d  }|S )Nc                    s   t  fddD S )Nc                 3   s     | ]}  d | V  qdS ).N)r   )r  ext	file_namer<   r=   	<genexpr>  s    zJget_correct_audio_path.<locals>.has_supported_extension.<locals>.<genexpr>)anyr[  supported_formatsr[  r=   has_supported_extension  s   z7get_correct_audio_path.<locals>.has_supported_extensionrY  r   )r.   r/   isabsr0   exists)audio_input	base_pathr`  rA  ra  rZ  potential_filer<   r_  r=   get_correct_audio_path  s$   rg  c           %      C   sz  t jt| }t j|d}t j|d}t j|d}t j|d}t j|d}t j|s6d| dfS t|dd	d
}	|	 }
W d    n1 sLw   Y  g }g }g }|
d}d}g }g }t }|j	|
d|dD ]}|d}t
|dkr}qo|d d \}}t||}t j|s||dg qozt|}W n# ty } z||dg td| d|  W Y d }~qod }~ww |dk s|dkr|dkr||dg |dk r||dg qot
|dk r||dg qo| }t|gddd }|| || || ||||d |r|t| ||7 }qo|g kr/d| dfS tt|d}tt|d}t|d}|j	|t
|dd D ]}|| qM|  W d    n	1 sew   Y  t|d!}	tjd|i|	d"d# W d    n	1 sw   Y  d}|st j|st jtd$}t j|sd%S t|| t|dd	d
}	i }t|	D ]\} }!| ||!d d& < qW d    n	1 sw   Y  t
|}"n1t|d!d	d
}	t|D ]}#|	|#d  ||#d 7 }qW d    n	1 sw   Y  t
|}"|g krdd'd( |D }$nd}$d)t
| d*t | d+| d,| d-| d.|" d|$ |fS )/Nr  r  r   duration.json	vocab.txtzThe file was not found in r   rL   r'  r(  r   r   )r!  r&  rx   z
error pathdurationzError processing z: rE      zduration > 30 seczduration < 1 sec r   zvery short text length 3T	polyphone)rW   r   rj  z4Error: No audio files found in the specified path : )r/   zprepare datar#  r+   F)ensure_asciiEmilia_ZH_EN_pinyin/vocab.txt)z7Error: Vocabulary file 'Emilia_ZH_EN_pinyin' not found!r   rH   c                 S   s   g | ]}d  |qS )z = )r0   r  itemr<   r<   r=   r  =  s    z#create_metadata.<locals>.<listcomp>zprepare complete 
samples : z
time data : z
min sec : z
max sec : z
file_arrow : z	
vocab : )!r.   r/   r0   r   rO   r3   readsplitsetr-  ro   rg  r   rZ   r   r   r   r   rQ   listrj   rk   r   r   r0  finalizer4   r5   r*  copy2r~   sortedrM  )%r3  ch_tokenizerr7  r8   r8  r9  r   file_duration
file_vocabr;   r@  audio_path_list	text_listduration_listcountlenghtresulterror_filestext_vocab_setr   sp_line
name_audior   rA  rj  r   
min_second
max_secondwriter	new_vocalfile_vocab_finetunevocab_char_mapr   r   
vocab_sizevocabrG  r<   r<   r=   create_metadata  s   











4r  c                 C   s   t j|  dt j| dfS )Nvisibler   rQ   r  r<   r<   r=   
check_userG  s   r  c                 C   s  t jt| }t j|d}	d}
d}t j|	s |||||dfS t|	d}t|}W d    n1 s5w   Y  |d }t|| |
 }t	|}t
|}tj rotj }d}t|D ]}tj|}||jd 7 }q^n0tj rtj }d}t|D ]}tj|}||jd 7 }qntjj rd	}t jd }|| }|d
krttd|d  d t|}n|dkrtd||  }|dk rt|d }t|t|d }d}|d
kr|| |
 | }|| }n
|dkr|| | }t|| }|rd}nd}||||||fS )Nrh     r  zproject not found !rL   rj  r      @rE   rC   i   r   K   sample   rD   g      ?g?iO rB   ga2U0*?)r.   r/   r0   r   rO   r3   r4   rP   r   ro   sumr   r   is_availabledevice_countr   get_device_propertiestotal_memoryr   backendsr   r   virtual_memory	availabler   )r3  r   r   r   r   r   r   r#   r8   rz  rw   sampling_rater  r@  r~  max_sample_lengthtotal_samplestotal_duration	gpu_countr  r   gpu_propertiesavg_gpu_memorymax_updatesmini_batch_durationupdates_per_epochr<   r<   r=   calculate_trainK  sz   
	



 
r  checkpoint_pathnew_checkpoint_pathsave_emasafetensorsreturnc           	   
   C   s   zNt j| dd}td|  |rdnd}z|| }W n ty*   | d Y W S w |r9|dd}t|| n|dd}d|i}t || d	| W S  tye } zd
| W  Y d }~S d }~ww )NT)weights_onlyzOriginal Checkpoint Keys:ema_model_state_dictmodel_state_dictz not found in the checkpoint..pt.safetensorszNew checkpoint saved at: r   )	r   rP   r   keysKeyErrorrM   r   saver   )	r  r  r  r  
checkpoint	to_retainmodel_state_dict_to_retainnew_checkpointr   r<   r<   r=   prune_checkpoint  s(   r  *   c           	         s  d}t | t|tjd< t| tj| tj| dtj	j
_dtj	j
_| dr9t| dd}d|i}n| d	rEtj| dd
}|di }d}|| }|d|d   fdd}||| ||< |dr|t|| S |d	rt|| S )Ni  PYTHONHASHSEEDTFr  r   )rN   r  r  )map_locationz2ema_model.transformer.text_embed.text_embed.weightr   rE   c                    s4   t  f}| |d < t  f|d < |S N)r   zerosrandn)old_embeddingsnew_embeddings	embed_dimnum_new_tokens	vocab_new	vocab_oldr<   r=   expand_embeddings  s   z2expand_model_embeddings.<locals>.expand_embeddings)randomseedr   r.   r   r   manual_seedr   manual_seed_allr  cudnndeterministic	benchmarkr   r   rP   getsizer   r  )		ckpt_pathnew_ckpt_pathr  r  ckptema_sdembed_key_emaold_embed_emar  r<   r  r=   expand_model_embeddings  s6   











r  c                 C   s   t t| dS )N,)r   ro   rs  )r   r<   r<   r=   vocab_count  s   r  c                 C   s  |dkrdS | }t jt|}t j|d}t jtd}t j|s)d| dS |d}|g kr4dS t|d	d
d}| }|d}	W d    n1 sOw   Y  t|	}
g }|D ]}|	dd}||
v riq\|
| q\|g krudS t|	}|	  |D ]}|	
| q|	
d t|ddd}|d|	 W d    n1 sw   Y  |dkrttd}n|dkrttd}n
|dkrttd}t|}|	dd	dd}t jt|}t j|dd t j|dt j| }t|||d}d|}d| d| d| d | S )!Nr   zSymbols empty!ri  ro  	the file  not found !r  zSymbols to extend not found.rL   r'  r(  r    z#Symbols are okay no need to extend.r+   zutf-8rA   z:hf://SWivid/F5-TTS/F5TTS_v1_Base/model_1250000.safetensors
F5TTS_Basez.hf://SWivid/F5-TTS/F5TTS_Base/model_1200000.pt
E2TTS_Basez.hf://SWivid/E2-TTS/E2TTS_Base/model_1200000.ptr?   r@   Tr   pretrained_)r  zvocab old size : z
vocab new size : z
vocab add : z
new symbols :
)r.   r/   r0   r   rO   rs  r3   rr  rt  rM   r   ro   popr0  r   r   r1   r2   basenamer  )r6   symbols
model_typer3  r8   file_vocab_projectr{  r;   r@  r  vocab_checkmiss_symbolsrq  
size_vocabr  vocab_size_newr   r  new_ckpt_filer  r  r<   r<   r=   vocab_extend  s\   


r  c                 C   s  | }t jt|}t j|d}t jtd}t j|s%d| ddfS t|ddd}| }|d	}t|}W d    n1 sDw   Y  t j|sWd| ddfS t|ddd}| }W d    n1 smw   Y  g }	i }
|d	D ]7}|d
}t	|dkrq{|d 
 }|dkrt|gddd }|D ]}||vr||
vr|	| ||
|< qq{|	g krd}d}||fS d|	}dt	|	 d}||fS )Nr  ro  r  r  r   rL   r'  r(  r   r&  rx   rE   rI   Trl  r   z#You can train using your language !r  zThe following z' symbols are missing in your language

)r.   r/   r0   r   rO   r3   rr  rs  rt  ro   r   r   r   )r6   r%   r3  r8   r9  r{  r;   r@  r  r  miss_symbols_keeprq  spr   t
vocab_missinfor<   r<   r=   r  (  sL   





r  c                 C   s   | }t jt|}t j|d}t j|sdS t|}|jt	ddd
dg}dddd	 |d
 d D  d }|d d }||fS )Nr   r   Nr   rg   )r  [z , c                 S   s   g | ]}d | d qS )z' z 'r<   )r  r  r<   r<   r=   r  _  s    z-get_random_sample_prepare.<locals>.<listcomp>r   ]rW   )r.   r/   r0   r   rO   Dataset_	from_fileshuffler  randintselect)r6   r3  r8   
file_arrowr  random_sampler   rW   r<   r<   r=   get_random_sample_prepareW  s   
$r  c                 C   s   | }t jt|}t j|d}t j|sdS d}t|ddd}| }W d    n1 s0w   Y  g }|dD ]#}|d}t|d	krJq<t	|d
 t j|d}	|
|	|d g q<|g krfdS t|}
|
d |
d
 fS )Nr  r  r   rL   r'  r(  r   r&  rx   r   r  rE   )r.   r/   r0   r   rO   r3   rr  rs  ro   rg  r   r  choice)r6   r3  r8   r9  r@  r;   	list_datarq  r  rA  random_itemr<   r<   r=   get_random_sample_transcribed  s(   


r  c                 C   s   t | \}}|||fS r  )r  )r6   r   rX   r<   r<   r=   get_random_sample_infer  s
   r  c                 C   s  t j|sdS td urd}nd }t|ks!t|ks!t|ks!td u rKt|kr'|at|kr-|at|kr3|at jt	| d}t
|||||datd||| |	dkrQd }	tjddd	$}tj|| | |||
|j|	d
 |jtjttjfW  d    S 1 sw   Y  d S )N)Nzcheckpoint not found!r   ri  )model	ckpt_file
vocab_filerN   use_emaz
update >> rH   Fr%  )deletesuffix)ref_fileref_textgen_textnfe_stepspeedremove_silence	file_waver  )r.   r/   rO   r   last_checkpointlast_devicelast_emar   r0   r   r
   r   tempfileNamedTemporaryFileinferr   r  rN   r   r  )projectfile_checkpointr   r  	ref_audior  r  r  r  r  r  device_testr  r;   r<   r<   r=   r    s@    

$r  c                 C   s"   t j| dt j| dt j| dfS )Nr   r  )r#   r<   r<   r=   check_finetune  s   "r  c                 C   s   | d u rg dfS |  dd dd} tjtrFttjt| d}dd |D }dd |D }dd |D }t|d	d
 d}|| | }ng }|sLd n|d }|rYtj	||dS ||fS )Nr   r?   r@   z*.ptc                 S       g | ]}d t j|v r|qS )r  r.   r/   r  r  r;   r<   r<   r=   r         z+get_checkpoints_project.<locals>.<listcomp>c                 S   s0   g | ]}d t j|vrdt j|vr|qS )r  model_last.ptr  r  r<   r<   r=   r    s
     c                 S   r  )r   r  r  r<   r<   r=   r    r  c                 S   $   t tj| dd dd S Nr  rE   rY  r   r   r.   r/   r  rs  xr<   r<   r=   <lambda>     $ z)get_checkpoints_project.<locals>.<lambda>keyr   r  
rM   r.   r/   r   r1   r   r0   rx  r   rQ   )r6   	is_gradiofiles_checkpointspretrained_checkpointsregular_checkpointsr  selelect_checkpointr<   r<   r=   get_checkpoints_project  s&   r0  c                 C   s   | d u rg dfS |  dd dd} tjtr3ttjt| dd}t|dd d}d	d
 |D }ng }|s9d n|d }|rFtj	||dS ||fS )Nr   r?   r@   r   r  c                 S   r!  r"  r#  r$  r<   r<   r=   r&    r'  z#get_audio_project.<locals>.<lambda>r(  c                 S   s"   g | ]}| d r|d dqS )_gen.wavr   )r   rM   rp  r<   r<   r=   r    s   " z%get_audio_project.<locals>.<listcomp>r   r  r*  )r6   r+  files_audiosr/  r<   r<   r=   get_audio_project  s   r3  c                  C   s  d} t j rVt j }t|D ]C}t j|}t j|}|jd }t j|d }t j	|d }| d| d| d| d|dd	| d|dd
| d|dd7 } q| S t j
 rt j
 }t|D ]C}t j
|}t j
|}|jd }t j
|d }t j
	|d }| d| d| d| d|dd	| d|dd
| d|dd7 } qd| S t jj rd}| d7 } t jd }d}d}| d|dd|dd|dd7 } | S d} | S )Nr   r     zGPU z Name: z
Total GPU memory (GPU z): .2fz GB
Allocated GPU memory (GPU z MB
Reserved GPU memory (GPU z MB

rE   zMPS GPU
r   zTotal system memory: z  GB
Allocated GPU memory (MPS): z MB
Reserved GPU memory (MPS): z MB
zNo GPU available)r   r   r  r  r   get_device_namer  r  memory_allocatedmemory_reservedr   r  r   r   r  r!  )	gpu_statsr  r   gpu_namer  r  allocated_memoryreserved_memoryr<   r<   r=   get_gpu_stats  s   


'



	r=  c            	   
   C   st   t jdd} t  }|jd }|jd }|j}t }t |}|	 }d| dd|dd|dd| d	| 
}|S )
NrE   )intervalr4  zCPU Usage: r5  z%
System Memory: z MB used / z MB total (z'% used)
Process Priority (Nice value): )
r   cpu_percentr  usedr!  percentr.   getpidr   nice)		cpu_usagememory_infomemory_usedmemory_totalmemory_percentr   process
nice_value	cpu_statsr<   r<   r=   get_cpu_stats+  s&   



rL  c                  C   s    t  } t }d|  d| }|S )Nz### GPU Stats
z

### CPU Stats
)r=  rL  )r9  rK  combined_statsr<   r<   r=   get_combined_stats?  s   rN  c                 C   s(   | }| }| d ur|d7 }|d7 }||fS )N_ref.wavr1  r<   )file_sampleselect_audio_refselect_audio_genr<   r<   r=   get_audio_selectF  s   rS  a  
# F5 TTS Automatic Finetune

This is a local web UI for F5 TTS finetuning support. This app supports the following TTS models:

* [F5-TTS](https://arxiv.org/abs/2410.06885) (A Fairytaler that Fakes Fluent and Faithful Speech with Flow Matching)
* [E2 TTS](https://arxiv.org/abs/2406.18009) (Embarrassingly Easy Fully Non-Autoregressive Zero-Shot TTS)

The pretrained checkpoints support English and Chinese.

For tutorial and updates check here (https://github.com/SWivid/F5-TTS/discussions/143)
zTokenizer Type)rI   r   r   rI   )labelr  r  zProject Namemy_speak)rT  r  zCreate a New ProjectProjectr   )r  r  rT  allow_custom_valuescaleRefreshrE   )rX  )fninputsoutputszTranscribe Dataz```plaintext 
Skip this step if you have your dataset, metadata.csv, and a folder wavs with all the audio files.                 
```zAudio from Pathu  ```plaintext    
     Place your 'wavs' folder and 'metadata.csv' file in the '{your_project_name}' directory. 
                 
     my_speak/
     │
     └── dataset/
         ├── audio1.wav
         └── audio2.wav
         ...
     ```r  Voicefilepathmultiple)rT  type
file_countLanguageEnglish
TranscribeInfozRandom SampleText)rT  Audio)rT  r`  zVocab Checkz```plaintext 
Check the vocabulary for fine-tuning Emilia_ZH_EN to ensure all symbols are included. For fine-tuning a new language.
```zCheck Vocabz```plaintext 
Using the extended model, you can finetune to a new language that is missing symbols in the vocab. This creates a new model with a new vocabulary size and saves it in your ckpts/project folder.
```Model)rA   r  r  rA   Symbolsz8To add new symbols, make sure to use ',' for each symbol)rT  r  placeholderrX  zNew Vocab Size)rT  r  rX  Extend)r[  r\  zPrepare Datazb```plaintext 
Skip this step if you have your dataset, raw.arrow, duration.json, and vocab.txt
```ur  ```plaintext    
     Place all your "wavs" folder and your "metadata.csv" file in your project name directory.

     Supported audio formats: "wav", "mp3", "aac", "flac", "m4a", "alac", "ogg", "aiff", "wma", "amr"

     Example wav format:                               
     my_speak/
     │
     ├── wavs/
     │   ├── audio1.wav
     │   └── audio2.wav
     |   ...
     │
     └── metadata.csv
      
     File format metadata.csv:

     audio1|text1 or audio1.wav|text1 or your_path/audio1.wav|text1 
     audio2|text1 or audio2.wav|text1 or your_path/audio2.wav|text1 
     ...

     ```zCreate Vocabulary)rT  r  r  PrepareVocab	TokenizerzTrain Modelz```plaintext 
The auto-setting is still experimental. Set a large value of epoch if not sure; and keep last N checkpoints if limited disk space.
If you encounter a memory error, try reducing the batch size per GPU to a smaller number.
```)rT  r  zTokenizer Filez!Path to the Pretrained CheckpointFinetuneSampleszAuto SettingsEpochszLearning Rategh㈵>)rT  stepzMax Gradient NormzWarmup UpdateszBatch Size TyperC   r  z;frame is calculated as seconds * sampling_rate / hop_length)rT  r  r  zBatch Size per GPUzN frames or N samples)rT  r  zGradient Accumulation Stepsz0Effective batch size is multiplied by this valuezMax Samplesz.Maximum number of samples per single GPU batchzSave per Updatesz-Save intermediate checkpoints every N updatesrh   )rT  r  minimumzKeep Last N Checkpointsz>-1 to keep all, 0 to not save intermediate, > 0 to keep last NrH   )rT  rr  	precisionr  rs  zLast per Updatesz;Save latest checkpoint with suffix _last.pt every N updateszUse 8-bit Adam optimizerzMixed Precision)rK   rJ   bf16Logger)rK   wandbtensorboardzStart TrainingzStop Trainingr   zStream Output ExperimentrO  r1  Audios)r  r  rT  rW  rX  r   Original)rT  r`  r  Generate)rZ  r\  c                  C   s.   t ttttttttt	t
ttttttttg} | S r  )r   r   r   r   r   r   r   r   r   r    r!   r"   ch_finetuner$   r%   r&   r'   	cd_loggerr7   )output_componentsr<   r<   r=   setup_load_settings  s*   r  z
Test Modelz```plaintext 
Check the use_ema setting (True or False) for your model to see what works best for you. Set seed to -1 for random.
```zNFE Step    SpeedrF   g333333?g       @r   )rT  r  rs  maximumrr  zRandom Seed)rT  r  rs  zRemove SilencezUse EMAz2Turn off at early stage might offer better results)rT  r  r  Checkpoints)r  r  rT  rW  zReference TextzReference AudiozText to GeneratezInference on Device :zUsed Random Seed :	InferencezGenerated AudiozPrune Checkpointz```plaintext 
Reduce the Base model size from 5GB to 1.3GB. The new checkpoint file prunes out optimizer and etc., can be used for inference or finetuning afterward, but not able to resume pretraining.
```zPath to Checkpoint:zPath to Output:zSave EMA checkpointzSave with safetensors formatPrunezSystem InfozGPU and CPU Informationr_   )rT  linesc                   C   s   t  S r  )rN  r<   r<   r<   r=   update_stats,  s   r  zUpdate Statsc                   c   s    t jt dV  d S )Nr  )r   rQ   r  r<   r<   r<   r=   auto_update2  s   r  z--portz-pzPort to run the app on)defaultr`  helpz--hostz-HzHost to run the app on)r  r  z--sharez-sz#Share the app via Gradio share link)r  is_flagr  z--apiz-azAllow API accessc                 C   s&   t d tj|dj|| ||d d S )NzStarting app...)api_open)server_nameserver_portshareshow_api)r   appr   launch)porthostr  apir<   r<   r=   main8  s   r  __main__)T)r  rN  )r  )r   r4   r.   platformr   r  r   r*  r   r   sysr  r   r   r   importlib.resourcesr   clickgradior   rz   numpyr.  r   r   rU   r   datasetsr   r  datasets.arrow_writerr   safetensors.torchr   r   scipy.ior	   
f5_tts.apir
   f5_tts.infer.utils_inferr   f5_tts.model.utilsr   r   r   
executablepython_executabler   r  r  r  r   joinpathr   r1   r   r   r  r   r  r   rN   r>   rT   rZ   r[   r   r   r  r  r  r  ProgressrH  rM  rg  r  r  r  boolr  r  r  r  r  r  r  r  r  r  r0  r3  r=  rL  rN  rS  Blocksr  MarkdownRowprojectsr  Radior%   Textboxr6   Button	bt_createDropdown
cm_projectch_refresh_projectTabsTabItemCheckbox	ch_manualmark_info_transcribeFileaudio_speakertxt_langbt_transcribetxt_info_transcribechangerandom_sample_transcriberandom_text_transcriberg  random_audio_transcribecheck_buttontxt_info_checkexp_name_extend
txt_extendtxt_count_symbolextend_buttontxt_info_extendch_tokenizern
bt_preparetxt_info_preparetxt_vocab_preparerandom_sample_preparerandom_text_preparerandom_audio_preparer   r&   r$   r|  Label
lb_samplesbt_calculateNumberr   r   r   r   r   r   r   r   r    r!   r"   r7   r'   r}  Columnstart_buttonstop_buttonexp_name_valuelearning_rate_valuebatch_size_per_gpu_valuebatch_size_type_valuemax_samples_valuegrad_accumulation_steps_valuemax_grad_norm_valueepochs_valuenum_warmup_updates_valuesave_per_updates_valuekeep_last_n_checkpoints_valuelast_per_updates_valuefinetune_valuefile_checkpoint_train_valuetokenizer_type_valuetokenizer_file_valuemixed_precision_valuelogger_valuebnb_optimizer_valuer  	ch_streamtxt_info_trainlist_audiosselect_audiorQ  rR  ch_list_audiobt_stream_audioaudio_ref_streamaudio_gen_streamr  r\  list_checkpointscheckpoint_selectr  Sliderr  r  r  
ch_use_emacm_checkpointbt_checkpoint_refreshrandom_sample_inferr  r  r  txt_info_gpu	seed_infocheck_button_infer	gen_audiotxt_path_checkpointtxt_path_checkpoint_smallch_save_emach_safetensorstxt_info_redusereduse_button
output_boxr  update_buttonr  rQ   commandoptionr   r  r   r<   r<   r<   r=   <module>   s   

5=u  G	oY(?/	0"6








,





	$


1












  j




	
>




      j
