o
    imP                     @  s   d dl mZ d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZ d dlmZmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZmZ G dd dZ dS )    )annotationsN)Accelerator)DistributedDataParallelKwargs)EMA)AdamW)LinearLRSequentialLR)
DataLoaderDatasetSequentialSampler)tqdm)CFM)DynamicBatchSampler
collate_fn)defaultexistsc                   @  sx   e Zd Zdddddddddddd	d
dddde e dddde fd3d%d&Zed'd( Zd4d)d*Zd+d, Zd5d6d1d2Z	dS )7Traineri N  i  N    sample         ?wandbztest_f5-ttstest_runFvocos modelr   keep_last_n_checkpointsintbatch_size_typestrnoise_scheduler
str | Noneduration_predictortorch.nn.Module | Noneloggerwandb_resume_idlog_samplesboolaccelerate_kwargsdict
ema_kwargsbnb_optimizermel_spec_typeis_local_vocoderlocal_vocoder_pathmodel_cfg_dictc                  C  s  t dd}|dkrtjjsd }|| _td|dkr|nd |g|d|| _|| _| jdkr`t|r:dd||di}ndd|di}|sP|||||	|
||||d
}| jj	|d	< | jj
|||d
 n| jdkr{ddlm} d | _| jjr{|d| d| _|| _| jrt|fddi|| _| j| jj td|  |dkrtd || _|| _|| _|| _t||| _t|d| _|| _|	| _|
| _|| _ || _!|| _"|| _#|| _$|| _%|| _&|rdd l'}|j(j)|* |d| _+n
t,|* |dd| _+| j-| j| j+\| _| _+d S )NT)find_unused_parametersr   )log_withkwargs_handlersgradient_accumulation_stepsallow)resumenameid)r6   r7   )
epochslearning_ratenum_warmup_updatesbatch_size_per_gpur   max_samplesgrad_accumulation_stepsmax_grad_normr!   r,   gpus)project_nameinit_kwargsconfigtensorboardr   )SummaryWriterzruns/)log_dirinclude_online_modelFzUsing logger: r   zfGradient accumulation checkpointing with per_updates now, old logic per_steps used with before f992c4ezckpts/test_f5-tts)lr)rH   fused ).r   r   apiapi_keyr'   r   acceleratorr%   r   num_processesinit_trackerstorch.utils.tensorboardrE   writeris_main_processr   is_mainr   	ema_modeltodeviceprintr9   r;   save_per_updatesr   r   last_per_updatescheckpoint_pathr<   r   r=   r>   r?   vocoder_namer.   r/   r!   r#   bitsandbytesoptim	AdamW8bit
parameters	optimizerr   prepare) selfr   r9   r:   r;   rX   r   rZ   r<   r   r=   r>   r?   r!   r#   r%   wandb_projectwandb_run_namer&   r'   rY   r)   r+   r,   r-   r.   r/   r0   
ddp_kwargsrB   rE   bnbrJ   rJ   H/home/ubuntu/.local/lib/python3.10/site-packages/f5_tts/model/trainer.py__init__   s   


zTrainer.__init__c                 C  s   | j jS )N)rM   rR   )rb   rJ   rJ   rg   rS      s   zTrainer.is_mainc                 C  s<  | j   | jrt| j | j | j | j | j	 |d}t
j| js.t
| j |rD| j || j d td|  d S | jdkrKd S | j || j d| d | jdkrdd t
| jD }|jd	d
 d t|| jkr|d}t
t
j| j| td|  t|| jksxd S d S d S d S )N)model_state_dictoptimizer_state_dictema_model_state_dictscheduler_state_dictupdatez/model_last.ptz Saved last checkpoint at update r   z/model_.ptc                 S  s6   g | ]}| d r| ds|dr|dkr|qS )model_pretrained_rn   model_last.pt
startswithendswith.0frJ   rJ   rg   
<listcomp>   s    z+Trainer.save_checkpoint.<locals>.<listcomp>c                 S  s   t | dd dd S )N_r   .r   )r   splitxrJ   rJ   rg   <lambda>   s    z)Trainer.save_checkpoint.<locals>.<lambda>keyzRemoved old checkpoint: )rM   wait_for_everyonerS   r*   unwrap_modelr   
state_dictr`   rT   	schedulerospathr   rZ   makedirssaverW   r   listdirsortlenpopremovejoin)rb   rm   last
checkpointcheckpointsoldest_checkpointrJ   rJ   rg   save_checkpoint   s<   




zTrainer.save_checkpointc                 C  s(  t | jrtj | jrtdd t| jD sdS | j  dt| jv r+d}n(dd t| jD }dd |D }|rJt|dd	 d
d }n	t	dd |D }|
droddlm} || j d| dd}d|i}n|
drtj| j d| ddd}dD ]}||d v r|d |= q| jr| j|d  d|v sd|v rd|v r|d | j |d< | jdkr| jrtd dD ]}||d v r|d |= q| j| j|d  | j|d  | jr| j|d  |d }ndd  |d  D |d< | j| j|d  d}~t  |S )!Nc                 s  s    | ]}| d V  qdS )rn   .safetensorsN)rt   )rv   filenamerJ   rJ   rg   	<genexpr>   s    z*Trainer.load_checkpoint.<locals>.<genexpr>r   rq   c                 S  s.   g | ]}| d s| dr|dr|qS )ro   rp   r   rr   ru   rJ   rJ   rg   rx      s    z+Trainer.load_checkpoint.<locals>.<listcomp>c                 S  s"   g | ]}| d r|dkr|qS )ro   rq   rs   ru   rJ   rJ   rg   rx      s   " c                 S  s   t dttj| S )Nr   )r   r   filterr    isdigitr|   rJ   rJ   rg   r~      s    z)Trainer.load_checkpoint.<locals>.<lambda>r   r   c                 s  s    | ]
}| d r|V  qdS )rp   Nr   ru   rJ   rJ   rg   r      s    r   )	load_file/cpu)rV   rk   rn   T)weights_onlymap_location)z(ema_model.mel_spec.mel_stft.mel_scale.fbz.ema_model.mel_spec.mel_stft.spectrogram.windowrm   stepr   zF5-TTS WARNING: Loading checkpoint saved with per_steps logic (before f992c4e), will convert to per_updates according to grad_accumulation_steps setting, may have unexpected behaviour.)zmel_spec.mel_stft.mel_scale.fbz$mel_spec.mel_stft.spectrogram.windowri   rj   rl   c                 S  s&   i | ]\}}|d vr| dd|qS ))inittedrm   r   z
ema_model.r   )replace)rv   kvrJ   rJ   rg   
<dictcomp>   s
    z+Trainer.load_checkpoint.<locals>.<dictcomp>)r   rZ   r   r   anyr   rM   r   sortednextrt   safetensors.torchr   torchloadrS   rT   load_state_dictr>   rW   r   r   r`   r   itemsgccollect)rb   latest_checkpointall_checkpointstraining_checkpointsr   r   r   rm   rJ   rJ   rg   load_checkpoint   sx   









zTrainer.load_checkpoint   train_datasetr
   resumable_with_seedc           /      C  sx  | j r/ddlm}m}m}m} || j| j| jd}| j	
| jjj}	| j d}
tj|
dd t|r=t }|| nd }| jdkrRt|t|dd| jd|d}n+| jd	krud
| j	_t|}t|| j| j|d
d}t|t|dd|d}ntd| j | j| j	j }t !t"|| j# | j$ }|| }t%| j&dd|d}t%| j&dd|d}t'| j&||g|gd| _(| j	)|| j(\}| _(| * }|}t|rt"|}|| j# }t+|| }|| }| j	j,||d}nd}t-|| j$D ]@}| j.  t|r	||kr	t !|| j# }|}nd}|}t/|dr t/|j0dr |j01| t2t-t !t"|| j# d|d  d| j$ d| j	j3 |d}|D ]}| j	4| jr |d } |d 5ddd}!|d }"| j6d ur~| j	j3r~| j6|!|7dd}#| j	j8d |#9 i|d! | j|!| |"| j:d"\}$}%}&| j	;|$ | j<dkr| j	j=r| j	>| j? | j< | j&@  | j(@  | j&A  W d    n	1 sw   Y  | j	j=r| jBr| jCD  |d7 }|Dd |jEtF||$9 d# | j	j3r| j	j8|$9 | j(G d d$|d! | jHd%kr!| j	jIr!| jJKd&|$9 | | jJKd'| j(G d | || jL dkr5| j	j=r5| jM|dd( || jN dkr+| j	j=r+| M| | j r+| j	j3r+|"d }'| d tO| d tPrbd)gnd) | d  g}(tQ  | j	R  | j	
| jjS|!d d |' Td|(|'d |||d*\})}*|)UtjV})|)d d |'d d d f 5dddU| j	jW}+|d dd d d |'f Td},| jd+kr|X|+Y }-|X|,Y }.n| jd,kr||+ZdY }-||,ZdY }.W d    n	1 sw   Y  W d    n	1 sw   Y  t[\|
 d-| d.|-|	 t[\|
 d-| d/|.|	 | j.  qAq| jM|dd( | j	]  d S )0Nr   )cfg_strengthload_vocodernfe_stepsway_sampling_coef)r[   is_local
local_pathz/samplesT)exist_okr   )r   num_workers
pin_memorypersistent_workers
batch_sizeshuffle	generatorframeF)r=   random_seeddrop_residual)r   r   r   r   batch_samplerzAbatch_size_type must be either 'sample' or 'frame', but received g:0yE>r   )start_factor
end_factortotal_iters)
schedulers
milestones)num_batchesr   	set_epochzEpoch r   r   rm   )descunitdisableinitialtextmel   mel_lengths	durations)lenszduration loss)r   )r   r   r!   )rm   loss)r   rH   rD   r   rH   )r    )condr   durationstepsr   r   r   bigvganz/update_z_gen.wavz_ref.wav)^r'   f5_tts.infer.utils_inferr   r   r   r   r[   r.   r/   rM   r   r   mel_spectarget_sample_raterZ   r   r   r   r   	Generatormanual_seedr   r	   r   r<   even_batchesr   r   r=   
ValueErrorr;   rN   mathceilr   r>   r9   r   r`   r   r   ra   r   r   skip_first_batchesrangetrainhasattrr   r   r   is_local_main_process
accumulatepermuter#   getlogitemr!   backwardr?   sync_gradientsclip_grad_norm_r_   r   	zero_gradrS   rT   rm   set_postfixr    get_last_lrr%   rR   rQ   
add_scalarrY   r   rX   
isinstancelistinference_modeautocastr   	unsqueezerU   float32rV   decoder   squeeze
torchaudior   end_training)/rb   r   r   r   r   r   r   r   vocoderr   log_samples_pathr   train_dataloadersamplerr   warmup_updatestotal_updatesdecay_updateswarmup_schedulerdecay_schedulerstart_updateglobal_updateorig_epoch_step
start_stepskipped_epochskipped_batchskipped_dataloaderepochprogress_bar_initialcurrent_dataloaderprogress_barbatchtext_inputsr   r   dur_lossr   r   predref_audio_len
infer_text	generatedry   gen_mel_specref_mel_spec	gen_audio	ref_audiorJ   rJ   rg   r   	  s.  


	











(
.  
MzTrainer.train)r   r   r   r   r   r    r!   r"   r#   r$   r%   r"   r&   r    r'   r(   r)   r*   r+   r*   r,   r(   r-   r    r.   r(   r/   r    r0   r*   )F)r   N)r   r
   r   r   )
__name__
__module____qualname__r*   rh   propertyrS   r   r   r   rJ   rJ   rJ   rg   r      s>    w

#Pr   )!
__future__r   r   r   r   r   r  r   
accelerater   accelerate.utilsr   ema_pytorchr   torch.optimr   torch.optim.lr_schedulerr   r   torch.utils.datar	   r
   r   r   f5_tts.modelr   f5_tts.model.datasetr   r   f5_tts.model.utilsr   r   r   rJ   rJ   rJ   rg   <module>   s$    