o
    ॵij*                     @   s4  d dl Z d dlZd dlZd dlZd dlmZmZmZmZm	Z	m
Z
 d dlZd dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ e/ Z0ej1ej2dG dd deZ3dS )    N)CallableDictListOptionalTupleUnion)PreprocessorsTrainers)Model)SambertHifigan)	MsDataset)build_preprocessor)BaseTrainer)TRAINERS)TtsTrainType) TtsTrainingCfgNotExistsException"TtsTrainingDatasetInvalidException"TtsTrainingHparamsInvalidException TtsTrainingInvalidModelException$TtsTrainingWorkDirNotExistsException)Config)DEFAULT_DATASET_NAMESPACEDEFAULT_DATASET_REVISIONDEFAULT_MODEL_REVISION	ModelFileTasksTrainerStages)	to_device)
get_logger)module_namec                   @   s   e Zd ZdZdZdZdZdddddeee	j
i e	ji idef
d	eeef d
ededededeeef dedededee fddZdd ZdefddZdd Zdd Zdd Zd d! Zd"ed#eeef fd$d%ZdS )&KanttsTrainerdatatmp_amtmp_voc
orig_modelNF7PinYinFmodelwork_dirspeaker	lang_typecfg_filetrain_datasettrain_dataset_namespacetrain_dataset_revision
train_typemodel_revisionc                 K   s,  |st  j| _tj| jst| j n|| _tj| js(t| j dt	 | _
t|	t	rN|	 D ]\}}|tjksH|tjksH|tjkrM|| j
|< q5t| j
dkrftd i | j
tj< i | j
tj< td| j  tj| j| j| _tj| j| j| _tj| j| j| _tj| j| j| _d| _|
| _d| _d| _ d| _!t"j#| jdd t"j#| jdd t"j#| jdd t"j#| jdd t| j t| j t| j |rt|t$rtj|rtd|  || _n)td| d	|  t%j&|||d
}td|j'  | (|| _n| (|| _|s#t)dt|t$r0| *||}n|j+}t",|| j | j| _+|sJtj| j+t-j.}| /| tj| js[t0dd| _1|| _2d | _3|4dd| _5| 6| j+| j2| _3| j3j7| _7tj| j
v stj| j
v rt8t	t9j:dt;j<| _=d S d S )Nz not existsr   z,train type empty, default to sambert and voczSet workdir to  T)ignore_errorszload /dataset_name	namespaceversionztrain dataset:zmodel param is nonezdataset raw path not existsFdevicegpu)type)>tempfileTemporaryDirectorynamer(   ospathexistsmakedirsr   dictr/   
isinstanceitemsr   TRAIN_TYPE_SAMBERTTRAIN_TYPE_VOCTRAIN_TYPE_BERTlenloggerinfojoinDATA_DIRdata_dir
AM_TMP_DIR
am_tmp_dirVOC_TMP_DIRvoc_tmp_dirORIG_MODEL_DIRorig_model_dirraw_dataset_pathskip_scriptaudio_config_patham_config_pathvoc_config_pathshutilrmtreestrr   loadconfig_kwargsload_dataset_raw_pathr   get_or_download_model_dir	model_dircopytreer   CONFIGURATION	parse_cfgr   finetune_from_pretrainr)   r'   getr8   	get_modelr*   r   r   kantts_data_preprocessorr   text_to_speechaudio_data_preprocessor)selfr'   r(   r)   r*   r+   r,   r-   r.   r/   preprocess_skip_scriptr0   kwargskvr`    ro   Y/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/trainers/audio/tts_trainer.py__init__(   s   










zKanttsTrainer.__init__c                 C   s  t j|}t|ddd}t|}d|vrtdd|d v r5t j||d d }t j|r5|| _	d|d v rOt j||d d }t j|rO|| _
d|d v rit j||d d }t j|ri|| _| jsd	|d v r|d d	 }t j|r|| _nBd
|v r|dt}	|dt}
tj|d
 |	|
d}| || _n1d|v r|d | _W d    d S W d    d S W d    d S W d    d S W d    d S W d    d S 1 sw   Y  d S )Nrzutf-8)encodingtrainzmodel not support finetuneaudio_config	am_config
voc_configr,   idr6   revisionr4   r?   )r>   r?   dirnameopenjsonr\   r   rK   r@   rV   rW   rX   rT   re   r   r   r   r^   )rj   r+   cur_dirfconfigru   rv   rw   datasetr6   ry   msro   ro   rp   rc      sp   


$""zKanttsTrainer.parse_cfgr   c                 C   s6   d|j vr	tdd|j d vrtd|j d d S )Nsplit_configz'split_config not found in config_kwargsrt   zno train split in split_config)r]   r   )rj   r   ro   ro   rp   r^      s   
z#KanttsTrainer.load_dataset_raw_pathc              	   C   s`   | j r.| j}|rtj|s| j| j}| j| j}|  | j	| j
|| j| j| j| d S d S N)ri   rV   r>   r?   r@   r'   get_voice_audio_config_pathr)   get_voice_se_model_pathrT   rM   r*   rU   )rj   ru   se_modelro   ro   rp   prepare_data   s   zKanttsTrainer.prepare_datac                 C   s   d S r   ro   )rj   ro   ro   rp   prepare_text   s   zKanttsTrainer.prepare_textc                 C   s>   t tj| jtj}|di }t	d| jdd|}|S )Nr'   T)r`   is_trainro   )
r   	from_filer>   r?   rK   r`   r   rb   re   r   )rj   r`   r)   cfg	model_cfgr'   ro   ro   rp   rf      s   zKanttsTrainer.get_modelc                 O   s   | j stdd}d|v r|d }tj| jv stj| jv r!|   tj| jv r+|   | j	| j
| j| jd}| j| jd}| j | j|| j|| d S )Nzmodel is noneFignore_pretrain)r(   rO   rQ   rM   )rv   rw   )r'   r   r   rE   r/   rF   r   rG   r   r(   rO   rQ   rM   rW   rX   rt   r)   )rj   argsrl   r   dir_dictconfig_dictro   ro   rp   rt      s(   zKanttsTrainer.traincheckpoint_pathreturnc                 O   s   i S r   ro   )rj   r   r   rl   ro   ro   rp   evaluate   s   zKanttsTrainer.evaluate)__name__
__module____qualname__rL   rN   rP   rR   r   r   r   rE   rF   r   r   r
   r[   r   rB   r   rq   rc   r^   r   r   rf   rt   r   floatr   ro   ro   ro   rp   r    !   s^    

	
l)	
r    )4r>   rY   r;   zipfiletypingr   r   r   r   r   r   r|   modelscope.metainfor   r	   modelscope.modelsr
   modelscope.models.audio.ttsr   modelscope.msdatasetsr    modelscope.preprocessors.builderr   modelscope.trainers.baser   modelscope.trainers.builderr   "modelscope.utils.audio.audio_utilsr   %modelscope.utils.audio.tts_exceptionsr   r   r   r   r   modelscope.utils.configr   modelscope.utils.constantr   r   r   r   r   r   modelscope.utils.data_utilsr   modelscope.utils.loggerr   rI   register_modulespeech_kantts_trainerr    ro   ro   ro   rp   <module>   s,     