o
    ߥiE.                     @   s
  d dl mZmZmZmZ d dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZmZmZmZ d dl m!Z! d d	l"m#Z# d
dl$m%Z% dgZ&e# Z'ej(e!j)ej*dG dd deZ+dS )    )absolute_importdivisionprint_functionunicode_literalsN)Models)Model)MODELS)TtsCustomParamsTtsTrainTypendarray_pcm_to_wav)$TtsFrontendInitializeFailedException'TtsFrontendLanguageTypeInvalidExceptionTtsModelConfigurationExceptionTtsVoiceNotExistsException)Tasks)
get_logger   )VoiceSambertHifigan)module_namec                       s   e Zd Z fddZdd Zdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dd Zdd Z				dddZddedefddZ  ZS )r   c           	         s  t  j|g|R i | || _|dd| _d| _d|v r*|d }t|tr*|| _d| _d|v rCd|d v rC|d d dd | _| 	||d	i \| _
| _| _t| j
d
kset| jdg d
kritd| jd rw| jd d
 | _ntdd
d l}| }tj|d}tj|d| _t|d}|| W d    n1 sw   Y  || jstd| j|| jstd| j|| _d S )Nsample_ratei>  Fis_trainamlinguistic_unithas_maskTcustom_ckptr   voiceszmodelscope error: voices emptyz0modelscope error: voices is empty in voices.jsonzresource.zipresourcerz&modelscope error: resource invalid: {}z+modelscope error: language type invalid: {})super__init__	model_dirgetr   r   
isinstanceboolignore_mask
load_voicer   	voice_cfg	lang_typelenr   default_voice_namettsfrdTtsFrontendEngineospathjoinres_pathzipfileZipFile
extractall
initializer   formatset_lang_typer   frontend)	selfr!   argskwargsr   r+   r7   zip_filezip_ref	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/modelscope/models/audio/tts/sambert_hifi.pyr    &   sT   
$


zSambertHifigan.__init__c           
      C   s   t jt jt jt jt jf}i }i }d}|D ]}||vr!td| q|t j }t|||| j| j	d}	|	||< |g|d< |	j
}|||fS )NPinYinzcustom ckpt must have: )
voice_name
voice_pathr   r%   r   r   )r	   
VOICE_NAMEAM_CKPTVOC_CKPT	AM_CONFIG
VOC_CONFIGTtsModelNotExistsExceptionr   r%   r   r(   )
r8   r!   r   necessary_filesr   
voices_cfgr(   krB   voicer?   r?   r@   build_voice_from_customN   s2   


z&SambertHifigan.build_voice_from_customc                 C   s   i }t j|d}t j|d}d}t|dkr| ||S t j|r*t j|s/|i |fS t|ddd}t|}W d    n1 sFw   Y  d|vrT|i |fS |d D ]!}	t j||	}
t j|
shqXt	|	|
| j
| jd||	< ||	 j}qX|||fS )	Nr   voices.jsonrA   r   r   utf-8encoding)r%   r   )r-   r.   r/   r)   rN   existsopenjsonloadr   r%   r   r(   )r8   r!   r   r   voices_pathvoices_json_pathr(   fr'   namerC   r?   r?   r@   r&   e   s6   



zSambertHifigan.load_voicec                 C   s   t j| jdd}t j|rt | i }g |d< | j D ]	}|d | qt	|ddd}t
|| W d    d S 1 sBw   Y  d S )Nr   rO   wrP   rQ   )r-   r.   r/   r!   rS   remover   keysappendrT   rU   dump)r8   rX   save_voicesrL   rY   r?   r?   r@   r`      s   
"zSambertHifigan.save_voicesc                 C   s   | j | jfS N)r   r'   )r8   r?   r?   r@   
get_voices   s   zSambertHifigan.get_voicesc                 C   s6  t j| jd|}t j|rt| t j|dd |r/t j|r/t j|r/t	|| t j|d}t j|d}|rXt j|rXt jt
rXt j|d}t	|| |rst j|rst jtrst j|d}	t	||	 t j|d}
t j|d}t j|
dd t j|dd t||dd| j|< d S )	Nr   T)exist_okr   voczconfig.yamlckpt)rB   rC   allow_empty)r-   r.   r/   r!   rS   shutilrmtreemakedirsisfilecopy	am_config
voc_configr   r   )r8   rB   audio_configam_config_pathvoc_config_pathvoice_name_pathvoice_am_pathvoice_voc_patham_config_namevoc_config_nameam_ckpt_pathvoc_ckpt_pathr?   r?   r@   create_empty_voice   sB   




z!SambertHifigan.create_empty_voicec                 C      || j vrdS | j | jS N )r   rn   r8   rM   r?   r?   r@   get_voice_audio_config_path      
z*SambertHifigan.get_voice_audio_config_pathc                 C   s*   || j vrdS | j | jr| j | jS dS rz   )r   	se_enablese_model_pathr|   r?   r?   r@   get_voice_se_model_path   s
   
z&SambertHifigan.get_voice_se_model_pathc                 C   ry   rz   )r   lang_dirr|   r?   r?   r@   get_voice_lang_path   r~   z"SambertHifigan.get_voice_lang_pathc                 C   s*   || j vrtd| d| j | |S )Nmodelscope error: Voice  not exists)r   r   forward)r8   rB   textr?   r?   r@   synthesis_one_sentences   s
   

z&SambertHifigan.synthesis_one_sentencesNFc                 C   s  t d |d }|d }	|d }
|d }d }|| jvrI|s&tdt d|dd	}|d
d}tj|v r=ts=t	dtj
|v rHtsHt	dn-| j| }|j}|j}|rvd|v rf|d }tj|rf|}d
|v rv|d
 }tj|rv|}td tj|v rtd tj }|tj }|||	|||| tj | }td| d  ntd tj
|v rtd tj }|tj
 }|||
|||| tj | }td| d  d S td d S )Ninfowork_dir
am_tmp_dirvoc_tmp_dirdata_dirr   r   rl   zam_config.yamlrm   zvoc_config.yamlz*training new voice am with empty am_configz,training new voice voc with empty voc_configzStart training....zStart SAMBERT training...z%SAMBERT training spent: {:.2f} hours
g      @zskip SAMBERT training...zStart HIFIGAN training...z%HIFIGAN training spent: {:.2f} hours
zskip HIFIGAN training...)pltset_loglevelr   r   rB   r"   r
   TRAIN_TYPE_SAMBERTrl    TtsTrainingCfgNotExistsExceptionTRAIN_TYPE_VOCrm   ro   rp   r-   r.   rS   loggerr   datetimenowtrain_sambertr5   total_secondstrain_hifigan)r8   rM   dirs
train_typeconfigs_path_dictignore_pretraincreate_if_not_existshparamr   am_dirvoc_dirr   target_voicero   rp   am_overridevoc_override	totaltimehparamsr?   r?   r@   train   s   


















zSambertHifigan.trainr   rB   c           	      C   s   | j }|d ur	|}| j|}dd | D }tjddd}|D ] }| d}| ||d }d| }tj	||
ddd	}q!t| j|S )
Nc                 S   s   g | ]}|d kr|qS )r{   r?   ).0sr?   r?   r@   
<listcomp>  s    z*SambertHifigan.forward.<locals>.<listcomp>r   int16)dtype	r   g      @)axis)r*   r7   gen_tacotron_symbols
splitlinesnpemptystripsplitr   r^   astyper   r   )	r8   r   rB   rM   resulttextsaudio_totallineaudior?   r?   r@   r     s   zSambertHifigan.forward)NFFNra   )__name__
__module____qualname__r    rN   r&   r`   rb   rx   r}   r   r   r   r   strr   __classcell__r?   r?   r=   r@   r   "   s"    (

E),
__future__r   r   r   r   r   r-   rg   waver1   rU   matplotlib.pyplotpyplotr   numpyr   yamlmodelscope.metainfor   modelscope.models.baser   modelscope.models.builderr   "modelscope.utils.audio.audio_utilsr	   r
   r   %modelscope.utils.audio.tts_exceptionsr   r   r   r   modelscope.utils.constantr   modelscope.utils.loggerr   rM   r   __all__r   register_moduletext_to_speechsambert_hifiganr   r?   r?   r?   r@   <module>   s0   