o
    wiW(                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d d	lmZmZ d d
lmZmZmZmZmZmZ d dlmZ d dlmZmZ G dd deeZ dS )    N)instantiate)TensorBoardLogger)
DictConfig	open_dict)WaveGlowLoss)GlowVocoder)OperationModewaveglow_log_to_tb_func)
Exportable)PretrainedModelInfo	typecheck)AudioSignalLengthsTypeLogDeterminantTypeMelSpectrogramTypeNormalDistributionSamplesTypeVoidType)
NeuralType)loggingmodel_utilsc                       sL  e Zd ZdZd=deddf fddZejjdd	 Ze	 d
dddZ
e	ede ed
ded
ded
dddede id	d>dejdedededejf
ddZdd Zdd  Zd!d" Zd?d$ed%efd&d'Zd(d) Zd*d+ Zed@d-d.ZdA fd/d0	Zed1d2 Zed3d4 Zd5d6 Z ed7d8 Z!ed9d: Z"d=d;d<Z#  Z$S )BWaveGlowModelzfWaveGlow model (https://arxiv.org/abs/1811.00002) that is used to generate audio from mel spectrogram.NcfgtrainerTrainerc                    sV   t |}t |}t j||d | jj| _t| jj| _	t| jj
| _
t | _d S )N)r   r   )r   #convert_model_config_to_dict_configmaybe_update_config_versionsuper__init___cfgsigmar   preprocessoraudio_to_melspec_precessorwaveglowr   loss)selfr   r   	__class__ a/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/tts/models/waveglow.pyr   )   s   


zWaveGlowModel.__init__c                 C   s.   |t jkr
|   n|   || _|| j_d S N)r   trainingtraineval_moder"   mode)r$   new_moder'   r'   r(   r.   5   s
   

zWaveGlowModel.modeT)run_inversec                C   s   | j | jj krtd| j  d| jj  | ||\}}| j|||| jd}| j tjkr2|d d S | j tjkrF|\}}}	}
|||	|
||fS |S )NzWaveGlowModel's mode z& does not match WaveGlowModule's mode )specaudior0   r   )r.   r"   
ValueErrorr!   r   r   r*   
validation)r$   r2   	audio_lenr0   r1   spec_lentensorsz
log_s_listlog_det_W_list
audio_predr'   r'   r(   forward>   s   zWaveGlowModel.forward)BDToptional)r1   r   denoisedenoiser_strengthr2   r>   r@   )input_typesoutput_types      ?{Gz?r1   r   rC   rD   returnc                 C   sv   |   - | j  | j|| jjjj|d}|r)| j||d}W d    |S W d    |S 1 s4w   Y  |S )N)r1   r   )r2   strength)
nemo_inferr"   remove_weightnormtoupsampleweightdtyperC   )r$   r1   r   rC   rD   r2   r'   r'   r(   convert_spectrogram_to_audioM   s   



z*WaveGlowModel.convert_spectrogram_to_audioc           
      C   sP   t j| _|\}}| ||dd\}}}| j|||| jd}|d|id|id}	|	S )NFr2   r6   r0   r9   r:   r;   r   training_lossr#   )r#   progress_barlog)r   r*   r.   r#   r   )
r$   batch	batch_idxr2   r6   r9   r:   r;   r#   outputr'   r'   r(   training_stepa   s   zWaveGlowModel.training_stepc                 C   s`   t j| _|\}}| |||dkd\}}}}}	}
| j|||| jd}|||	|
d}| j| |S )Nr   rS   rT   )val_lossr<   
mel_targetmel_len)r   r5   r.   r#   r   validation_step_outputsappend)r$   rX   rY   r2   r6   r9   r:   r;   r<   r1   r7   r#   r'   r'   r(   validation_stepn   s   
zWaveGlowModel.validation_stepc                 C   s   | j d ur1| j jd ur1| j j}| jjD ]}t|tr|j} nqt|| jd  | j	d| j
jd tdd | jD  }| d| | j  d S )Nr   r,   )tagmel_fbc                 S   s   g | ]}|d  qS )r\   r'   ).0xr'   r'   r(   
<listcomp>   s    z9WaveGlowModel.on_validation_epoch_end.<locals>.<listcomp>r\   )logger
experimentr   loggers
isinstancer   r	   r_   valuesglobal_stepr!   fbtorchstackmeanrW   clear)r$   	tb_loggerrg   avg_lossr'   r'   r(   on_validation_epoch_end~   s"   
z%WaveGlowModel.on_validation_epoch_endr+   shuffle_should_benamec                 C   s  d|vs
t |jtstd| d|vst |jts"td| |rad|jvrPtd|  d| d t|d  d	|j_W d    n1 sJw   Y  n#|jjs`t	d
| d|  d n|ss|jjrst	d
| d|  d t
|j}tjjj|fd|ji|jS )NdatasetzNo dataset for dataloader_paramszNo dataloder_params for shufflez"Shuffle should be set to True for z's zE dataloader but was not found in its config. Manually setting to TrueTzThe z dataloader for z has shuffle set to False!!!z has shuffle set to True!!!
collate_fn)rj   rw   r   r4   rx   r   warningr   ry   errorr   rn   utilsdata
DataLoaderrz   )r$   r   ru   rv   rw   r'   r'   r(   __setup_dataloader_from_config   s(   


z,WaveGlowModel.__setup_dataloader_from_configc                 C   s   |  || _d S r)   ),_WaveGlowModel__setup_dataloader_from_config	_train_dlr$   r   r'   r'   r(   setup_training_data   s   z!WaveGlowModel.setup_training_datac                 C   s   | j |ddd| _d S )NFr5   )ru   rv   )r   _validation_dlr   r'   r'   r(   setup_validation_data   s   z#WaveGlowModel.setup_validation_dataList[PretrainedModelInfo]c                 C   s(   g }t ddd| ddgd}|| |S )z
        This method returns a list of pre-trained model which can be instantiated directly from NVIDIA's NGC cloud.
        Returns:
            List of available pre-trained models.
        tts_en_waveglow_88mzhhttps://api.ngc.nvidia.com/v2/models/nvidia/nemo/tts_waveglow_88m/versions/1.0.0/files/tts_waveglow.nemozThis model is trained on LJSpeech sampled at 22050Hz, and has been tested on generating female English voices with an American accent and Mandarin voices.zWaveGlow-22050Hztts_waveglow)pretrained_model_namelocationdescriptionclass_aliases)r   r`   )clslist_of_modelsmodelr'   r'   r(   list_available_models   s   
z#WaveGlowModel.list_available_modelsc                    sF   t | jjjD ]}d| d|v r|d| d= qt j||d d S )Nzwaveglow.convinv.z.inv_conv.weight)strict)ranger   r"   n_flowsr   load_state_dict)r$   
state_dictr   ir%   r'   r(   r      s
   zWaveGlowModel.load_state_dictc                 C      | j S r)   r"   r$   r'   r'   r(   input_module      zWaveGlowModel.input_modulec                 C   r   r)   r   r   r'   r'   r(   output_module   r   zWaveGlowModel.output_modulec                 K   s   |    | jjdi | d S )Nr'   )update_bias_spectr"   _prepare_for_export)r$   kwargsr'   r'   r(   r      s   z!WaveGlowModel._prepare_for_exportc                 C   s"   t dt t dt t dddS )NrE   r>   TrA   rS   )r   r   r   r   r'   r'   r(   rF      s   

zWaveGlowModel.input_typesc                 C   s   | j tjks| j tjkr@tdt tdt gtt dgd}| j tjkr>tdt |d< tdt	 |d< tdt
 |d	< |S dtdt iS )
N)r>   	flowgroupr@   )elements_type)pred_normal_distr:   r;   rE   r<   )r>   r@   r?   r1   r>   r7   )r.   r   r*   r5   r   r   r   r   r   r   r   )r$   output_dictr'   r'   r(   rG      s   
zWaveGlowModel.output_typesc                 C   s   |  ||S r)   r   )r$   r1   r9   r'   r'   r(   forward_for_export   s   z WaveGlowModel.forward_for_exportr)   )rH   TrI   )Tr+   )rJ   r   )T)%__name__
__module____qualname____doc__r   r   r   r.   setterr   r=   r   r   r   rn   TensorfloatboolrR   r[   ra   rt   strr   r   r   classmethodr   r   propertyr   r   r   rF   rG   r   __classcell__r'   r'   r%   r(   r   &   s\    







r   )!rn   hydra.utilsr   lightning.pytorch.loggersr   	omegaconfr   r   (nemo.collections.tts.losses.waveglowlossr    nemo.collections.tts.models.baser   (nemo.collections.tts.parts.utils.helpersr   r	   nemo.core.classesr
   nemo.core.classes.commonr   r   nemo.core.neural_types.elementsr   r   r   r   r   r   "nemo.core.neural_types.neural_typer   
nemo.utilsr   r   r   r'   r'   r'   r(   <module>   s    