o
    }oi*                     @   s   d dl mZ d dlZd dlmZmZ d dlmZmZm	Z	 d dl
mZmZmZ d dlmZmZmZmZmZ d dlmZ G dd	 d	eeZdS )
    )TupleN)Invertible1x1ConvWaveNet)OperationModeremove
split_view)
ExportableNeuralModule	typecheck)AudioSignalIntTypeMelSpectrogramTypeNormalDistributionSamplesTypeVoidType)
NeuralTypec                       s   e Zd Zdedededededededef fd	d
Z fddZe d%ddZedd Z	edd Z
d&ddZdejdejdeejeef fddZdddd efd!d"Zd#d$ Z  ZS )'WaveGlowModulen_mel_channelsn_flowsn_groupn_early_everyn_early_sizen_wn_channelsn_wn_layerswn_kernel_sizec	              
      st  t    tjj||ddd| _|| _|d dksJ || _|| _|| _	|| _
tj | _tj | _tj| _|d }	|}
t|D ]2}|| j	 dkr\|dkr\|	t| j
d  }	|
| j
 }
| jt|
 | jt|	|| |||d qA|
| _| jjd | jjd  | _g }| jd }	tt| jD ]}||	 || j	 dkr|dkr|	t| j
d  }	q|  || _d| _dS )	a  
        WaveGlow module

        Args:
            n_mel_channels (int): Number of mel channels to output.
            n_flows (int): Number of flow layers
            n_group (int): Number of groups to respace the inputs
            n_early_every (int): Every n_early_every layers, n_early_size gets skip connected to the output
            n_early_size (int): The size of the chunk to be skip connected
            n_wn_channels (int): Number of channels for the non-invertible wavenet transformation
            n_wn_layers (int): Number of layers for the non-invertible wavenet transformation
            wn_kernel_size (int): Kernel size for the non-invertible wavenet transformation
        i      )stride   r   )n_layers
n_channelskernel_sizeFN)super__init__torchnnConvTranspose1dupsampler   r   r   r   r   
ModuleListwavenetconvinvr   infermoderangeintappendr   r   n_remaining_channelsr   r   time_cutoffreversedreversen_halvesremoved_weightnorm)selfr   r   r   r   r   r   r   r   n_halfr.   kr2   	__class__ Y/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/tts/modules/waveglow.pyr!       sN   

	


zWaveGlowModule.__init__c                    s   |    t jdi | dS )z
        Override this method to prepare module for export. This is in-place operation.
        Base version does common necessary module replacements (Apex etc)
        Nr9   )remove_weightnormr    _prepare_for_export)r4   kwargsr7   r9   r:   r<   g   s   z"WaveGlowModule._prepare_for_exportNT      ?c           
      C   s   | j r| jtj krt|  d| j s | jtj kr t|  dtd}|r/| j|||d}|durI| jtjkrI| j||d\}}}	|||	|fS |S )z TODO
        zM has self.training set to True but self.OperationMode was not set to trainingzJ has self.training set to False but self.OperationMode was set to training)   r?   )specsigmazN)r@   audio)	trainingr*   r   
ValueErrorr"   zerosnorm_dist_to_audior)   audio_to_normal_dist)
r4   r@   rB   rC   run_inverserA   
audio_predz1
log_s_listlog_det_W_listr9   r9   r:   forwardo   s   
zWaveGlowModule.forwardc                 C   sr   | j tjkrtdt tdt ddtdddS tdt tdt ddtdt ddtt ddtdddS )N)BDTT)optional)r@   rB   rA   rO   rQ   )elements_typerR   )r@   rB   rC   rI   rA   )r*   r   r)   r   r   r   r   r4   r9   r9   r:   input_types   s   

zWaveGlowModule.input_typesc                 C   sZ   | j tjks| j tjkr%tdt tdt gtt dgtdt dS dtdt iS )N)rO   	flowgrouprQ   )rT   rS   )pred_normal_distrL   rM   rJ   rC   )r*   r   rD   
validationr   r   r   r   rU   r9   r9   r:   output_types   s   

zWaveGlowModule.output_typesr?   r   c                 C   s`   t |  }tj|| j|f|j|jd}tj|| j|| jjd  | j	 f|j|jd}||dS )zs
        Generates input examples for tracing etc.
        Returns:
            A tuple of input examples.
        )devicedtyper   )r@   rB   )
next
parametersr"   randnr   r[   r\   r%   r   r   )r4   	max_batchmax_dimparmelrB   r9   r9   r:   input_example   s   
zWaveGlowModule.input_exampler@   rC   returnc                C   s  |  |}|d|dksJ |d|dkr+|d d d d d |df }t|| jddddd}| |d|dd}|ddd}t|| jdddd}g }g }g }t| jD ]}|| j	 dkr|dkr|
|d d d | jd d f  |d d | jd d d f }| j| |\}}|
| t|dd }|d d d |d d f }	|d d |d d d f }
| j| |	|f}|d d |d d d f }|d d d |d d f }t||
 | }
|
| t|	|
gd}qe|
| t|d||fS )Nr   r?   r      )r%   sizer   r   permute
contiguousviewr+   r   r   r-   r   r(   r,   r'   r"   expcat)r4   r@   rC   output_audiorL   rM   r6   	log_det_Wr5   audio_0audio_1outputlog_sbr9   r9   r:   rH      s8   
 "


z#WaveGlowModule.audio_to_normal_dist)rB   rA   rA   c                C   s  |  |}| |d|dd}| jdkr'|d d d d d | jf }t|| jddddd}| |d|dd}|ddd}t	|d| j|dg}|d u rk|tj
||jd|j }t|| j|d| j gd\}}tt| jD ]q}| j| }t|||d| gd\}}	| j| ||f}
t|
||
d| gd\}}|	| }	|	t| }	t||	fd}| j| |dd}|| j dkr|dkrt|| j|d| j gd\}}t||fd}q|ddd |ddS )	Nr   r?   rg   r   rf   )r[   T)r1   )r%   rj   rk   rh   r/   r   r   ri   r"   Sizer_   r[   tor\   splitr.   r0   r+   r   r2   r'   rl   rm   r(   r   r   )r4   r@   rB   rA   z_sizerC   r6   r5   rp   rq   rr   rt   srK   r9   r9   r:   rG      s2   

$
  $ z!WaveGlowModule.norm_dist_to_audioc                 C   s\   | j rd S | jD ] }tjj|j|_t|j|_tjj|j	|_	t|j
|_
qd| _ d S )NT)r3   r'   r"   r#   utilsremove_weight_normstartr   	in_layers
cond_layerres_skip_layers)r4   r'   r9   r9   r:   r;      s   

z WaveGlowModule.remove_weightnorm)NNTr>   )r?   r   )__name__
__module____qualname__r,   r!   r<   r
   rN   propertyrV   rZ   rd   r"   Tensorr   listrH   floatrG   r;   __classcell__r9   r9   r7   r:   r      s:    	G


&(#r   )typingr   r"   'nemo.collections.tts.modules.submodulesr   r   (nemo.collections.tts.parts.utils.helpersr   r   r   nemo.core.classesr   r	   r
   nemo.core.neural_types.elementsr   r   r   r   r   "nemo.core.neural_types.neural_typer   r   r9   r9   r9   r:   <module>   s   