o
    wit                     @   s  d dl mZmZ d dlZd dlZd dlmZmZ d dlm	Z
 d dlmZmZmZ d dlmZmZmZ d dlmZmZmZ ejjdd	 ZG d
d dejjZG dd dejZG dd dejZG dd dejZdddefddZG dd dejjZ G dd dejjZ!G dd dejjZ"G dd dejjZ#G dd  d ejjZ$G d!d" d"ejjZ%G d#d$ d$ejjZ&G d%d& d&ejjZ'G d'd( d(ejjZ(dS ))    )OptionalTupleN)Tensornn)
functional)ConvNorm
LinearNormMaskedInstanceNorm1d)get_mask_from_lengthssort_tensorunsort_tensor)"piecewise_linear_inverse_transformpiecewise_linear_transform'unbounded_piecewise_quadratic_transformc                 C   s    t | }t |}|| }|S N)torchtanhsigmoid)input_ainput_bt_acts_actacts r   `/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/nemo/collections/tts/modules/common.pyfused_add_tanh_sigmoid_multiply!   s   

r   c                       s$   e Zd Z fddZdd Z  ZS )ExponentialClassc                    s   t t|   d S r   )superr   __init__)self	__class__r   r   r   *   s   zExponentialClass.__init__c                 C   s
   t |S r   )r   exp)r   xr   r   r   forward-   s   
zExponentialClass.forward__name__
__module____qualname__r   r$   __classcell__r   r   r    r   r   )   s    r   c                       s.   e Zd Zdddgf fdd	Zdd Z  ZS )
DenseLayer   c                    s@   t t|   |g|d d  }tdd t||D | _d S )Nc                 S   s   g | ]\}}t ||d dqS )T)bias)r   ).0in_sizeout_sizer   r   r   
<listcomp>6   s    z'DenseLayer.__init__.<locals>.<listcomp>)r   r*   r   r   
ModuleListziplayers)r   in_dimsizesin_sizesr    r   r   r   2   s
   
zDenseLayer.__init__c                 C   s   | j D ]	}t||}q|S r   )r4   r   r   )r   r#   linearr   r   r   r$   9   s   
zDenseLayer.forwardr%   r   r   r    r   r*   1   s    r*   c                
       s   e Zd Zd fdd	Zddeded	eeeef  d
efddZddeded	eeeef  d
efddZdeded
efddZ	deded
efddZ
  ZS )BiLSTM   spectral@   c                    s   t    tj|||ddd| _|d ur/d|v r"td tjjj}nd|v r/td tjjj	}|| jd || jd | jj
d	krE| jj
n| jj| _| j  d S )
NT)batch_firstbidirectionalr;   zApplying spectral norm to LSTMweightzApplying weight norm to LSTMweight_hh_l0weight_hh_l0_reverser   )r   r   r   LSTMbilstmprintr   utilsspectral_normweight_norm	proj_sizehidden_sizereal_hidden_sizeflatten_parameters)r   
input_sizerI   
num_layerslstm_norm_fnmax_batch_sizelstm_norm_fn_pntrr    r   r   r   @   s   

zBiLSTM.__init__Ncontextlenshxreturnc                 C   sD   t jjj||  ddd}| ||\}}t jjj|ddd S )NT)r=   enforce_sorted)r=   r   )r   rE   rnnpack_padded_sequencelongcpurC   pad_packed_sequence)r   rQ   rR   rS   seqret_r   r   r   lstm_sortedR   s   zBiLSTM.lstm_sortedc                 C   s*   t ||\}}}| j|||d}t||S )NrS   )r   r^   r   )r   rQ   rR   rS   
unsort_idsr\   r   r   r   lstmW   s   
zBiLSTM.lstmc                 C   s   |j }tjj| jjdd? |jd }|jtjd}| j	j
d |f}|jg || jR  |jg || j	jR  f}| j|||dj|dW  d    S 1 sOw   Y  d S )NFenabledr   dtype   r_   )re   r   ampautocastdevicetypeshapetofloat32rC   rM   	new_zerosrJ   rI   ra   )r   rQ   rR   re   rO   common_shaperS   r   r   r   lstm_nocast]   s   
$zBiLSTM.lstm_nocastc                 C   s,   | j   tj r| ||S | ||S r   )rC   rK   r   jit
is_tracingrp   ra   )r   rQ   rR   r   r   r   r$   l   s   

zBiLSTM.forward)r:   r;   r<   r   )r&   r'   r(   r   r   r   r   r^   ra   rp   r$   r)   r   r   r    r   r9   ?   s    ((r9   c                       sD   e Zd Z								d fdd	Zd	ed
edefddZ  ZS )ConvLSTMLinearNrf         皙?Fc	                    s   t t|   t|t|d d| _t | _|dkr"tj	|d| _
|d u }	t|D ]6}
t|
dkr3|n|||dt|d d dd|	||d
}|d urStd|| ntd| | j| q*d | _|d urqt||| _d S d S )	Nrf   r:   r   )prelu)kernel_sizestridepaddingdilationw_init_gainuse_weight_normuse_partial_paddingnorm_fnzApplying {} norm to {}zApplying weight norm to {})r   rs   r   r9   intrC   r   r2   convolutionsDropoutdropoutranger   rD   formatappenddenseLinear)r   r5   out_dimn_layers
n_channelsry   	p_dropoutr   r   r~   i
conv_layerr    r   r   r   t   s6   
zConvLSTMLinear.__init__rQ   rR   rT   c                 C   sx   t ||}|j|jdd}| jD ]}| t|||}q| j|	dd|d}| j
d ur:| 
|ddd}|S )Nrd   r:   rf   )rR   r   )r
   rl   re   	unsqueezer   r   Frx   rC   	transposer   permute)r   rQ   rR   maskconvr   r   r   r$      s   


zConvLSTMLinear.forward)NNrf   rt   ru   rv   FN)r&   r'   r(   r   r   r$   r)   r   r   r    r   rs   s   s    +rs   ru         c              	   C   s   t || ||dd|dS )Ng      ?T)r5   r   r   ry   r   r   r   )rs   )encoder_n_convolutionsencoder_embedding_dimencoder_kernel_sizer   r   r   r   get_radtts_encoder   s   r   c                       s6   e Zd Z fddZejjddddddZ  ZS )	Invertible1x1ConvLUSc                    s   t t|   tjt|| \}}t|dk r,d|d d df  |d d df< tj	t
| \}}}| d| t|d}tt||}| d| t|| _tt|| _tt|d| _d S )Nr   r,   rw   
lower_diagr:   )r   r   r   r   linalgqrFloatTensornormal_det	lu_unpackluregister_buffertrildiageyer   	Parameterlower
upper_diagtriuupper)r   cWr]   rw   r   r   r   r    r   r   r      s    zInvertible1x1ConvLUS.__init__cudaF)device_typerc   c                 C   s   t | jdt | j }t | jdt | j }t | j	t ||}|rNt
| ds<|  j|jd}|d | _tj|| jj|jdd ddd}|S |d }tj||d ddd}t t t | j}||fS )Nr:   r,   	W_inverserd   .Nr   r-   rz   r{   )r   r   r   r   r   r   r   r   mmrw   hasattrfloatinverserl   re   r   r   conv1dsumlogabs)r   zr   ULr   r   	log_det_Wr   r   r   r$      s   

 zInvertible1x1ConvLUS.forwardF)	r&   r'   r(   r   r   rg   rh   r$   r)   r   r   r    r   r      s    r   c                       s*   e Zd ZdZ fddZdddZ  ZS )Invertible1x1Convz
    The layer outputs both the convolution, and the log determinant
    of its weight matrix.  If inverse=True it does convolution with
    inverse
    c                    s   t t|   tjj||ddddd| _tt||	 d }t
|dk r8d|d d df  |d d df< |||d}|| jj_d S )Nr:   r   F)ry   rz   r{   r-   r,   )r   r   r   r   r   Conv1dr   r   r   r   r   viewr?   data)r   r   r   r    r   r   r      s    zInvertible1x1Conv.__init__Fc                 C   st   | j j }|r*t| ds|  j|jd}|d | _t	j
|| jd ddd}|S t| }|  |}||fS )Nr   rd   r   r:   r   r   )r   r?   squeezer   r   r   rl   re   r   r   r   r   logdetclone)r   r   r   r   r   r   r   r   r   r$      s   


zInvertible1x1Conv.forwardr   r&   r'   r(   __doc__r   r$   r)   r   r   r    r   r      s    r   c                       s>   e Zd Z						d fdd	Zddee fd	d
Z  ZS )SimpleConvNetrf   r   Tr+   c
                    s   t t|   tj | _|| _|| }
d}|	| _t	|D ].}|r%d| nd}t
|| | d }t||
d }| jt|
||d||dd|	d	 |}
qtjj||dd| _|rl| jj jd9  _| jj jd9  _d S d S )	Nr,   rf   r:   Trx   )ry   rz   r{   r|   r-   r}   r   )ry   r   )r   r   r   r   r   r2   r4   r   r   r   r   minr   r   r   
last_layerr?   r   r-   )r   n_mel_channelsn_context_dimfinal_out_channelsr   ry   with_dilationmax_channels	zero_initr   in_channelsout_channelsr   r|   r{   r    r   r   r   	  s:   zSimpleConvNet.__init__Nseq_lensc                 C   sR   t ||dj|jd}t| jD ]}| j| ||}t|}q| 	|}|S )Nr:   rd   )
r
   r   rl   re   r   r   r4   r   rx   r   )r   z_w_contextr   r   r   r   r   r   r$   4  s   
zSimpleConvNet.forward)rf   r   Tr+   TTr   )r&   r'   r(   r   r   r   r$   r)   r   r   r    r   r     s    +r   c                       sD   e Zd ZdZ			d fdd	Zddeeef d	efd
dZ  ZS )WNzT
    Adapted from WN() module in WaveGlow with modififcations to variable names
    r   softplusTc              
      s>  t t|   |d dksJ |d dksJ || _|| _tj | _tj | _	tj
|| |d}tjjj|dd}|| _tj | _|| _|| _tj
|d| d}	|	jj  |	jj  |	| _t|D ]4}
d|
 }t|| | d }t||||||dd}| j| t
||d}tj|}| j	| qhd S )Nrf   r:   r   r?   )nameT)ry   r|   r{   r   r~   )r   r   r   r   r   r   r   r2   	in_layersres_skip_layersr   rE   rG   startSoftplusr   affine_activationr   r?   r   zero_r-   endr   r   r   r   )r   n_in_channelsr   r   r   ry   r   r   r   r   r   r|   r{   in_layerres_skip_layerr    r   r   r   G  sD   
	zWN.__init__Nforward_inputr   c           
      C   s   |\}}t ||fd}| |}t |}d }| jr$t|d }t j}| j	dkr/| j
}t| jD ]}|| j| ||}|| j| |}	||	 }q4| |}|S )Nr:   r   )r   catr   
zeros_liker   r
   r   r   rx   r   r   r   r   r   r   r   )
r   r   r   r   rQ   outputr   non_linearityr   res_skip_actsr   r   r   r$   v  s    




z
WN.forward)r   r   Tr   )	r&   r'   r(   r   r   r   r   r$   r)   r   r   r    r   r   B  s    
$/r   c                       sN   e Zd Z												d fd
d	Zdd Zdd ZdddZ  ZS )SplineTransformationLayerARsimple_convr:   r"   r   r+         Fc              
      s   t t|   || _|
| _|| _|| _|| _|	| _t	| _
t| _|| _| jr3t| _
t| _d| j d | _| j| j }t|d||ddddd| _d S )Nrf   r:   r   FT)r   ry   r   r   )r   r   r   r   leftrightbottomtopn_binsr   	spline_fnr   inv_spline_fnuse_quadraticr   r   param_predictor)r   r   r   r   affine_modelry   
scaling_fnr   r   r   r   r   r   r   r   r   r    r   r   r     s2   z$SplineTransformationLayerAR.__init__c                 C   s8   |r|| j  | j| j   }|S || j | j| j  }|S r   )r   r   r   r   r   r   r   r   r   r   	normalize  s
   z%SplineTransformationLayerAR.normalizec                 C   s8   |r|| j | j  | j }|S || j| j  | j }|S r   )r   r   r   r   r   r   r   r   denormalize  s
   z'SplineTransformationLayerAR.denormalizec                 C   s  | d| d| d}}}| ||}| dk s#| dkr-td| |  |ddd|| d}| |}|ddd|| |d}	tj	j
| jjdd	I | jr|	d d d d d | jd f }
|	d d d d | jd d f }| j| |
 | |d
\}}n| | |	 \}}W d    n1 sw   Y  |||dddd}| ||}|r|S |||d}|ddd}||t| j| j t| j| j    }||fS )Nr   r:   rf   g              ?zspline z scaled beyond [0, 1]r,   Frb   r   )sizer   r   maxrD   r   reshaper   r   rg   rh   ri   rj   r   r   r   r   r   npr   r   r   r   r   )r   r   rQ   r   b_sc_st_s
z_reshapedaffine_paramsq_tildewv	z_tformedlog_sr   r   r   r$     s.   "
  $,z#SplineTransformationLayerAR.forward)r   r:   r"   r   r+   r   r   r   r   r   Fr   )r&   r'   r(   r   r   r   r$   r)   r   r   r    r   r     s     0	r   c                       s>   e Zd Z												d fd
d	ZdddZ  ZS )SplineTransformationLayerTr   r"   r   r+   r      Fc              	      s   t t|   || _t|d | _|
| _|| _|| _|| _	|	| _
t| _t| _|| _| jr:t| _t| _d| j
 d | _
| j| j
 }t| j|||||dd| _d S )Nrf   r:   F)r   ry   r   )r   r  r   r   r   half_mel_channelsr   r   r   r   r   r   r   r   r   r   r   r   r   )r   r   r   r   r   ry   r   r   r   r   r   r   r   r   r   r   r    r   r   r     s2   z"SplineTransformationLayer.__init__Nc                 C   sr  | d| d| d}}}| j}|d d d |f |d d |d f }	}
|r7|
| j | j| j  }
n|
| j | j| j  }
t|	|fd}| ||}|
	ddd
|| d}|	ddd
|| || j}tjj| jjdda | jr|d d d d d | jd f }|d d d d | jd d f }| j| | | |d\}}|st|d}n|r| | | d\}}n| | | \}}W d    n1 sw   Y  |
||d	ddd}
|r|
| j| j  | j }
tj|	|
fdd}|S |
| j| j  | j }
tj|	|
fdd}|
||d|t| j| j t| j| j    }||fS )	Nr   r:   rf   r,   Frb   r   dim)r   r  r   r   r   r   r   r   r   r   r  r   rg   rh   ri   rj   r   r   r   r   r   r   r  r   )r   r   rQ   r   r   r  r  r  n_halfz_0z_1r   r  z_1_reshapedr	  r
  r  z_1_tformedr  _dcr   r   r   r$     sD   "*  ""z!SplineTransformationLayer.forward)Tr   r"   r   r+   r   r  r  r  r  FFNr%   r   r   r    r   r    s    /r  c                       s>   e Zd Z							d fdd		Zd
d ZdddZ  ZS )AffineTransformationLayerr   Tr   r"   r   r+   Fc              	      s   t t|   |dvrtd|t|tr(tdd |D s'td|n|dvr3td||| _|| _	|dkrMt
t|d |||	||
d	| _n|d
krbtt|d ||||||
d| _ntd| d|| _d S )N)wavenetr   z{} affine model not supportedc                 S   s   g | ]}|d v qS )	translater"   r   r   r   )r.   r#   r   r   r   r1   ]  s    z6AffineTransformationLayer.__init__.<locals>.<listcomp>z{} scaling fn not supportedr  r  rf   )r   r   r   r   r   )r   ry   r   Affine model is not supported: z9. Please choose either 'wavenet' or'simple_conv' instead.)r   r  r   	Exceptionr   
isinstancelistallr   r   r   r   affine_param_predictorr   
ValueErrorr   )r   r   r   r   r   r   ry   r   r   r   r   r    r   r   r   L  sD   







z"AffineTransformationLayer.__init__c           
      C   s  | j dkrt|d }|d }||fS | j dkr$t|}|}||fS | j dkr;t|d d }t|}||fS | j dkrRt|d d }t|}||fS t| j trg g }}t|j	d D ]~}| j | }|dkrt|d | d }|d d |f d }	nG|dkrt|d d |f }|d d |f }	n/|dkrt|d d |f d d }t|}	n|dkrt|d d |f d }t|}	|
|d d d f  |
|	d d d f  qdtj|dd	}tj|dd	}||fS td
| j  d)Nr  r   r"   r   r:   gư>r   
   r  z#Scaling function is not supported: zH. Please choose either 'translate', 'exp', 'tanh', or 'sigmoid' instead.)r   r   r"   r   r   r   r!  r"  r   rk   r   r   r%  )
r   scale_unconstrainedsr  s_list
log_s_listr   	scaling_is_ilog_s_ir   r   r   get_scaling_and_logs  sP   
#

 






z.AffineTransformationLayer.get_scaling_and_logsNc                 C   s   t | jd }|d d d |f |d d |d f }}| jdkr+| j||f|d}n| jdkr@t||fd}	| j|	|d}n	td| j d|d d d |d d f }
|d d |d d d f }| |
\}}|r}|| | }tj||fdd}|S || | }tj||fdd}||fS )	Nrf   r  )r   r   r:   r  z:. Please choose either 'wavenet' or 'simple_conv' instead.r  )r   r   r   r$  r   r   r%  r.  )r   r   rQ   r   r   r  r  r  r  r   r'  br(  r  r   r   r   r$     s(   *

z!AffineTransformationLayer.forward)r   Tr   r"   r   r+   Fr  )r&   r'   r(   r   r.  r$   r)   r   r   r    r   r  K  s    4)r  c                       s(   e Zd Zd
 fdd	Zddd	Z  ZS )ConvAttentionP      r   r   c                    s   t t|   || _tjjdd| _tjjdd| _	t
|| _tt||d ddddtj t|d |ddd| _tt||d ddddtj t|d |dddtj t||ddd| _d S )	Nru   r  rf   Trx   )ry   r-   r}   r:   )ry   r-   )r   r0  r   temperaturer   r   Softmaxsoftmax
LogSoftmaxlog_softmaxr   
query_proj
Sequentialr   ReLUkey_proj)r   r   n_speaker_dimn_text_channelsn_att_channelsr3  r    r   r   r     s"   

zConvAttention.__init__Nc                 C   s   d}|  |}| |}	|	dddddddf |dddddf  d }
d}| |
jddd }
|durI| |
t|dddf |  }
|
 }|durc|
j|	ddd
dtd	  | |
}
|
|fS )
a  Attention mechanism for radtts. Unlike in Flowtron, we have no
        restrictions such as causality etc, since we only need this during
        training.

        Args:
            queries (torch.tensor): B x C x T1 tensor (likely mel data)
            keys (torch.tensor): B x C2 x T2 tensor (text data)
            query_lens: lengths for sorting the queries in descending order
            mask (torch.tensor): uint8 binary mask for variable length entries
                                 (should be in the T2 domain)
        Output:
            attn (torch.tensor): B x 1 x T1 x T2 attention mask.
                                 Final dim T2 should sum to 1
        gMb@?Nrf   g:0yE>r:   T)keepdimr   inf)r;  r8  r   r7  r   r   r   r   masked_fill_r   r   r   r5  )r   querieskeys
query_lensr   key_lens
attn_priortempkeys_encqueries_encattnepsattn_logprobr   r   r   r$     s   

6$$
zConvAttention.forward)r1  r2  r   r1  r   )NNNr%   r   r   r    r   r0    s    r0  c                       s*   e Zd ZdZd fdd	Zdd Z  ZS )GaussianDropouta  
    Gaussian dropout using multiplicative gaussian noise.

    https://keras.io/api/layers/regularization_layers/gaussian_dropout/

    Can be an effective alternative bottleneck to VAE or VQ:

    https://www.deepmind.com/publications/gaussian-dropout-as-an-information-bottleneck-layer

    Unlike some other implementations, this takes the standard deviation of the noise as input
    instead of the 'rate' typically defined as: stdev = sqrt(rate / (1 - rate))
    r   c                    s   t t|   || _d S r   )r   rM  r   stdev)r   rN  r    r   r   r     s   
zGaussianDropout.__init__c                 C   s.   | j s|S tjd| j|j|jd}|| }|S )Nr   )meanstdr   ri   )trainingr   normalrN  rk   ri   )r   inputsnoiseoutr   r   r   r$     s
   zGaussianDropout.forward)r   r   r   r   r    r   rM    s    rM  ))typingr   r   numpyr  r   r   r   torch.nnr   r   'nemo.collections.tts.modules.submodulesr   r   r	   (nemo.collections.tts.parts.utils.helpersr
   r   r   (nemo.collections.tts.parts.utils.splinesr   r   r   rq   scriptr   Moduler   r*   r9   rs   r   r   r   r   r   r   r  r  r0  rM  r   r   r   r   <module>   s8   
49
&&:J`_z=