o
    }oi                      @   s   d dl Z d dlZd dlmZ d dlmZ d dlmZ defddZG dd	 d	ej	Z
G d
d dej	Zdd ZdddZG dd dej	ZG dd dej	ZG dd dej	ZG dd dej	ZG dd dej	ZdS )    N)nn)ConvNorm)PositionalEmbeddingreturnc                 C   s(   |dkr|dkrt d|| d  d S )N   z-Only stride OR dilation may be greater than 1   )
ValueError)kernel_sizestridedilation r   Z/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/tts/modules/mixer_tts.pyget_same_padding   s   r   c                       $   e Zd Z fddZdd Z  ZS )SameLensMaskedConv1dc              	      s(   t    tj|||||||d| _d S )Nr	   r
   r   paddinggroups)super__init__r   Conv1dconv)selfin_channelsout_channelsr	   r
   r   r   r   	__class__r   r   r      s   
zSameLensMaskedConv1d.__init__c                 C   s&   |  |dddd| }||fS )Nr   r   )r   	transposer   xmaskr   r   r   forward,   s   zSameLensMaskedConv1d.forward__name__
__module____qualname__r   r!   __classcell__r   r   r   r   r      s    r   c                       r   )SameLensMaskedLinearc                    s   t    t||| _d S N)r   r   r   Linearlinear)r   r   r   r   r   r   r   2   s   
zSameLensMaskedLinear.__init__c                 C   s   |  || }||fS r(   )r*   r   r   r   r   r!   6   s   zSameLensMaskedLinear.forwardr"   r   r   r   r   r'   1   s    r'   c                 C   s
   t | |S r(   )r'   in_featout_featr   r   r   create_channel_mix_layer;   s   
r.      r   
depth-wisec           	   	   C   sH   t |||d}|dkrd}n	|dkr| }ntt| ||||||d}|S )N)r
   r   originalr   r0   r   )r   NotImplementedErrorr   )	r,   r-   r	   r
   	conv_typer   r   r   r   r   r   r   create_time_mix_layer?   s   r4   c                       s&   e Zd Z fddZdddZ  ZS )Mixc                    s<   t    || _t | _t|| _|| _t|| _	d S r(   )
r   r   first_mix_layerr   GELUactDropoutdrop_1second_mix_layerdrop_2)r   r6   r;   dropoutr   r   r   r   Q   s   

zMix.__init__Nc                 C   sF   |  ||\}}| |}| |}| ||\}}| |}||fS r(   )r6   r8   r:   r;   r<   r   r   r   r   r!   Z   s   


zMix.forwardr(   r"   r   r   r   r   r5   P   s    	r5   c                       r   )PreNormResidualc                    s    t    || _t|| _d S r(   )r   r   fnr   	LayerNormnorm)r   r?   feature_dimr   r   r   r   d   s   
zPreNormResidual.__init__c                 C   s&   |  | ||\}}|| }||fS r(   )r?   rA   )r   r   r    new_xr   r   r   r!   i   s   zPreNormResidual.forwardr"   r   r   r   r   r>   c   s    r>   c                       r   )MixerTTSBlockc              	      sn   t    ttt||||dt||||d|d|d| _ttt||| dt|| |d|d|d| _d S )N)r,   r-   r	   r3   )r6   r;   r=   )r?   rB   r+   )r   r   r>   r5   r4   time_mixr.   channel_mix)r   r,   expansion_factorr	   r3   r=   r   r   r   r   p   s*   
	zMixerTTSBlock.__init__c                 C   s(   |  ||\}}| ||\}}||fS r(   )rE   rF   r   r   r   r   r!      s   zMixerTTSBlock.forwardr"   r   r   r   r   rD   o   s    rD   c                       s0   e Zd Z				d	 fdd	Zd
ddZ  ZS )MixerTTSModuler   r0              c	           	         sr   t    t||krt| _|dkrtj||dnt | _tj	 fdd|D  | _
t| _d S )N)padding_idxc                    s   g | ]
}t | qS r   )rD   ).0r	   r3   r=   rG   rB   r   r   
<listcomp>   s    z+MixerTTSModule.__init__.<locals>.<listcomp>)r   r   lenr   d_modelr   	EmbeddingIdentityto_embed
Sequentialmixer_blocksr@   rA   )	r   
num_tokensrB   
num_layerskernel_sizesrL   r3   rG   r=   r   rN   r   r      s   
zMixerTTSModule.__init__c                 C   sF   |  |}|| }|| }| jD ]	}|||\}}q| |}||fS r(   )rT   rV   rA   )r   r   r    conditioningblocklensr   r   r   r!      s   


zMixerTTSModule.forward)r   r0   rI   rJ   )r   r"   r   r   r   r   rH      s    rH   c                       s,   e Zd ZdZd	 fdd	Zd
ddZ  ZS )SelfAttentionModulez'Self-attention for lm tokens and text.      c              
      s   t    t|| _t|| _tt||ddddtj	 t||ddd| _
tt||ddddtj	 t||ddd| _tt||ddddtj	 t||ddd| _t|| _d S )Nr/   Trelu)r	   biasw_init_gainr   )r	   ra   )r   r   r   text_pos_emb
lm_pos_embr   rU   r   torchReLU
query_projkey_proj
value_projmathsqrtscale)r   n_text_channelsn_lm_tokens_channelsr   r   r   r      s&   


zSelfAttentionModule.__init__Nc                 C   s$  t j|d|jd|j}t j|d|jd|j}| |}| |}	|dur3||d }|dur>|	|d }	|| 	dd}||	 	dd}||	 	dd}| 
|	dd}
| |}| |	dd}t |
|| j }|dur||d td  t t j|dd|S )	a  Forward pass of self-attention.

        Args:
            queries (torch.tensor): B x T1 x C1 tensor
            keys (torch.tensor): B x T2 x C2 tensor
            values (torch.tensor): B x T2 x C2 tensor
            q_mask (torch.tensor): B x T1 tensor, bool mask for variable length entries
            kv_mask (torch.tensor): B x T2 tensor, bool mask for variable length entries
        Output:
            attn_out (torch.tensor): B x T1 x C1 tensor
        )deviceNr   r   rK   inf)dim)re   arangesizerp   todtyperc   rd   	unsqueezer   rg   rh   ri   matmulrl   masked_fill_floatsoftmax)r   querieskeysvaluesq_maskkv_mask	pos_q_seq
pos_kv_seq	pos_q_emb
pos_kv_embqueries_enckeys_enc
values_encscoresr   r   r   r!      s$   


zSelfAttentionModule.forward)r^   r_   )NN)r#   r$   r%   __doc__r   r!   r&   r   r   r   r   r]      s    r]   )r/   r   r0   r   )rj   re   r   'nemo.collections.tts.modules.submodulesr   (nemo.collections.tts.modules.transformerr   intr   Moduler   r'   r.   r4   r5   r>   rD   rH   r]   r   r   r   r   <module>   s   

 *