o
    }oi@)                     @   s   d Z ddlZddlZddlZddlmZ ddlmZm	Z	 dd Z
G dd dejjZG dd	 d	ejZG d
d dejZG dd dejjZG dd dejZG dd dejZG dd dejZdS )z]
Adapted from:
https://github.com/openai/guided-diffusion/blob/main/guided_diffusion/unet.py
    N)
custom_bwd
custom_fwdc                 C   sL   |d j ^}}}tt|}d| |d  | }|  jt|g7  _dS )a(  
    A counter for the `thop` package to count the operations in an
    attention operation.
    Meant to be used like:
        macs, params = thop.profile(
            model,
            inputs=(inputs, timestamps),
            custom_ops={QKVAttention: QKVAttention.count_flops},
        )
    r      N)shapeintnpprod	total_opstorchDoubleTensor)model_xybcspatialnum_spatial
matmul_ops r   y/home/ubuntu/.local/lib/python3.10/site-packages/nemo/collections/multimodal/modules/imagen/diffusionmodules/attention.pycount_flops_attn   s   r   c                   @   s$   e Zd Zedd Zedd ZdS )StableAttentionOpc              	   C   s8   t d||t|jd  jdd}| ||| |S )Nncq,nck->nqk   r   dim)r
   einsummathsqrtr   softmaxsave_for_backward)ctxqkwr   r   r   forward5   s   &zStableAttentionOp.forwardc           	      C   s   | j \}}}| jtdddgddjdd}|| }tj||d|jd|j}|t	
|jd  }td	||| }td
||| }||fS )Ninfr   r   Tr   keepdim-C6?mingrad_outputoutputr   input_dtypenck,nqk->ncqncq,nqk->nck)saved_tensorsdetachnormfloatclipr
   _softmax_backward_datadtypetor   r   r   r   )	r!   dwr"   r#   r$   sdbdqdkr   r   r   backward;   s   $zStableAttentionOp.backwardN)__name__
__module____qualname__staticmethodr%   r?   r   r   r   r   r   /   s
    
r   c                       4   e Zd ZdZ fddZdd Zedd Z  ZS )QKVStableAttentionP
    A module which performs QKV attention and splits in a different order.
    c                       t    || _d S Nsuper__init__n_headsselfrL   	__class__r   r   rK   R      

zQKVStableAttention.__init__c              	   C   s   |j \}}}|d| j  dksJ |d| j  }|jddd\}}}||| j ||}||| j ||}t||}	td|	||| j ||}
|
|d||	fS )
        Apply QKV attention.

        :param qkv: an [N x (3 * H * C) x T] tensor of Qs, Ks, and Vs.
        :return: an [N x (H * C) x T] tensor after attention.
           r   r   r   bts,bcs->bct)r   rL   chunkreshaper   applyr
   r   )rN   qkvbswidthlengthchr"   r#   vweightar   r   r   r%   V   s   zQKVStableAttention.forwardc                 C      t | ||S rH   r   r   r   r   r   r   r   count_flopsn      zQKVStableAttention.count_flops	r@   rA   rB   __doc__rK   r%   rC   rd   __classcell__r   r   rO   r   rE   M   s    rE   c                       rD   )QKVAttentionrF   c                    rG   rH   rI   rM   rO   r   r   rK   x   rQ   zQKVAttention.__init__c              	   C   s   |j \}}}|d| j  dksJ |d| j  }|jddd\}}}dtt| }	td||	 || j ||||	 || j ||}
tj|
	 dd
|
j}
td|
||| j ||}||d||
fS )rR   rS   r   r   r   bct,bcs->btsrU   rT   )r   rL   rV   r   r   r
   r   viewr   r5   typer8   rW   )rN   rY   rZ   r[   r\   r]   r"   r#   r^   scaler_   r`   r   r   r   r%   |   s   zQKVAttention.forwardc                 C   ra   rH   rb   rc   r   r   r   rd      re   zQKVAttention.count_flopsrf   r   r   rO   r   ri   s   s    ri   c                   @   s,   e Zd Zeedd Zeedd ZdS )StableMaskedAttentionOpc              	   C   s\   t d }td||t|jd  }|||}|jdd}| }| 	|||| |S )Nr&   r   r   r   r   )
r5   r
   r   r   r   r   masked_fillr   nan_to_num_r    )r!   r"   r#   maskmax_neg_valuer$   r   r   r   r%      s   
zStableMaskedAttentionOp.forwardc                 C   s   | j \}}}}t|jj }| jtdddgddjdd}|| }tj	||d|jd}|
 |d	}	|t|jd  }td
||	| }
td||	| }|| |
  | }|
||fS )Nr&   r   r   Tr'   r)   r*   r,   r   r0   r1   )r2   r
   finfor8   maxr3   r4   r5   r6   r7   clonemasked_fill_r   r   r   r   )r!   r:   r"   r#   r$   rq   rr   r;   r<   db_inr=   r>   dmaskr   r   r   r?      s   $
z StableMaskedAttentionOp.backwardN)r@   rA   rB   rC   r   r%   r   r?   r   r   r   r   rn      s    rn   c                       rD   )QKVMaskedAttentionY
    A module which performs QKV attention.
    Attention mask is accepted as input.
    c                    rG   rH   rI   rM   rO   r   r   rK      rQ   zQKVMaskedAttention.__init__c              	   C   s  |j \}}}|j \}}}	|| j dksJ || j }
dtt|
 }td|| || j |
||| || j |
|	}|j| jdd}|j |j ksOJ td }|	| |}tj
| dd|j}| }td|||| j |
|	}||d||fS )M  
        Apply QKV attention with attention mask.

        Args:
            q: an [N x d x n_seq1] of queries.
            k: an [N x d x n_seq2] of keys.
            v: an [N x d x n_seq2] of values.
            mask: Attention mask of size N x n_seq1 x n_seq2

        Returns: an [N x d x n_seq1] tensor after attention.
        r   r   rj   r   r&   rU   rT   )r   rL   r   r   r
   r   rk   repeat_interleaver5   ro   r   rl   r8   rp   rW   )rN   r"   r#   r^   rq   rZ   r[   length_q_length_kr]   rm   r_   rr   r`   r   r   r   r%      s$   

zQKVMaskedAttention.forwardc                 C   ra   rH   rb   rc   r   r   r   rd      re   zQKVMaskedAttention.count_flopsrf   r   r   rO   r   ry      s    )ry   c                       rD   )QKVStableMaskedAttentionrz   c                    rG   rH   rI   rM   rO   r   r   rK      rQ   z!QKVStableMaskedAttention.__init__c              	   C   s   |j \}}}|j \}}}	|| j dksJ || j }
||| j |
|}||| j |
|	}|j| jdd}t||| }td|||| j |
|	}||d||fS )r{   r   r   rT   rU   )	r   rL   rk   r|   rn   rX   r
   r   rW   )rN   r"   r#   r^   rq   rZ   r[   r}   r~   r   r]   r_   r`   r   r   r   r%      s   
z QKVStableMaskedAttention.forwardc                 C   ra   rH   rb   rc   r   r   r   rd     re   z$QKVStableMaskedAttention.count_flopsrf   r   r   rO   r   r      s    r   c                       s(   e Zd ZdZ fddZdd Z  ZS )SelfAttentionPoolingz
    Implementation of SelfAttentionPooling 
    Original Paper: Self-Attention Encoding and Pooling for Speaker Recognition
    https://arxiv.org/pdf/2008.01077v1.pdf
    Taken from: https://gist.github.com/pohanchi/c77f6dbfbcbc21c5215acde4f62e4362
    c                    s    t t|   t|d| _d S )Nr   )rJ   r   rK   nnLinearW)rN   	input_dimrO   r   r   rK   *  s   zSelfAttentionPooling.__init__c                 C   s<   t jj}|| |dddd}tj|| dd}|S )z
        input:
            batch_rep : size (N, T, H), N: batch size, T: sequence length, H: Hidden dimension
        
        attention_weight:
            att_w : size (N, T, 1)
        
        return:
            utter_rep: size (N, H)
        rU   r   r   )r   
functionalr   r   squeeze	unsqueezer
   sum)rN   	batch_repr   att_w	utter_repr   r   r   r%   .  s   zSelfAttentionPooling.forward)r@   rA   rB   rg   rK   r%   rh   r   r   rO   r   r   "  s    r   )rg   r   numpyr   r
   torch.nnr   torch.cuda.ampr   r   r   autogradFunctionr   ModulerE   ri   rn   ry   r   r   r   r   r   r   <module>   s   &#&8.