o
    i                     @   sb   d dl mZ d dlZd dlZd dlmZ d dlmZ ddlmZ G dd dZ	G d	d
 d
eZ
dS )    )OptionalN)Tensor)Module   )Errorsc                   @   s\   e Zd ZU eed< ee ed< defddZedefddZde	fdd	Z
ed
d ZdS )AttentionMask	bool_mask_logit_maskc                 C   s2   |j tjkr
td|| _tjtt d | _	d S )Nz7Expected the attention mask to be of dtype 'torch.bool')
dtypetorchbool
ValueErrorr   jitannotater   r   r	   )selfr    r   Y/home/ubuntu/.local/lib/python3.10/site-packages/curated_transformers/models/attention.py__init__   s   zAttentionMask.__init__returnc                 C   s4   | j d u rd| j  d | _ | j }|d usJ |S )Ng      ?g   )r	   r   int)r   
logit_maskr   r   r   r      s
   
zAttentionMask.logit_maskc                 C   s
   | j  S N)r   dimr   r   r   r   r       s   
zAttentionMask.dimc                 C   s   | j jS r   )r   shaper   r   r   r   r   #   s   zAttentionMask.shapeN)__name__
__module____qualname__r   __annotations__r   r   propertyr   r   r   r   r   r   r   r   r   
   s   
 r   c                
       sF   e Zd Zdddef fddZdededed	ed
ef
ddZ  ZS )ScaledDotProductAttentiong?)dropout_probr!   c                   s   t    tjj|d| _d S )N)p)superr   r   nnDropoutdropout)r   r!   	__class__r   r   r   *   s   
z"ScaledDotProductAttention.__init__kqv	attn_maskr   c                 C   sz   |  dkr
td|jd }||dd }|t| }|j\}}||j|dd|7 }|jdd}	| 	|	| }
|
S )zw
        Shapes:
            k, q, v - (batch, heads, seq_len, width)
            attn_mask - (batch, seq_len)
        r   z@The attention mask must be a 2D-tensor of shape [batch, seq_len]   )r   )
r   r   r   	transposemathsqrtr   viewsoftmaxr&   )r   r)   r*   r+   r,   	model_dimattn_scoresbatchseq_lenattn_weightsattn_valuesr   r   r   forward.   s   	

z!ScaledDotProductAttention.forward)	r   r   r   floatr   r   r   r;   __classcell__r   r   r'   r   r    )   s    r    )typingr   r1   r   r   torch.nnr   errorsr   r   r    r   r   r   r   <module>   s    