o
    iQ                     @   s   d dl Z d dlZd dlmZmZ d dlmZ d dlm  mZ d dl	m
Z
mZmZmZ d dlmZmZ dZdd Zd	d
 Zdd Zdd Zdd ZdddZdddZG dd deZdS )    N)nneinsum)Module)	rearrangerepeatpackunpack)SinusoidalEmbeddingsapply_rotary_pos_embg     jc                 C   s   | d uS N )valr   r   S/home/ubuntu/.local/lib/python3.10/site-packages/local_attention/local_attention.pyexists   s   r   c                 C   s   t | s|S | S r   )r   )valuedr   r   r   default      r   c                 C   s   | j | jdS )Ndevicedtyper   tr   r   r   to   s   r   c                 C   s   t | jj S r   )torchfinfor   max)tensorr   r   r   max_neg_value   r   r   c                 C   s   | j }tj| dd}||S )Ndim)r   F	normalizetype)r   r   normedr   r   r   l2norm   s   
r&   r   c                 C   sf   | j | }|| }| rd| fS t|| | }dd|  d }dtj| g |d|R |dfS )NF)r   r      Tr   r   )shape
is_integermathceilr"   pad)r   multipler!   r   seqlenm	remainder
pad_offsetr   r   r   pad_to_multiple#   s   
"r3      r'   c                 C   s^   t | j| d }tj| g |||R |d}|d|| d d}|d|||d S )N)r   r   r(   r4   r   )lenr)   r"   r-   unfoldmovedimflatten)xbackwardforward	pad_valuer!   dimspadded_xtensorsr   r   r   look_around,   s   r@   c                       sJ   e Zd Z													d
 fdd	Z				ddd	Z  ZS )LocalAttentionFr4   N        Tc                    s   t    t||rdnd}|r|dkrJ d|| _|| _|	| _|
| _|| _|| _|| _	t
|| _|| _d | _|| _|r]t|sFt|r_t|rN|d }t||t||d d| _d S d S d S )Nr   r4   z!you cannot look forward if causalr'   )use_xpos
scale_base)super__init__r   scalewindow_sizeautopadexact_windowsizecausallook_backwardlook_forwardr   Dropoutdropout	shared_qkrel_posrC   r   r	   )selfrH   rK   rL   rM   rO   rP   rel_pos_emb_configr!   rI   rJ   rG   use_rotary_pos_embrC   xpos_scale_base	__class__r   r   rF   5   s.   
zLocalAttention.__init__c           1         s  t ||}t|r jsJ d|j jdt | j j j j j	f\}}	}
}}}}}t
dd |||f\\}}\}}\}}|	r[|jd }t
 fdd|||f\\}}\}}\}}g |j|j|jR \}}}}}t  j|d }|| dksJ d	| d
| d|| |rt|}tj||d}t|d|d}t
fdd|||f\}}}|| }t|||
d}t|fi |}t|fi |}t jrވ |\}} t|||| d\}}|}!t|fi |}"t|!d}!t|"d}"|"|
k}#td||}$t|r|jd }%||% dksJ t|d||% d}|$| }$t|$}&|r.|!|"k}'|$|'t}$~'|rN|!|"k }( jrG j j })|(|!|"|) kB }(|$|(|&}$~(|sv jrv j j }* j j }+|"|+ |!k|!|"|* kB |#B },|$|,|&}$n|$|#|&}$t|r|jd }-||- dksJ ||jd  }.|	rt||ddd\}}t|d|d}t|fi i |ddi}t|d}t|d|.d}|$| |&}$~|$jdd}/ |/}/td|/|}0t|0d}0|	r|0d d d |d d f }0t|0|d ^}0}|0S )!NzAcannot perform window size extrapolation if xpos is not turned onr   c                 S   s   t | gdS )N* n d)r   r   r   r   r   <lambda>x   s    z(LocalAttention.forward.<locals>.<lambda>r4   c                    s   t |  jddS )Nr    )r3   rH   r   )rR   r   r   rY   ~   s    g      r   zsequence length z" must be divisible by window size z for local attention)r   z(w n) -> 1 w n)wnc                    s   t | d dS )Nzb (w n) d -> b w n d)r[   )r   r   )windowsr   r   rY      s    )r:   r;   r<   )rG   z... i -> ... i 1z... j -> ... 1 jzb h i e, b h j e -> b h i jzh i j -> (b h) 1 i j)bF)r!   r   z... (w n) -> (...) w nr<   zb ... -> (b h) ...)hr    zb h i j, b h j e -> b h i ezb w n d -> b (w n) drX   ) r   r   rC   r)   rI   rH   rK   rL   rM   rP   mapr   r   rG   r&   r   aranger   dictr@   rQ   r
   r   r   r   masked_fillTOKEN_SELF_ATTN_VALUErJ   r3   softmaxrO   r   )1rR   qkvmask
input_mask	attn_biasrH   r)   rI   r<   rK   rL   rM   rP   packed_shape_orig_seq_len
needed_padr^   r\   dim_headr   r   rG   seqb_tbqbkbvlook_around_kwargspos_emb
xpos_scalebq_tbq_kpad_masksimheads
mask_value	self_maskcausal_maskmax_causal_window_sizemax_backward_window_sizemax_forward_window_sizewindow_maskbatchr_   attnoutr   )rR   r]   r   r;   h   s   
	8&
*""









zLocalAttention.forward)Fr4   NrB   FNNFFNTFN)NNNN)__name__
__module____qualname__rF   r;   __classcell__r   r   rV   r   rA   4   s(    6rA   )r   r   )r4   r   r   r'   )r+   r   r   r   torch.nnr   torch.nn.functional
functionalr"   einopsr   r   r   r   local_attention.rotaryr	   r
   rd   r   r   r   r   r&   r3   r@   rA   r   r   r   r   <module>   s     

	