o
     i&                     @   s  d dl Z d dlmZ d dlZd dlZd dlmZmZm	Z	m
Z
 dd Zddddd	Zd
dddZddddZdd Zdd ZdejdejfddZdededejfddZdededejfddZdedejfd d!ZdJd"d#ZdJd$d%ZdJd&d'ZdKd(d)ZdJd*d+Zd,d- ZdLd.d/ZdMd1d2Zd3d4 Z d5ejd6edejfd7d8Z!d9ed:ej"dejfd;d<Z#d=ed6ed>efd?d@Z$d=ed6ed>efdAdBZ%d=ed6ed>efdCdDZ&d=ed6ed>efdEdFZ'dGejd6efdHdIZ(dS )N    N)List)BigBirdSparsityConfigBSLongformerSparsityConfigFixedSparsityConfigVariableSparsityConfigc                  G   s   dd | D }t j| S )Nc                 S   s   g | ]}t |qS  )torcharange).0sr   r   d/home/ubuntu/.local/lib/python3.10/site-packages/xformers/components/attention/attention_patterns.py
<listcomp>   s    z%_generate_nd_grid.<locals>.<listcomp>)r   meshgrid)sizescoordsr   r   r   _generate_nd_grid   s   
r          @pweightsc                 G   sj   |d u r
dt | }t |t |ksJ t| }dd t||D }tj|dd }tj||| d}|S )N   c                 S   s   g | ]
\}}|  | qS r   )flatten)r
   iwr   r   r   r       s    z%local_nd_distance.<locals>.<listcomp>r   )dimr   )lenr   zipr   stackfloatcdist)r   r   r   griddr   r   r   local_nd_distance   s   r$   r   sigmac                 G   s,   t |ddid }td| d  | }|S )Nr   r      g      g       )r$   r   exp)r&   r   r#   r   r   r   local_nd_gaussian_distribution&   s   r)   r   c                 G   s   t |d|i}|| k S )Nr   r$   )distancer   r   r#   r   r   r   local_nd_pattern,   s   r,   c                  G   s   t | ddi}|dk S )Nr   r   r'   r*   )r   r#   r   r   r   axial_nd_pattern1   s   r-   c                 C   s~   t j| t jd}|  dkr,|  } | |   } tjj|  || 	 dd}t 
|}n
t j| 	 |dd}d|d|< |S )Ndtypei   F)r   replace)replacementT)r   
zeros_likeboolnumeldoublesumnprandomchoicer   	as_tensormultinomialview)dist_matrixnnzattidxsr   r   r   &random_pattern_from_probability_matrix7   s   rB   attention_query_maskreturnc                 C   sB   | j dksJ | jtjksJ | d d d f } | | ddB }|S )Nr   r   )ndimr/   r   r4   	transpose)rC   maskr   r   r   global_token_patternL   s
   rH   	attn_sizesparsityc                 C   s0   d|  k rdk sJ  J t | | |k}|S )Nr   r   )r   rand)rI   rJ   rG   r   r   r   random_patternT   s   rL   window_sizec                 C   s.   |d dks
J d|d d }t | |ddS )Nr'   r   zFThe window size is assumed to be odd (counts self-attention + 2 wings)g      ?r+   r   r,   )rI   rM   
h_win_sizer   r   r   local_1d_pattern[   s
   rQ   c                 C   s   t t j| | t jd}|S )Nr.   )r   trilonesr4   )rI   rG   r   r   r   causal_1d_patternc   s   rT   c                 C      t | ||dd}|S )Nr   r   r   r*   HWr   r#   r   r   r   horizontal_axial_2d_distancei      rZ   c                 C   rU   )Nr   r   r   r*   rW   r   r   r   vertical_axial_2d_distancen   r[   r]   c                 C      t | ||dS )Nr   r*   )rX   rY   r   r   r   r   local_2d_distances      r_   c                 C   r^   )Nr%   )r)   )rX   rY   r&   r   r   r   local_2d_gausian_distributionw   r`   ra   c                 C   s   t | |||dS )NrN   rO   )rX   rY   r+   r   r   r   r   local_2d_pattern{   s   rb   c                 C   s
   t | |S )N)r-   )rX   rY   r   r   r   axial_2d_pattern   s   
rc   c                 C   s:  | | dksJ || dksJ d|  kr|k s"J d J dt | |\}}|d |d }}t|| dk}| | | }|| | }t ||\}	}
| | }|d | }|	| | }	|
| | }
t| | gd }t|	 |
 gd }tj||ddd}|d d d f |d d d f k}|S )Nr   z shift_size must in 0-window_sizeg      ?r'   r   r   )r   intr   r   r   r    r!   argmin)rX   rY   rM   
shift_sizer   jextragrid_hgrid_wiijjr   offsetinput_coordsanchors_coords	anchor_idrG   r   r   r   swin_attention_pattern   s$   $
 rq   r'   c                 C   sD   t | |ddd}t | |ddd}| | dk| | dk@ }|S )z
    Returns a 2d pattern that samples 1 every k elements in the attention mask.
    Can be seen as a form of downsampling, where every pixel attends to a downsampled
    version of the input.
    r   rV   r   r\   r   )r$   floor)rX   rY   kd_hd_wr#   r   r   r   dilated_2d_pattern   s    rv   c                 C   s   t j| d| ||f| j| jd}tt|jdd D ]-\}\}}}| dd||| |d | || |d | f |dd|ddddf< q|S )z>
    Block sparsify a tensor, given a mask and block size
    r   )r/   deviceT)as_tupleNr   )	r   emptysizer7   r/   rw   	enumerater   nonzero)xrG   
block_sizeretidxhr   rg   r   r   r   block_sparsify_tensor   s   "r   rG   r~   c                 C   s   | j dks	J dd}| j dkr| d} d}| jd | dkr)| jd | dks-J dtjjj| tj||d}|tj	}|rH|
d |S )	z
    Given a mask pattern and blocksize, return the corresponding layout
    which makes sure that all the positives in the mask are covered
    r'   z/We're expecting [Heads, Seq, Seq] or [Seq, Seq]Fr   Tr   z1We're only handling masks divisible by block_size)kernel_sizestride)rE   	unsqueezeshaper   nn
functional
max_pool2dtor    longsqueeze_)rG   r~   _should_squeezelayoutr   r   r   pattern_to_layout   s   

&
r   	threshold
mask_shapec                    sv   dt f fdd |d }|d }t |}|ddt|dd|dd }||d|}|| k S )a1  
    Use the additive bias computation from ALiBi_ to generate a mask.
    Note that this mask can in turn be used to generate a blocksparse attention computation layout

    .. note: mask_shape is expected to hold the [heads, seq, seq] dimensions

    .. _ALiBi: https://arxiv.org/pdf/2108.12409.pdf
    nc                    sj   dt dtt fdd}t|  r|| S dtt|  }|| d| dd d d | |   S )Nr   rD   c                    s6   ddt | d       fddt| D S )Nr'      c                    s   g | ]} |  qS r   r   )r
   r   ratiostartr   r   r      s    zTalibi_pattern.<locals>.get_slopes.<locals>.get_slopes_power_of_2.<locals>.<listcomp>)mathlog2range)r   r   r   r   get_slopes_power_of_2   s   z@alibi_pattern.<locals>.get_slopes.<locals>.get_slopes_power_of_2r'   r   )rd   r   r    r   r   
is_integerrr   )r   r   closest_power_of_2
get_slopesr   r   r      s    z!alibi_pattern.<locals>.get_slopesr   r   r2   )rd   r   Tensorr   r	   expandr=   )r   r   maxpos
attn_headsslopesalibir   r   r   alibi_pattern   s   r   	num_headsseq_lenc                 C      t | |d}||S N)r   r~   )r   make_layoutr   r~   r   configr   r   r   quick_fixed_layout     
r   c                 C   r   r   )r   r   r   r   r   r   quick_variable_layout  r   r   c                 C   r   r   )r   r   r   r   r   r   quick_bigbird_layout  r   r   c                 C   r   r   )r   r   r   r   r   r   quick_bslongformer_layout  r   r   r   c                 C   s   t | t ||S )z
    create a pattern of shape [heads, seq, seq] out of a blocksparse
    layout of shape [heads, seq/block_size, seq/block_size]
    )r   kronrS   )r   r~   r   r   r   layout_to_pattern"  s   r   )r   r   )r   )r'   ))r   typingr   numpyr8   r   -xformers.components.attention.sparsity_configr   r   r   r   r   r$   r)   r,   r-   rB   r   rH   rd   r    rL   rQ   rT   rZ   r]   r_   ra   rb   rc   rq   rv   r   r   Sizer   r   r   r   r   r   r   r   r   r   <module>   s>   	






1