o
     iK                     @   s  d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	m
Z
mZ d dlmZ d dlmZ dedejfdd	Z		d.d
edededededededeee  dee fddZdejdedededee deee ee f fddZ	d/dejdededededee fdd Zdejdedededeee ee f f
d!d"Zd0d$d%Zd1dejfd&d'Zd(ejd)ejd*ee d+edeejejejf f
d,d-ZdS )2    N)ListOptionalSequenceTupleType)AttentionBiasfmha)AttentionBiasSubTensor)AttentionOpBaseshapereturnc                  O   sN   d}t jg | d d || d | d |  R fi |d dd| d S )N         r   )torchrandnnarrow)r   kwargsalign_to r   L/home/ubuntu/.local/lib/python3.10/site-packages/xformers/attn_bias_utils.py_create_aligned_bias   s   
r   
batch_size	num_headsnum_heads_groupsq_lenkv_lenrequires_gradfmtop	page_sizec                    s  | d u s	t d | rd S tdtt|| ||	gdddddd }| tju r|	dkr6||9 }d	}|
d ur]t	|
t
jjr]tj|||| f||d
d }|	dv r\|d d df }n*t|||| ||d
}tj |dddd |d	 d  d	 f< |	dv r|d d df }|r|d |	dkr|d d df }|S | t
jju r|  S | t
jju r|  S | t
jju r| |S | t
jju rt|||| ||d
}|	dv r|d d df }|	dkr|d d df }|r|d t
j|S | t
jjt
jjt
jjt
jjt
jjfv r|	dv s	J d }| t
jjt
jjhv rd}n| t
jjkr+|d us'J |d	 }t
jjjt|| |d }| t
jju rD| }| t
jjt
jjhv rtt
jj|j|j |j!d}|d usbJ | t
jju ro|"|}n|#|}| t
jju r|$ }|S | t
jj%t
jj&t
jj't
jj(t
jj)fv r|	dv sJ t*|| \}}t	| t
jj(r| j+n| }| t
jj&u r|j,| |t-|t-|d}n|j| |d}t	| t
jj(r|d usJ  | d	 | }tj./t0|| || |tj1d
2||}|j3||| dS |S | t
jj4t
jj5fv rJ|	dv sJ | t
jj5u r#d nd}t|| |\}} | fdd|D g }t
jj5j|||dS t	| t
jj6r|	dv sYJ |d us`J  | d	 | }|| }t	| t
jj7r~t*|| \}}nt8|||d} fddt0|D }|| fddt9|D }|:|| |  | j+}|j|||d}tj./t0|| || |tj1d
2||}|j3||| || dS | t
jj;kr| dddddS J d|  )N-r      i,  )r   r      r   r$   BMKr   devicedtype)r%   BMHKT)BMGHKr)   )max_q_minus_k)	q_seqinfo	k_seqinfo_batch_sizes)r)   r*   )q_seqlen
kv_padding	kv_seqlenwindow_size)r/   r0   r1   )block_tablesr!   
paged_typec                    s   g | ]
}  d | qS r   randint).0ki)rtotal_kv_lenr   r   
<listcomp>   s    z$create_attn_bias.<locals>.<listcomp>)r/   kv_seqstartsr1   Fc                    s   g | ]} d  qS r   r6   )r8   _r   r:   r   r   r<          c                    s(   g | ]\}}|   d |  qS r5   r6   )r8   ir9   )r:   row_sizer   r   r<      s   ( )r3   r!   r4   notional_padding   )window_leftwindow_rightzUnsupported bias type: )<
isinstancerandomRandomjoinmapstrr7   r   Tensor
issubclassr   triton_splitkFwOpr   r   mathinfrequires_grad_	attn_biasLowerTriangularMask"LowerTriangularFromBottomRightMask0LowerTriangularFromBottomRightLocalAttentionMask!LowerTriangularMaskWithTensorBiasBlockDiagonalMaskBlockDiagonalCausalMask&BlockDiagonalCausalFromBottomRightMask%BlockDiagonalCausalLocalAttentionMask4BlockDiagonalCausalLocalAttentionFromBottomRightMaskfrom_seqlens_rand_seqlensmake_causalr,   r-   r.   make_local_attention%make_local_attention_from_bottomrightmake_causal_from_bottomrightBlockDiagonalPaddedKeysMask/BlockDiagonalCausalLocalAttentionPaddedKeysMask+BlockDiagonalCausalWithOffsetPaddedKeysMask PagedBlockDiagonalPaddedKeysMask0PagedBlockDiagonalCausalWithOffsetPaddedKeysMask_rand_seqlens_padded_k_UNPAGED_TYPEfrom_seqlens_localmintensorsamplerangeint32reshape
make_paged*BlockDiagonalCausalWithOffsetGappyKeysMaskBlockDiagonalGappyKeysMaskPagedBlockDiagonalGappyKeysMask/PagedBlockDiagonalCausalWithOffsetGappyKeysMask_rand_maxed_partition	enumerateappend!LocalAttentionFromBottomRightMask)	bias_typer   r   r   r   r   r'   r(   r   r   r    r!   r2   rU   r+   
block_diagqkblock_diag_typeg_block_diagpages_per_rowr3   startstotal_queriesr   )r   r:   rC   r;   r   create_attn_bias   s   
&
	
	



r   r:   bsr+   c                 C   s  |dkr
||ks
J ||9 }||9 }g }g }t d|d t d|d g}t d|d t d|d g}t||k rt||k r|du rU| j| }	||	 || j|  nd|t|d }
|t|d }|
|| ksJ d|
d|d|d	|d
|d|d|t||
| }| d|d }	||	 |
| | d }|dksJ |dkr||	| d|  n||	 t||k rt||k s>|t|dd  |d< |t|dd  |d< ||fS )aC  
    Generates lists of lengths of query blocks and corresponding key blocks.
    The total number of queries will be bs * q_len and the
    total number of keys will be bs * kv_len.
    max_q_minus_k: maximum allowed num_queries - num_keys.
        For "bottom-right" masks it's 0, we need to have more keys than
        queries, otherwise some queries have no keys to attend to.
        For BlockDiagonalCausalMask it's None, there is no constraint
        on num_queries - num_keys.
        For BlockDiagonalCausalLocalAttentionMask it's equal
        to the window size.
    r   r   
   r$   Nz
keys_left=z queries_left=z max_q_minus_k=z kv_len=z q_len=z seqlens_k=z seqlens_q=r   )maxsum	randrangerz   rm   )r:   r   r   r   r+   	seqlens_q	seqlens_kstep_qstep_knum_queries	keys_leftqueries_leftmax_queries_to_takeextra_keys_availabler   r   r   r`      s<   

,

$r`   Ttotalnmxpositivec                 C   sd   |r
||8 }|d8 }|  t|| |}tj||tjd}d| |< |d}|r.|d7 }| S )Nr   r(   )ro   rp   r   zerosrq   flattenr   tolist)r:   r   r   r   r   idxsyzr   r   r   rx   H  s   
rx   c                    s\   | krt d| kr g|  }}||fS t|| | } fdd|D }||fS )Nzneed more queries than keysc                    s   g | ]} | qS r   r6   )r8   rB   r@   r   r   r<   j  rA   z*_rand_seqlens_padded_k.<locals>.<listcomp>)
ValueErrorrx   )r:   r   r   r   	q_seqlens	k_seqlensr   r@   r   rj   Z  s   	rj           c           	         s  j dkr%dtf fddtjfddtjd D ddS j d	kr9|d
ks0J t dS    d urKndjd d   dd } d urt	 t
tfr jjd djd jd fjtjd}n }|j d	krjd |jd |jd  ksJ |dg|jdd  }||  }|d}|d ur||d|   }| S )NrE   groupc                    sH   t  tjjr jr d d | f S  S t  tjr" d d | f S  S N)rH   r   rU   r	   HOLDS_DENSE_TENSORr   rN   )r   )rU   r   r   attn_bias_groupq  s   z&ref_attention.<locals>.attn_bias_groupc              	      sX   g | ](}t d d d d |f d d d d |f d d d d |f  |dqS )NscalerU   )ref_attention_bmhk)r8   g)r   r   r~   r   vr   r   r<   z  s    z!ref_attention.<locals>.<listcomp>r$   dim   r   r   r   r   g      ?r   r&   )ndimintr   stackrp   r   r   float	transposerH   r   r	   materializer'   float32rr   softmax)	r~   r   r   rU   	drop_maskpr   attnattn_bias_tensorr   )rU   r   r   r~   r   r   r   ref_attentionn  sF   



"
r   c                 C   s   | j dksJ dd }t|ttfr@|j| jd | jd | jd |jd f| jtjd	| jd | jd  | jd |jd g}t
|| ||||||d}|	| jd | jd | jd |jd	 g}|d
S )Nr   c                 S   s2   |  d| jd | jd  | jd | jd gS )Nr   r$   r   r   r   r$   r   r   )permuterr   r   )tr   r   r   T  s   
$zref_attention_bmhk.<locals>.Tr   r$   r   r&   )r   r   r   )r   rH   r   r	   r   r   r'   r   r   rr   r   r   )r~   r   r   rU   r   r   outr   r   r   r     s   "(*
r   cache_kcache_v
kv_seqlensBLOCK_Nc                    sP   fdd|D }t |}| j\}}}}	tj|||	| j| jd}
tj|||	| j| jd}d}t|D ]0}| |d|| f  |
||||  < ||d|| f  |||||  < ||| 7 }q2|  d   }tj|dtj	d
d||}t|jddt| jdd	
d  }||  jtj	d
}||

d|
dfS )a  
    Create block tables and pages K/V cache for testing paged attention.
    Args:
        cache_k, cache_v: K/V caches, each of shape [B, MAX_T, H_kv, D].
            Note that these tensors are unexpanded,
            i.e. for multiquery case cache_k.shape[2] = 1
        kv_seqlens: list of K/V sequence lengths
        BLOCK_N: number of tokens per per paged attention block
        B: batch size
    Returns:
        block_tables: [B, MAX_BLOCKS]
        packed_cache_k: [1, total_len_rounded, H_kv, D]
        packed_cache_v: [1, total_len_rounded, H_kv, D]
    where total_len_rounded is a sum of K/V seqlens, each rounded up
    to a multiple of BLOCK_N.
    c                    s    g | ]}|  d      qS r>   r   )r8   xr   r   r   r<     s     z!pack_kv_cache.<locals>.<listcomp>r&   r   Nr   cudar   )r'   r   )r   r   r   emptyr'   r(   rp   clonearangerq   	unsqueezeexpandrn   cumsumto
contiguous)r   r   r   r   kv_seqlens_roundedtotal_len_roundedBMAX_THDpacked_cache_kpacked_cache_vseqstartbnum_blocks_per_rowr3   	seqstartsr   r   r   pack_kv_cache  sP   r   )NN)T)NNr   Nr   )rR   rI   typingr   r   r   r   r   r   xformers.opsr   r   xformers.ops.fmha.attn_biasr	   xformers.ops.fmha.commonr
   r   rN   r   boolrM   r   rJ   r`   rx   rj   r   r   r   r   r   r   r   <module>   s   	


 a
J


9