o
    Ii2,                     @   s   d dl Z d dlZd dlmZmZ d dlmZmZ dddZ		ddd	Z		
	 				dddZ
									
	 				dddZdS )    N)	rearrangerepeat)	pad_inputunpad_inputrandomFc                 C   s   |dv sJ |dkrt j|df| |t jd}n.|dkr2t jt|r"dnd| d | d |df|d}n|d	krEt j| d
 | d |df|d}|r\t|D ]}|d dkrWd||< qKd|d< tt j| |dd|d|k }|S )N)fullr   thirdr      devicedtyper   r      )r   r         zs -> b sb)torchr   int32randintmaxranger   arange)
max_seqlen
batch_sizer   modezero_lengthslengthsipadding_mask r    D/home/ubuntu/.local/lib/python3.10/site-packages/hopper/test_util.pygenerate_random_padding_mask	   s$   "r"   c	                     s  |r|rJ | j \ }	}
|j \}}}|j  ||
fksJ |j  ||
fks*J |dus2|dur:|r6J |r:J |durRt| ||\}}}} fdd}nt| d}tjd d  tj|jd}d}} fdd}|durt|||\}}}}t|||^}}nt|d}t|d}tjd d  tj|jd}d}}|r||k sJ |	|ksJ tj|||gdd	}tj| ||gd
d	}|dur؇ fdd}n fdd}|	 
 |||	 
 ||fS |r6tj||gdd	}tj||gd
d	}|}|dur fdd}n fdd}|	 
 |	 
 ||||| 	 
 |	 
 |||fS |}|durF fdd}n fdd}|	 
 |	 
 |	 
 ||||||| 	 
 |	 
 |	 
 |||fS )a  
    Arguments:
        q: (batch_size, seqlen_q, nheads, d)
        k: (batch_size, seqlen_k, nheads_k, d)
        v: (batch_size, seqlen_k, nheads_k, d)
        query_padding_mask: (batch_size, seqlen), bool
        key_padding_mask: (batch_size, seqlen), bool
    Nc                       t |  S Nr   output_unpadr   	indices_qseqlen_qr    r!   <lambda>9   s    zgenerate_qkv.<locals>.<lambda>zb s h d -> (b s) h dr   r	   )stepr   r   c                       t | d dS Nz(b s) h d -> b s h dr   r   r&   r   r    r!   r+   C       dim   c                    r#   r$   r%   
dqkv_unpadr(   r    r!   r+   [       c                    r-   Nz(b s) t h d -> b s t h dr   r/   r5   r0   r    r!   r+   ]   r1   c                    r#   r$   r%   	dkv_unpadr   	indices_kseqlen_kr    r!   r+   m   r7   c                    r-   r8   r/   r9   r0   r    r!   r+   o   r1   c                    r#   r$   r%   dk_unpadr;   r    r!   r+      r7   c                    r-   r.   r/   r>   r0   r    r!   r+      r7   )shaper   r   r   r   r   r   allstackdetachrequires_grad_) qkvquery_padding_maskkey_padding_maskkvpacked	qkvpackedquery_unused_maskkey_unused_masknheadsd_nheads_kq_unpadcu_seqlens_qmax_seqlen_q	seqused_qoutput_pad_fnk_unpadcu_seqlens_kmax_seqlen_k	seqused_kv_unpadrest	qkv_unpadqkvdqkv_pad_fnkv_unpadkv	dq_pad_fn
dkv_pad_fn	dk_pad_fnr    )r   r<   r)   r=   r*   r!   generate_qkv    s   
















re   r   r   c              	   C   s"  t tj| |tjdd}tj||tjd}	|d ur3t |d}t|	d|jd d}	t|	|k|	| d}	|d u r9|nt |dd}
|d u rG| nt |dd}|d dk ra|	||
 | |d	  kS |d u rkt|	|n|
}
t	|	t
||
 | |d	  |
kt|	||
 | |d  k |	|kS )
Nr
   zs -> s 1zb -> b 1 1 1zs -> b 1 1 sr   r   l        r   r	   )r   r   r   longr   r@   wheresum	full_like
logical_orminimumlogical_and)r*   r=   window_sizesink_token_lengthrH   rI   key_leftpadr   row_idxcol_idxsksqr    r    r!   construct_local_mask   s*   

"ru           Tc              
   C   s  |	r|d df}| j }|r|  | | } }}|
dur<t|
d| jd |jd  d}
|  t|
d j| j d} |durN| t|d j|j d}|dur`| t|d j|j d}| jd |jd }}t|d	| jd |jd  d}t|d	| jd |jd  d}| jd
 }|std| t	| |}ntd| |t	| }|dkrt
|| | }|dur|t| dtd |d dks|d dkrt|||||||| jd}||td |dur|| }tj|d
d|j }|dur	|t| dd}|dur|t| dd}|d dks&|d dkr2|tj|d
ddd}dd|  }|durE|| d}n|}|durU|||j }td||| }|durm|t| dd |j|d|j|dfS )a  
    Arguments:
        q: (batch_size, seqlen_q, nheads, head_dim)
        k: (batch_size, seqlen_k, nheads, head_dim)
        v: (batch_size, seqlen_k, nheads, head_dim)
        query_padding_mask: (batch_size, seqlen_q)
        key_padding_mask: (batch_size, seqlen_k)
        attn_bias: broadcastable to (batch_size, nheads, seqlen_q, seqlen_k)
        dropout_p: float
        dropout_mask: (batch_size, nheads, seqlen_q, seqlen_k)
        causal: whether to apply causal masking
        upcast: whether to cast all inputs to fp32, do all computation in fp32, then cast
            output back to fp16/bf16.
        reorder_ops: whether to change the order of operations (scaling k instead of scaling k, etc.)
            without changing the math. This is to estimate the numerical error from operation
            reordering.
    Output:
        output: (batch_size, seqlen_q, nheads, head_dim)
        attention: (batch_size, nheads, seqlen_q, seqlen_k), softmax after dropout
    r   Nzb h -> b (h g)r4   )gzb h -> b 1 h 1)r   r	   zb s h d -> b s (h g) dr   zbthd,bshd->bhtszb s -> b 1 1 sz-inf)rp   r   r2   zb s -> b 1 s 1rv   T)r3   keepdimg      ?zbhts,bshd->bthdzb s -> b s 1 1)r   floatr   r@   r   tor   einsummathsqrttanhmasked_fill_ru   r   softmaxmasked_fillrA   )rE   rF   rG   rH   rI   rp   	attn_bias	dropout_pdropout_maskcausal	q_descale	k_descale	v_descalern   ro   softcapupcastreorder_opsintermediate_dtypedtype_ogr*   r=   rO   scores
local_mask	attentiondropout_scalingattention_dropoutputr    r    r!   attention_ref   sl   '






r   )r   F)NNFFNN)rf   r   NNNN)NNNNrv   NFNNNrf   r   rv   TFN)r|   r   einopsr   r   paddingr   r   r"   re   ru   r   r    r    r    r!   <module>   s<    

{
(