o
     iu'                     @   s  d dl Z d dlZd dlmZ d dlZd dlmZ d dlZd dl	m
  mZ d dlmZmZ d dlmZmZ dejjj_dZedZejdkrJd	gnd	d
gZg dZg ee d	dgg dddgdgee dgg ddgg dZej
jjjej
jjj fej
jj!jej
jj!j fej
jj"jej
jj"j fej
jj#jej
jj#j fgZ$dd Z%dd eefD \Z&Z'e&( D ]1Z)e)( Z)e)*e+e,e)d -ddidej.dfidej.dfidej/idej0ig e&1e) qdej.dfidej
j2dfigdd dD Z3e'( D ]Z)e3D ]Z*e)( Z)e)*e* e'1e) qqe&e' Z4d*d d!Z5		d+d"e6fd#d$Z7	d,d"e6fd%d&Z8d'd( Z9e:d)krDe9  dS dS )-    N)partial)	benchmark)create_attn_biasref_attention)benchmark_main_helpercreate_argparserFg      ?cuda   (   ))     r	   X   )r   r   r	   P   )r   r   r	   @   )   r   r	   r   )r   r   r	   r   )r   r   r	   r   )   r   r	   r   )    r      r   )r   r   r   r   )r   r   r      )   r   r	   r   )r   r   r   r   )r   r   r   r   )r   r   r   r   )r   R      r   )   r   r   r   )r   r      r   )r	      r   r
   )r	    @  r   r
   )r	   r   r   r   )r	   r   r   r   )   r   r   r
   )r   r   r   r
   )r   r   r   r   )r   r   r   r   )r   r   r   r   )r         r      )r   r   i    r   r   r   r   )r   r   r   )r   r   r   r      r   c                  k   s6    |   }|  }tj| D ]
}tt||V  qd S N)keysvalues	itertoolsproductdictzip)kwargsr"   valsinstance r+   c/home/ubuntu/.local/lib/python3.10/site-packages/xformers/benchmarks/benchmark_mem_eff_attention.pyproduct_dictP   s   r-   c              
   C   s2   g | ]}t t|td gtddfgtjgdqS )g        NF)shape_qnum_threads	dropout_pattn_bias_cfgdtype)listr-   NUM_THREADStypetorchhalf).0SHAPESr+   r+   r,   
<listcomp>W   s    
r:   r.   r0   g333333?r1   Tr2   c                 C   s&   g | ]}t jjjjd f|tjdqS )F)r1   Hkvr2   )xformersopsfmha	attn_bias+BlockDiagonalCausalWithOffsetPaddedKeysMaskr6   bfloat16)r8   r;   r+   r+   r,   r:   x   s    	
)r	   r   c                 C   s  t | }| d }|rdnd}||d tj|t||d}tj| t||d}	| d | d || d f}
tj|
t||d| d | d d|| d | d | d || || d | }tj|
t||d| d | d d|| d | d | d || || d | }||	||fS )Nr   r      )devicer2   requires_gradr	   )r3   insertr6   randrC   reshapeexpand)r.   r;   r2   rD   packedstacked_shapeHqstacked_dimqkvqshape_kvkvr+   r+   r,   create_tensors   s(   rR   r/   c                 c   s   | \}}}	}
|p
|	}t | ||d|d\}}}}|\}}|r d S tjdtjdtjdi| }| d| d| d|	 d| d|
 d| d	|j }d}tD ]`\}}t|||	|	| |||t|d
|d}t	j
|||||d}t|t	jjt	jjfrdd |||fD \}}}||sqKtjd||||j|ttjj||fddd| d|j||dV  d}qK|sd S tjd||||j|tdd| dd||dV  d S )NF)rD   rI   b16f16f32 -, p=, BiasT=BMHK

batch_size	num_headsnum_heads_groupsq_lenkv_lenr2   rC   rD   fmtopquerykeyvaluer?   pc                 S   s(   g | ]}| d dg|jdd qS )r	   r   N)rG   shape)r8   xr+   r+   r,   r:      s   ( z(mem_eff_attention_fw.<locals>.<listcomp>zfn(q, k, v, attn_bias, p)rb   )rN   rP   rQ   r?   rg   fnzattention (attn_bias=)stmtglobalslabeldescription	sub_labelr/   Teager)rR   r6   rA   r7   float__name__OPSr   rC   r>   Inputs
isinstancer?   BlockDiagonalMaskr@   supportsr   Timerr   r<   r=   memory_efficient_attentionNAMEr   )r.   r/   r1   r0   r2   rI   r;   BMrK   K_rN   rP   rQ   attn_bias_typeattn_bias_requires_grad	dtype_strrs   has_runfw_opbw_opbiasinpr+   r+   r,   mem_eff_attention_fw   s   	*



r   c                 c   s   | \}}}}	|p
|}t | ||dd\}
}}}|\}}tjdtjdtjdi| }| d| d| d| d| d|	 d| d	|j d
| }d}tD ]Y\}}t||||| |||t|d|d}t	j
|||||d}||rr||ssqId}tjj|j|j|j|j|j||fd}t|}tjd||dd| d|j||dV  ~qI|sd S tjdt||||j||dd| dd||dV  d S )NT)rD   rS   rT   rU   rV   rW   rX   rY   z, BiasGrad=FrZ   r[   rc   rk   z%out.backward(grad, retain_graph=True))outgradzattention backward (attn_bias=rm   rn   vanilla)rR   r6   rA   r7   ru   rv   rw   r   rC   r>   rx   r{   r<   r=   r}   rd   re   rf   r?   rg   	ones_liker   r|   r~   r   )r.   r/   r1   r0   r2   r;   r   r   rK   r   r   rN   rP   rQ   r   r   r   rs   r   r   r   r   r   r   grad_benchmarkr+   r+   r,   mem_eff_attention_bw
  s   *


r   c                  C   sb   t  } | jdddd | jdddd |  }|js"ttt| td |js/tt	t| td d S d S )Nz--omit-forward
store_truezDo not run forward benchmarks)actionhelpz--omit-backwardzDo not run backward benchmarks)
arg_parsermin_run_time)
r   add_argument
parse_argsomit_forwardr   r   CASESr   omit_backwardr   )r   argsr+   r+   r,   mainW  s6   
r   __main__)FT)TNr!   );r$   random	functoolsr   r6   torch.utilsr   xformers.opsr<   xformers.ops.fmhar=   r>   xformers.attn_bias_utilsr   r   xformers.benchmarks.utilsr   r   backendsr   matmul
allow_tf32r   rC   r5   r4   VISION_SHAPESsortedr%   
LLM_SHAPEScutlassFwOpBwOpflashflash3ckrw   r-   VISION_CASES	LLM_CASEScopycupdateRandomstrchoiceTensorrA   ru   appendLowerTriangularMaskLLM_CASE_UPDATESr   rR   intr   r   r   rv   r+   r+   r+   r,   <module>   s   
& 


	

 
c
M

