o
     i                     @   s^  d dl mZ d dlZd dlm  mZ d dlmZ d dlmZm	Z	m
Z
 d dlmZ dZedZee
g dejgdgd	ZG d
d dejZG dd deZG dd deZG dd dejjZG dd deZG dd dZG dd deZG dd deZG dd deZG dd deZeeeeeeedZe	dd eeed! e	d"d d eeed# dS )$    )TupleN)nn)	DTYPE2STRbenchmark_main_helper2product_dictg      ?cuda))i @        r   )r      i <  r
   )        `  r   )r   r   r   r   )   r   r   r   )i  r   r   r   )i 0     r	   r   F)B_in_hidden_out_ftdtypebiasc                       sP   e Zd ZejZdeeeeef dededdf fddZ	dd	 Z
d
d Z  ZS )Mlpr   r   bwreturnNc           	         s   |\}}}}t    d| _t|  d| d| d| d| d|r#dnd | _| j|||d| _t | _	| j|||d| _
tj||gd|d	| _tj||gd|d
d| _| j| _| d| d S )Nmlp (,) b )r   r   )devicer   Tr   r   requires_grad)super__init__labelr   	sub_label
LINEAR_CLSfc1r   GELUactfc2torchrandngradinputoutto	selfr   r   r   r   Bin_fthid_ftout_ft	__class__ V/home/ubuntu/.local/lib/python3.10/site-packages/xformers/benchmarks/benchmark_sp24.pyr    .   s   
0
zMlp.__init__c                 C   s.   | j }| |}| |}| |}|| _d S N)r+   r$   r&   r'   r,   r/   xr6   r6   r7   fwA   s
   



zMlp.fwc                 C   s   | j j| jdd d S )NT)retain_graph)r,   backwardr*   r/   r6   r6   r7   r   H   s   zMlp.bw)__name__
__module____qualname__r   Linearr#   r   intboolr    r;   r   __classcell__r6   r6   r4   r7   r   +   s    r   c                   @      e Zd Zdd ZdS )MlpDenseMaskc                 C   sD   | j }| |}tjj|}|| }| |}| |}|| _d S r8   )	r+   r$   r(   opsxformerssparse24_largest_mask_2dr&   r'   r,   )r/   r:   maskr6   r6   r7   r;   M   s   



zMlpDenseMask.fwNr?   r@   rA   r;   r6   r6   r6   r7   rG   L       rG   c                   @   rF   )MlpAct24c                 C   s8   | j }| |}t|}| |}| |}|| _d S r8   )r+   r$   xops
sparsify24r&   r'   r,   r9   r6   r6   r7   r;   Z   s   




zMlpAct24.fwNrL   r6   r6   r6   r7   rN   Y   rM   rN   c                   @   s"   e Zd ZdejdejfddZdS )	LinearW24r+   r   c                 C   s"   t j| jddd}t||| jS )N24dense
cusparselt)gradientbackend)rO   rP   weightFlinearr   )r/   r+   w_sparser6   r6   r7   forwardf   s   zLinearW24.forwardN)r?   r@   rA   r(   TensorrZ   r6   r6   r6   r7   rQ   e   s    rQ   c                   @   s   e Zd ZeZdS )MlpW24N)r?   r@   rA   rQ   r#   r6   r6   r6   r7   r\   o   s    r\   c                       sD   e Zd Zdeeeeef dededdf fddZd
dd	Z  ZS )MicrobenchmarkBaser   r   r   r   Nc           	         s   |\}}}}t    d| _t|  d| d| d| d| d|r#dnd | _tj||gd|dd	| _| j 	  | _
t| j| _d S )
Nr   r   r   r   r   r   r   Tr   )r   r    r!   r   r"   r(   r)   r+   t
contiguousinput_colMajorrO   rP   input_spr.   r4   r6   r7   r    t   s   
0zMicrobenchmarkBase.__init__c                 C   s   d S r8   r6   r>   r6   r6   r7   r      s   zMicrobenchmarkBase.bw)r   N)	r?   r@   rA   r   rC   rD   r    r   rE   r6   r6   r4   r7   r]   s   s    r]   c                   @      e Zd ZdejfddZdS )MicrobenchmarkSparsify24r   c                 C   s   t | j | jS r8   )rO   rP   r+   r>   r6   r6   r7   r;      s   zMicrobenchmarkSparsify24.fwNr?   r@   rA   r(   r[   r;   r6   r6   r6   r7   rc          rc   c                   @   rb   )MicrobenchmarkSp24ApplyDenser   c                 C   s   t j| j| jdd | jS NT)pattern	out_dense)rO   sparsify24_liker+   ra   r>   r6   r6   r7   r;         zMicrobenchmarkSp24ApplyDense.fwNrd   r6   r6   r6   r7   rf      re   rf   c                   @   rb   )MicrobenchmarkSp24ApplyDenseTr   c                 C   s   t j| j| jdd | jS rg   )rO   rj   r`   ra   r+   r>   r6   r6   r7   r;      rk   z MicrobenchmarkSp24ApplyDenseT.fwNrd   r6   r6   r6   r7   rl      re   rl   c                   @   rb   )MicrobenchmarkInputCloner   c                 C   s   | j   | j S r8   )r+   cloner>   r6   r6   r7   r;      s   
zMicrobenchmarkInputClone.fwNrd   r6   r6   r6   r7   rm      re   rm   )act24densew24s24_inp_sparsify24s24_inp_apply_denses24_inp_apply_dense_ts24_inp_clonesp24_fwT)r;   cases	functionsmin_run_time	sp24_fwbw)r;   r   rw   rx   ry   ) typingr   r(   torch.nn.functionalr   
functionalrW   utilsr   r   r   xformers.opsrH   rO   ry   r   listhalfCASESModuler   rG   rN   rB   rQ   r\   r]   rc   rf   rl   rm   rx   r6   r6   r6   r7   <module>   sX   
!
	

