o
    iF&                     @   sz  d dl mZmZmZ d dlZd dlmZ d dlmZm	Z	 d dl
mZ ejjZe	ddejdejd	ejd
ededejfddZeddejdejd	ejd
ededejfddZe	ddejdejd	ejdejdejdejdejfddZeddejdejd	ejdejdejdejdejfddZe	ddejdejdejd	ejd
ededejdejfddZeddejdejdejd	ejd
ededejdejfddZG d d! d!eZejZejZeejjd"d# Zeejjd$d% Zeejjd&d' Zeej j!d(d) Z"eej#jd*d+ Z$eej%jd,d- Z&eej'jd.d/ Z(eej)jd0d1 Z*eej+j,j-d2d3 Z.dS )4    )ListOptionalTupleN)return_and_correct_aliasing)register_custom_opregister_custom_op_impl)TorchAOBaseTensorzblocksparse::bsr_to_densecrow_indicescol_indicesvaluesMKreturnc                 C   s   t j| ||||fd S )N)r	   r
   r   size)torchsparse_bsr_tensorto_denser	   r
   r   r   r    r   P/home/ubuntu/.local/lib/python3.10/site-packages/torchao/sparsity/blocksparse.pybsr_to_dense   s
   r   c                 C   s   t j||f|j|jdS )Ndtypedevice)r   emptyr   r   r   r   r   r   bsr_to_dense_abstract   s   r   zblocksparse::int_addmmA
left_alpharight_alphac              
   C   s   ddl m} ddlm} |jtjksJ |jd }|jd }	|jd }
tj| ||||	fd}|t	||}|j
|||
f tjd}||||dd|||d	 S )
Nr   )broadcast_batch_dimsbsr_dense_addmmr   )r      )alphabetaoutr   r   )torch.sparse._triton_opsr   torchao.kernel.bsr_triton_opsr!   r   r   int8shaper   blocksparse_int_addmm	new_emptybfloat16t)r	   r
   r   r   r   r   r   r!   r   r   N
weight_bsroriginal_batch_dims_broadcastedr(   r   r   r   r-   *   s.   	


	r-   c                 C   s0   |j d }|j d }tj||ftj|jd S )Nr"   r   )r,   r   r   r/   r   r0   )r	   r
   r   r   r   r   r1   r   r   r   r   blocksparse_int_addmm_abstractL   s   
	
r4   zblocksparse::addmmx_paddedbiasc                 C   s^   ddl m} |d u sJ tj|||||fd}| jd }	| ||	f}
||
|| dd|
d |
S )Nr   r    r$   r%   )r&   r'   r(   )r*   r!   r   r   r,   r.   )r5   r	   r
   r   r   r   r6   r!   bsrN_paddedr(   r   r   r   blocksparse_addmm[   s   

r9   c                 C   s   | j d }| ||fS )Nr%   )r,   r.   )r5   r	   r
   r   r   r   r6   r8   r   r   r   blocksparse_addmm_abstractv   s   

r:   c                   @   s   e Zd ZU eej ed< eej ed< eej ed< eed< g dZe		ddej
dedeej deej deej defd	d
ZdefddZdeee eej
eef f fddZedeej
eef dejfddZedd Zdd ZdS )BlockSparseTensorbsr_crow_indicesbsr_col_indices
bsr_values	blocksize)r<   r=   r>   Fr,   requires_gradc           
      C   sZ   |d u rt d|}|j|j|j|d}tjj| |fi |}	||	_||	_||	_	||	_
|	S )NzCNo values passed to BlockSparseTensor: bsr_values must be provided!)r   r   layoutr@   )
ValueErrorr   r   rA   r   Tensor_make_wrapper_subclassr?   r<   r>   r=   )
clsr,   r?   r<   r=   r>   r@   previous_tensorkwargstensorr   r   r   __new__   s    
zBlockSparseTensor.__new__r   c                 C   s$   t | dsJ | jj d| j dS )Nr,   z(shape=))hasattr	__class____name__r,   selfr   r   r   __repr__   s   zBlockSparseTensor.__repr__c                    s0   t t fdd j} j j jf}||fS )Nc                    s   t  | d uS )N)getattr)xrN   r   r   <lambda>   s    z6BlockSparseTensor.__tensor_flatten__.<locals>.<lambda>)listfilter	__slots__r,   r@   r?   )rO   inner_tensorstensor_metar   rN   r   __tensor_flatten__   s
   z$BlockSparseTensor.__tensor_flatten__rX   c              	   C   s6   |\}}}| ||| dd | dd | dd |dS )Nr<   r=   r>   r,   r?   r<   r=   r>   r@   )get)rE   rW   rX   
outer_sizeouter_strider,   r@   r?   r   r   r   __tensor_unflatten__   s   



z&BlockSparseTensor.__tensor_unflatten__c                 C   s,   | |}| |j|| | | ddS )NFrZ   )to_sparse_bsrr,   r	   r
   r   )rE   dense_tensorr?   
bsr_tensorr   r   r   
from_dense   s   
zBlockSparseTensor.from_densec                 C   s,   t | j| j|| j|| j|| j| jdS )NrZ   )r;   r,   r?   r<   r=   r>   r@   )rO   funcr   r   r   apply_fn_to_shard   s   z#BlockSparseTensor.apply_fn_to_shardN)F)rM   
__module____qualname__r   r   rC   __annotations__intrV   staticmethodSizeboolrI   strrP   r   r   rY   classmethodr^   rb   rd   r   r   r   r   r;      s@   
 &
r;   c                 C   s   t | |||d tjS Nr   )r   rd   r   detachrc   typesargsrG   r   r   r   block_sparse_detach   s   rs   c                 C   s   t |dksJ t |dksJ |d dksJ |d }| dks$J |jr)J t|jd |j| | | 	dddS )N   r   r"   )r%   F)r@   )
lendimr@   r;   r,   r?   r	   r
   r   	unsqueeze)rc   rq   rr   rG   r7   r   r   r   block_sparse_unsqueeze   s   
rx   c                 C   sZ   t |dksJ t |dksJ |\}}dd }t|tjr(t|tr(|||S |||S )Nrt   r   c                 S   s   t | tsJ t |tjsJ |  dksJ | dksJ | jr$J |ddks-J ||d|d| j | jd}|	dd
d|  }|  | }t| j| j|  |  |S )N   r   r%   )
isinstancer;   r   rC   rv   r@   r   viewr?   	transposeindex_selectr
   r   r,   r	   )r7   r0   	t_blockedmasked_t
new_valuesr   r   r   my_mul  s   
$z block_sparse_mul.<locals>.my_mul)ru   rz   r   rC   r;   )rc   rq   rr   rG   r7   r0   r   r   r   r   block_sparse_mul   s   

r   c                 C   s\   |\}}t |tksJ t|dksJ |d }|dksJ tjj| | |j	d S )Nr%   r   )
typerT   ru   r   opsblocksparsesumr   r	   r,   )rc   rq   rr   rG   r7   rv   r   r   r   block_sparse_sum  s    r   c                 C      |d j  S rn   )r>   ro   rp   r   r   r   block_sparse_values"     r   c                 C   r   rn   )r<   ro   rp   r   r   r   block_sparse_crow_indices'  r   r   c                 C   r   rn   )r=   ro   rp   r   r   r   block_sparse_col_indices,  r   r   c                 C   s   |d j jd S rn   )r>   r,   rp   r   r   r   block_sparse__nnz1  s   r   c              	   C   sv   |\}}}| d|d }|jd }|jd }	tjj|| |	 |
 ||	d }
|
 }|d u r7|S || S )Nr"   r   r%   )reshaper   r0   r,   r   r   r   addmmr	   r
   r   )rc   rq   rr   rG   x_origwr6   rR   r   r   r(   out_origr   r   r   block_sparse_linear6  s"   


	r   )/typingr   r   r   r   torch.utils._python_dispatchr   torchao.opsr   r   torchao.utilsr   r   atenrC   rh   r   r   r-   r4   r9   r:   r;   
implementsimplements_torch_functionro   defaultrs   rw   rx   mulr   r   dim_IntListr   r   r   r	   r   r
   r   _nnzr   nn
functionallinearr   r   r   r   r   <module>   s   
!]








	






