o
     i"                     @   sT   d dl Z d dlZd dlmZ e dZdd Zdd Zdd	 ZG d
d dej	Z
dS )    N)masked_matmulxformersc                 C   s   |j \}}}}| | j d | j d | j d | || j d }|jdd\}}	}
||d d ||
d d f  }|| j d |  |	 }tj|| j d |j d  || j d | j| jd}|d|| j| ||| j d d	| j d }|S )
Nr            Tas_tupledtypedevice)	shapereshapenonzerotorchzerosr   r   
index_add_to)blayoutvaluesNnnz_
block_sizebrhrctemp
linear_idxout r#   V/home/ubuntu/.local/lib/python3.10/site-packages/xformers/sparse/blocksparse_tensor.py_spmm   s$   &	r%   c           
      C   s   | j dd\}}}tj|ddd}|| jd  | }tj|jd | jd | jd  |jd |jd |j|jd	}| }|d|	|j|| 
 d
 | t
||d d |f  }	|	S )NTr   r   )dimkeepdimr   r   r   r   r
   gW:)r   r   	logsumexpr   r   r   r   maxr   r   exp
clamp_min_log_add_)
r   r   r   r   r   normsr!   out_tmax_valr"   r#   r#   r$   _softmax(   s$   r1   c                 C   s   | j d |j d  }| | j d | j d | j d | || j d } ||j d |j d |j d | ||j d }|jdd\}}}td| d d ||d d d d f |d d ||d d d d f }|S )	Nr	   r   r   r   r   Tr   znhik,nhjk->nhij)r   r   r   r   einsum)ar   r   r   r   r   r   r"   r#   r#   r$   _sddmm=   s   &&Br4   c                   @   s   e Zd Zedd Zdd Zdd Zdd Zed	d
 Z	edd Z
edd Zedd Zedd Zedd Zedd Zedd Zedd Zed!ddZedd  ZdS )"BlockSparseTensorc                 C   s~   i }|j |d< |j|d< |j|d< |j|d< |jdksJ |j\}}}}|j\}}}	|||| ||	 f}
tjj| |
fi |S )Nr   r   r   requires_grad   )	r   r   r   r6   ndimr   r   Tensor_make_wrapper_subclass)clsr   r   kwargsBr   r   Cr   wr   r#   r#   r$   __new__M   s   



zBlockSparseTensor.__new__c                 C   sV   |j d |j d ksJ |j|jksJ d|j d }|dks#J d|| _|| _d S )Nr	   r   z8Both values and layout need to reside on the same device   z*Minimum block size is 16, for now at least)r   r   _BlockSparseTensor__values_BlockSparseTensor__layout)selfr   r   r   r#   r#   r$   __init__[   s   

zBlockSparseTensor.__init__c                 C   s   d| j  d| j dS )Nzblock_sparse_tensor(shape=z	, values=))r   rB   rD   r#   r#   r$   __repr__h   s   zBlockSparseTensor.__repr__c                 C   s   | j S N)rB   rG   r#   r#   r$   r   k   s   zBlockSparseTensor.valuesc                 C   s   |  | ||}||_||_|S rI   )r@   rB   rC   )r;   r   r   matrixr#   r#   r$   	_raw_wrapn   s   zBlockSparseTensor._raw_wrapc                 C   s"   |  | ||j}||_|j|_|S rI   )r@   rC   rB   )r;   r   bmatrJ   r#   r#   r$   _wrapu   s   zBlockSparseTensor._wrapc                 C   s0   t || rt|tju stS t||j|j}|S rI   )
isinstancetyper   r9   NotImplementedr%   rC   rB   )r;   arg0arg1resr#   r#   r$   _bmm|   s   zBlockSparseTensor._bmmc                 C   sR   t |tju rt |tju stS |dd}| sJ t|||j}| ||S )Nr	   r   )	rO   r   r9   rP   	transposeis_contiguousr4   rC   rM   )r;   r3   r   maskrS   r#   r#   r$   _masked_matmul   s   z BlockSparseTensor._masked_matmulc                 C   s.   |dks
|dks
t S t|j|j}| ||S )Nr   r   )rP   r1   rC   rB   rM   )r;   rQ   r&   rS   r#   r#   r$   r1      s   zBlockSparseTensor._softmaxc                 C   s:   t |tr
t|}t |tjsJ | |jj|d|jS )N)r   )rN   strr   r   rB   r   rC   )r;   rQ   r   r#   r#   r$   _to   s   

zBlockSparseTensor._toc                 C   sh   t || r
t || stS |j|jksJ |j|j}}||| |j|j}}||| |S rI   )rN   rP   r   rB   
resize_as_copy_rC   )r;   rQ   rR   av0av1r#   r#   r$   _copy   s   zBlockSparseTensor._copyc                 C   sT   t || r
t || stS |j|jkrdS t|j|jsdS t|j|js(dS dS )NFT)rN   rP   r   r   equalrB   rC   )r;   rQ   rR   r#   r#   r$   _equal   s   zBlockSparseTensor._equalc              
   C   s   t j|j|j|jd}|j}|j}|jd }|jd }|jd }||jd |jd ||||}tt	|j
dd D ]#\}	\}
}}|d d |	d d d d f |d d |
|d d |d d f< q:|S )Nr
   r   r	   r   r   Tr   )r   r   r   r   r   rB   rC   r   	enumeratezipr   )r;   rQ   r"   r   r   r   blocks_iblocks_jout_ridxr   ijr#   r#   r$   	_to_dense   s   


":zBlockSparseTensor._to_denser#   Nc           	      C   s,  |d u ri }|t jjt jt jjt jt jjfv r)t|dksJ | |d |d S |t jjt jj	jt jfv r@| 
|d |d S |tkrYt|dksLJ | |d |d |d S |t jj	jt jt jfv r|d }|j }||g|dd  R i |}| ||S |t jjkrt|dksJ | |d |d S |t jjfv rt|dksJ | |d |d S |t jjt jfv rt|dksJ | |d |d S |t jjkrt|dksJ | |d S |t jjkr	|d }|j }||g|dd  R i |}| ||S |t jjkr&|d }|d }| |j||j|S |t jjjt jj jfv rQt|dks<J t|dksEJ |d }| |jj|S |t jj!kr_||d j t j"# ' ||i |}|t j$% v r}|W  d    S t j&'|| W  d    S 1 sw   Y  t(S )Nr   r   r   r&   r   ))r   r9   bmm
__matmul__matmullenrT   softmaxnn
functionalr1   r   rX   dropoutdropout_rB   clonerM   r   rZ   r\   r_   r`   ra   to_denserj   detach__deepcopy__rK   rC   grad__get___gradrequires_grad__CDisableTorchFunction	overridesget_default_nowrap_functions_tensor_convertrP   )	r;   functypesargsr<   xr   memoretr#   r#   r$   __torch_function__   sx   



"z$BlockSparseTensor.__torch_function__c                 C   s   t S rI   )rP   )r;   r   r   r   r<   r#   r#   r$   __torch_dispatch__  s   z$BlockSparseTensor.__torch_dispatch__)r#   N)__name__
__module____qualname__staticmethodr@   rE   rH   r   classmethodrK   rM   rT   rX   r1   rZ   r_   ra   rj   r   r   r#   r#   r#   r$   r5   L   s8    






	



Lr5   )loggingr   xformers.opsr   	getLoggerloggerr%   r1   r4   r9   r5   r#   r#   r#   r$   <module>   s   
