o
    i4                     @   s4   d dl Z g dZdd Zdd Zdd Zd	d
 ZdS )    N))sparse_semi_structured_from_dense_cutlass'sparse_semi_structured_to_dense_cutlassmask_creatorc                 C   s$  t jd| |dd d d f d|}t jd||d| d}d}|jdkr'dnd}|| | |d d  |d d	  || d	 d d  || d d	  }|d dk|d dk@ t j}|d dk|d dk@ t j}	|||	 7 }|||	 8 }d}
||
 }||
 }||  |
 ||
  | d
S )Nr   device   @                   )torcharangerepeatitemsizetoint8view)m
meta_ncols
meta_dtyper   dst_rowsdst_colsgroup_xgroup_ytopright
bottomleft
interleavecols_majcols_min r"   h/home/ubuntu/.local/lib/python3.10/site-packages/compressed_tensors/utils/semi_structured_conversions.py*_calculate_meta_reordering_scatter_offsets-   s,   $


  r$   c                 C   s   |   dkrtd|    d| j\}}| j}tj}| jtjkr%tj}n| jtjtj	tj
tjfv r6tj}n	td| j d|jd d }|dvrNtd	|tjkrb|d
 dkratd| dn|d dkrptd| d|d|  dkrtd| dd|  | jtj
krd}| d|| |}|dkd\}}	}
}nd}| d|| |}|dkd \}}
\}	}|||  }||	@ }| |	@ }| |	 @ }|}|}||B |B }||	 B }||tjd> B }||tjd> B }| jtj
kr|d|d}|d|d}tj||fdd||d }n|d|dd ||d }||d> B }|d||f|}|dkrt|d d d d df |d d d d df d> B |d d d d df d> B |d d d d df d> B }nk|dkr|d d d d df |d d d d df d> B |d d d d df d> B |d d d d df d> B |d d d d df d
> B |d d d d df d> B |d d d d df d> B |d d d d df d> B }||| f}t||||}|d||d ||||fS )Nr	   z)Expected 2-dimensional dense tensor, got -dimensional tensorInvalid datatype z of dense matrixr   r   )r   r   z6Invalid number of elements per meta element calculatedr   r   zNumber of rows of dense matrix z must be divisible by 16r
   z must be divisible by 32z"Number of columns of dense matrix z must be divisible by r   r   dim                        )r(   RuntimeErrorshaper   r   r   dtypeint32halfbfloat16floatint16r   r   unbindr   int64gather	unsqueezestack	new_emptyr$   scatter_)denser   kr   r   quadbits_per_meta_elemksparsedense_4m0m1m2m3dense_2r   expr0expr1expr2bit0bit1bit2bit3idxs0idxs1sparse0sparse1sparsemeta_4meta_nmetameta_reorderedmeta_offsetsr"   r"   r#   r   N   s   



$


 

r   c                 C   s  |   dkrtd|    d| j\}}| j}|  dkr(td|   d|j|kr9td| d|j d|j}|tjtjfvrLtd| d	|jd
 d }| jtj	kr[dnd}|j\}}	||krptd| d| |	| | d| krtd| d|	| | d  dt
||	||}
t|dd|
||	}tj||	d| f||d}|dkr)|d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d df< |d
? d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d df< n|d
kr|d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d df< |d
? d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d df< |d? d@ |d d d d d
f< |d? d@ |d d d d df< |d? d@ |d d d d df< |d ? d@ |d d d d d!f< |d"? d@ |d d d d df< |d#? d@ |d d d d d$f< |d%? d@ |d d d d df< |d&? d@ |d d d d d'f< |dtjdd| | | |d(d ddddd }tj|d | f| j|d}| jtj	kr\|d|| d n|tjd|| tjd ||d| S ))Nr	   z*Expected 2-dimensional sparse tensor, got r%   z(Expected 2-dimensional meta tensor, got zExpected meta matrix to be on z device, got matrix on z devicer&   z of meta matrixr   r   zNumber of rows of meta matrix z4 must be equal to number of columns of spase matrix z#Number of columns of sparse matrix z different from the z<, expected according to the number of columns of meta matrixr   r   )r3   r   r)   r   r-   
   r+   r*      r/   r      	   r,         r.         r0         r   )r(   r1   r2   r   r3   r   r8   r4   r   r7   r$   r;   r   emptyr   r   zerosr?   reshaper5   )rU   rY   r   rA   r   r   rB   rC   
meta_nrowsr   rZ   rX   meta_2dense_offsetsr@   r"   r"   r#   r      s   


	
 
r   c                 C   s   d}d}d}|   | dkrtd| j d| d|   | }|   ||}tj|dd	dddt|| f }tj	|j|j
d
}|jd|dd| j}|S )aJ  
    Class for creating N:M sparsity masks.
    Masks will be created using the N:M ratio, where for every block of
    M weights, N will be pruned based on ranked weight value. Each mask
    will correspond to the given tensor.

    :param N: The number of weights in a group to keep
    :param M: The size of a weight group
    r	   r   Nr   zTensor of size z can't be evenly divided into z groupsr   r'   r   )r(   indexvalue)numel
ValueErrorr2   detachabsrg   r   argsortintonesr   r?   )tensorNMmask
num_groupstensor_temprk   w_br"   r"   r#   r   9  s   
&r   )r   __all__r$   r   r   r   r"   r"   r"   r#   <module>   s   ! a