o
    3wi                     @   s"  d dl Z d dlZd dlmZ e sdejfddZdS d dlZd dlmZ	 ej
eji dddeji d	ddeji d
ddeji dddeji ddeji d	deji d
deji ddeji ddeji d	deji d
deji ddgdgdejde	jde	jfddZdejfddZdS )    N)is_triton_availablexc                 C   s   d S )N r   r   r   a/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/bitsandbytes/triton/quantize_rowwise.pyquantize_rowwise	   s   r         )
num_stages	num_warps      )r
   )r   
n_elements)configskey
BLOCK_SIZEP2c                 C   s   t jdd}|| }t d|}|| }	||k }
t j| |	 |
d}t |}t jt |
|ddd}t jd||  }t j	||	 ||
d t 	|| | d S )Nr   )axis)maskg     _@)
tl
program_idarangeloadabsmaxwhere	libdevicellrintstore)x_ptr
output_ptroutput_maxsr   r   r   pidblock_startr   offsetsrow_maskr   abs_xmax_valoutputr   r   r   _quantize_rowwise   s   
r)   c                    s   t j j jt jd}t j jd  jt jd}tdtt	 jd  } j
r.|j
s0J | } fdd}t|  ||| jd |d ||fS )N)devicedtyper   r   r   c                    s    j d fS )Nr   )shape)metar   r   r   <lambda>A   s    z"quantize_rowwise.<locals>.<lambda>)r   r   )torchemptyr,   r*   int8float16intmathceillog2is_cudanumelr)   )r   r(   r!   r   r   gridr   r   r   r   9   s   )r4   r/    bitsandbytes.triton.triton_utilsr   Tensorr   tritontriton.languagelanguager   autotuneConfigjit	constexprr)   r   r   r   r   <module>   s<    