o
    3wi                     @   s.  d dl Z d dlZd dlmZ e sdejdejfddZdS d dlZd dlmZ	 ej
eji ddd	eji d
dd	eji ddd	eji ddd	eji ddeji d
deji ddeji ddeji ddeji d
deji ddeji ddgdgdejde	jde	jfddZdejdejfddZdS )    N)is_triton_availablexstate_xc                 C   s   d S )N )r   r   r   r   c/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/bitsandbytes/triton/dequantize_rowwise.pydequantize_rowwise	   s   r         )
num_stages	num_warps      )r
   )r   
n_elements)configskey
BLOCK_SIZEP2c                 C   st   t jdd}|| }t d|}	||	 }
|	|k }t j| |
 |d}t || }|| | }t j||
 ||d d S )Nr   )axis)mask)tl
program_idarangeloadstore)x_ptrr   
output_ptrinv_127r   r   r   pidblock_startr   offsetsrow_maskr   max_valoutputr   r   r   _dequantize_rowwise   s   r#   c              	      s~   t j j jt jd}tdtt jd  } j	r!|j	s#J |
 } fdd}t|  ||d| jd |d |S )N)devicedtyper   r   c                    s    j d fS )Nr   )shape)metar   r   r   <lambda>>   s    z$dequantize_rowwise.<locals>.<lambda>g@ ?)r   r   )torchemptyr&   r$   float16intmathceillog2is_cudanumelr#   )r   r   r"   r   r   gridr   r(   r   r   7   s    )r.   r*    bitsandbytes.triton.triton_utilsr   Tensorr   tritontriton.languagelanguager   autotuneConfigjit	constexprr#   r   r   r   r   <module>   s<    