o
    oi&
                     @   sH  d dl Z d dlZd dlmZ e sdejfddZdS d dlZd dlmZ	 ej
eji ddeji ddeji d	deji d
deji ddeji dd
deji dd
deji d	d
deji d
d
deji dd
deji ddeji ddeji d	deji d
dgdgdejde	jde	jde	jde	jfddZdejfddZdS )    N)is_triton_availablexc                 C   s   d S )N )r   r   r   i/home/ubuntu/.local/lib/python3.10/site-packages/bitsandbytes/triton/quantize_columnwise_and_transpose.py!quantize_columnwise_and_transpose	   s   r      )
num_stages            )r   	num_warps)r   
n_elements)configskeyMN
BLOCK_SIZEP2c                 C   s   t jdd}|}	t d|}
|
|k }|
| }|	| }t j| | |d}t |}t jt ||ddd}t jd||  }|| }||
 }t j	|| ||d t 	|| | d S )Nr   )axis)maskg     _@)
tl
program_idarangeloadabsmaxwhere	libdevicellrintstore)x_ptr
output_ptroutput_maxsr   r   r   r   r   pidblock_start	p2_arangep2_arange_maskr   offsetsr   abs_xmax_valoutput	new_startnew_offsetsr   r   r   "_quantize_columnwise_and_transpose   s   
r.   c              
      s   | j \}}tj||| jtjd}tj| j d | jtjd}tdtt	| }| j
r/|j
s1J |   fdd}t| | || ||||d ||fS )N)devicedtyper   r	   c                    s   t  | d fS )Nr   )tritoncdiv)metar   r   r   <lambda>I   s    z3quantize_columnwise_and_transpose.<locals>.<lambda>)r   r   )shapetorchemptyr/   int8float16intmathceillog2is_cudanumelr.   )r   r   r   r+   r#   r   gridr   r4   r   r   @   s   
)r<   r7    bitsandbytes.triton.triton_utilsr   Tensorr   r1   triton.languagelanguager   autotuneConfigjit	constexprr.   r   r   r   r   <module>   sH    