o
    پi                     @   s   d Z 	 ddlmZ ddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZmZmZmZ G dd dZe	fd	d
Zdd ZdefddZ	ddejdedededeej f
ddZdejdedefddZdS )z
Adapted from
https://github.com/vllm-project/vllm/blob/020f58abcdea65302225663130d08fd8f4dd755a/vllm/model_executor/layers/quantization/utils/marlin_utils_test.py
    )OptionalN)
ScalarType)GPTQ_MARLIN_TILEmarlin_permute_scalesmarlin_zero_points)get_pack_factorgptq_quantize_weightsquantize_weightssort_weightsc                   @   s   e Zd Zdd ZdS )MarlinWorkspacec                 C   s@   || dksJ d |||| | }tj|tjdd| _d S )Nr   z5out_features = {} is undivisible by min_thread_n = {}cudadtypedevice)formattorchzerosintscratch)selfout_featuresmin_thread_nmax_parallelmax_workspace_size r   Q/home/ubuntu/.local/lib/python3.10/site-packages/sglang/test/test_marlin_utils.py__init__   s   zMarlinWorkspace.__init__N)__name__
__module____qualname__r   r   r   r   r   r      s    r   c                 C   s   | j ||fks	J || dksJ d| d| || dks)J d| d| | || ||| |f} | d} | || || f} | d| fd d |f | j } | S )Nr   z	size_k = z	, tile = r            )shapereshapepermutenumel)q_wsize_ksize_npermtiler   r   r   marlin_permute_weights*   s     
&r.   c           	      C   s   t | |||} t|}| j}|   tj} tj| j	d | j	d | ftjd}t
|D ]}|| d d |d |f || > O }q.t|tj|}|S )Nr   r"   )r   )r.   r   r   cpunumpyastypenpuint32r   r%   ranger   
from_numpyint32to)	r)   r*   r+   num_bitsr,   pack_factororig_deviceq_packedir   r   r   marlin_weights9   s   $$r=   r8   c           	         s"  g }t dD ]N}g }|d }dD ]0}d|d  d|d  d d|d d  d|d d  d fD ]}|d| | d|   q0qt dD ] | fdd	|D  qEqt|}| dkrftg d
}n| dkrrtg d}ntd| |dt|fd d |f 	 }t
|}|S )N       )r   r"   r!   r"         c                    s   g | ]}|d    qS )   r   ).0pjr   r   
<listcomp>Z   s    z#get_weight_perm.<locals>.<listcomp>)r   r!   r?      r"   r#         r    znum_bits must be 4 or 8, got {}r$   )r4   appendextendr2   array	Exceptionr   r&   lenravelr   r5   )	r8   	perm_listr<   perm1colblockrowr,   
interleaver   rE   r   get_weight_permL   s0   

"
rW   w
quant_type
group_size	act_order	test_permc                 C   s   | j \}}|j}|dkr|}||ksJ t| ||||\}}	}
}}tjdtj| jd}|r5t|	|\}	}}t|}t	|	||||}t
|
|||}||||||g}tt|D ]}|| | j||< qV|S )Nr$   r   r   )r%   	size_bitsr   r   emptyr   r   r
   rW   r=   r   r4   rO   r7   )rX   rY   rZ   r[   r\   r*   r+   r8   w_refr)   sg_idx	rand_permsort_indicesweight_perm
marlin_q_wmarlin_sres_listr<   r   r   r   marlin_quantizej   s$   

rh   c                 C   s   | j \}}|dkr|}||ksJ || dksJ || }t| ||dd\}}}}	t|j}
t||||j|
}t||||}t|	|||j}||||g}tt|D ]}|| 	| j
||< qR|S )Nr$   r   T)zero_points)r%   r	   rW   r]   r=   r   r   r4   rO   r7   r   )rX   rY   rZ   r*   r+   
num_groupsr_   r)   r`   zprd   re   rf   	marlin_zprg   r<   r   r   r   awq_marlin_quantize   s   

rm   )N)__doc__typingr   r0   r2   r   sgl_kernel.scalar_typer   +sglang.srt.layers.quantization.marlin_utilsr   r   r   $sglang.srt.layers.quantization.utilsr   r   r	   r
   r   r.   r=   r   rW   Tensorboolrh   rm   r   r   r   r   <module>   s2    #
'