o
    پi                     @  s   d dl mZ d dlmZmZ d dlZd dlmZmZm	Z	 er*d dl
mZ d dlmZ dZed-ddZd.ddZ				d/d0d+d,ZdS )1    )annotations)TYPE_CHECKINGOptionalN)
cache_onceload_jitmake_cpp_args)
ScalarType)Module   dtypetorch.dtypereturnr	   c                 C  s0   t | }tdg|R dgdd| dfgdS )Ngptq_marlinzgemm/marlin/gptq_marlin.cuhgptq_marlin_gemmzgptq_marlin_gemm<>)
cuda_filescuda_wrappers)r   r   )r   args r   Q/home/ubuntu/.local/lib/python3.10/site-packages/sglang/jit_kernel/gptq_marlin.py_jit_gptq_marlin_module   s   r   tOptional[torch.Tensor]devicetorch.devicetorch.Tensorc                 C  s   | d ur| S t jd||dS )Nr   )r   r   )torchempty)r   r   r   r   r   r   	_or_empty   s   r   TFac
b_q_weightb_scalesglobal_scaleb_zerosg_idxperm	workspaceb_q_typer   size_mintsize_nsize_k	is_k_fullbooluse_atomic_adduse_fp32_reduceis_zp_floatc                 C  sL  | j }|d u rtj|
|f| j|d}|
dkr|S |d uo+|d uo+| dko+| dk}|rNtj|j}t|
d d d d}tj|| t	 tj
|d}n	tjdtj
|d}|retj|
|f| j|d}n	tjd| j|d}t||| j}t||tj}t||tj}t||tj}t| j}|| |||||||||||	j|||| |S )N)r   r   r         @   )r   r   r   r   numelcudaget_device_propertiesmulti_processor_countmin_MAX_THREAD_Nfloat32r   int32r   r   id)r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r+   r,   r-   r/   r0   r1   r   has_act_ordersmsmax_m_blockc_tmpa_tmpglobal_scale_t	b_zeros_tg_idx_tperm_tmoduler   r   r   r   "   s`   



r   )r   r   r   r	   )r   r   r   r   r   r   r   r   )TFFF)$r   r   r    r   r!   r   r"   r   r#   r   r$   r   r%   r   r&   r   r'   r   r(   r   r)   r*   r+   r*   r,   r*   r-   r.   r/   r.   r0   r.   r1   r.   r   r   )
__future__r   typingr   r   r   sglang.jit_kernel.utilsr   r   r   sgl_kernel.scalar_typer   tvm_ffi.moduler	   r:   r   r   r   r   r   r   r   <module>   s     

