o
    پi                     @   s  d dl Z d dlmZ d dlmZmZmZ d dlZd dlm	Z	 d dl
mZmZmZ d dlmZ d dlmZ e eZerEd dlZd dlmZ ed	Z		
d"deejejf deejejf dejdejdedee defddZdeejejf deejejf dejdejfddZdeejejf deejejf dejfddZdedefddZedd Zdeejejf fd d!Z dS )#    N)contextmanager)AnyOptionalTuple)compile_utils)DEEPGEMM_BLACKWELLDEEPGEMM_SCALE_UE8M0ENABLE_JIT_DEEPGEMM)
ServerArgs)get_bool_env_var)get_mn_major_tma_aligned_tensorSGLANG_DEEPGEMM_SANITY_CHECK   lhsrhsoutmasked_m
expected_moverlap_argsmax_block_nc              
   C   s   | d j \}}}	|d j \}}
}tjj}t|  t| t||
|	||D t|d ur.|jnd ( tj	| ||||fi |d urGt
d||jdni W  d    W  d    S 1 s\w   Y  W d    d S 1 slw   Y  d S )Nr   T)enable_overlapr   signal)shaper   DeepGemmKernelTypeGROUPED_GEMM_NT_F8F8BF16_MASKED_sanity_check_inputdeep_gemm_execution_hookconfigure_deep_gemm_num_smsnum_sms	deep_gemmfp8_m_grouped_gemm_nt_maskeddictr   )r   r   r   r   r   r   r   
num_groups_knkernel_type r'   b/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/deep_gemm_wrapper/entrypoint.pygrouped_gemm_nt_f8f8bf16_masked   s<   	
"r)   	m_indicesc           
      C   s~   | d j \}}|d j \}}}tjj}	t|  t| t|||||	 t| ||| W d    d S 1 s8w   Y  d S )Nr   )r   r   r   GROUPED_GEMM_NT_F8F8BF16_CONTIGr   r   r    m_grouped_fp8_gemm_nt_contiguous)
r   r   r   r*   mr$   r"   r%   r#   r&   r'   r'   r(   grouped_gemm_nt_f8f8bf16_contigC   s   "r.   c           	      C   s~   | d j \}}|d j \}}d}tjj}t|  t| t||||| t| || W d    d S 1 s8w   Y  d S )Nr      )r   r   r   GEMM_NT_F8F8BF16r   r   r   fp8_gemm_nt)	r   r   r   r-   r$   r%   r#   r"   r&   r'   r'   r(   gemm_nt_f8f8bf16T   s   "r2   gpu_idserver_argsc                 C   s   t | | d S N)r   update_deep_gemm_config)r3   r4   r'   r'   r(   r6   i   s   r6   c              	   c   sJ    | d u r
d V  d S t  }t |  zd V  W t | d S t | w r5   )r   get_num_smsset_num_sms)r   original_num_smsr'   r'   r(   r   m   s   

r   x_fp8c                 C   sZ   t sd S | \}}|jtjkrd S ddlm} ||}t||ks+J d|d|d S )Nr   )ceil_to_ue8m0zx_scale=z x_scale_ceil=)_SANITY_CHECKdtypetorchint(sglang.srt.layers.quantization.fp8_utilsr;   all)r:   xx_scaler;   x_scale_ceilr'   r'   r(   r   z   s   &r   )Nr   )!logging
contextlibr   typingr   r   r   r>   #sglang.srt.layers.deep_gemm_wrapperr   .sglang.srt.layers.deep_gemm_wrapper.configurerr   r   r	   sglang.srt.server_argsr
   sglang.srt.utilsr   	getLogger__name__loggerr   deep_gemm.utils.layoutr   r<   Tensorr?   r)   r.   r2   r6   r   r   r'   r'   r'   r(   <module>   sd    


)


