o
    پi*                     @   s   d dl Z d dlZd dlmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZmZ er0d dlmZ eeZe ZdaG dd deZd	efd
dZdddZedd ZejfdejdedejfddZdd ZdS )    N)IntEnum)TYPE_CHECKINGCallable)envs)get_npu_memory_capacityis_npu)
ServerArgsc                   @   s   e Zd ZdZdZdZdS )NPUACLFormat      N)__name__
__module____qualname__ACL_FORMAT_UNDEFINEDACL_FORMAT_NDACL_FORMAT_FRACTAL_NZ r   r   Y/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/hardware_backend/npu/utils.pyr	      s    r	   fnc                    s   t   fdd}|S )Nc                     s2   t  ddrtd j d S d _ | i |S )N_has_been_calledFz$Function {} has already been called.T)getattrloggerdebugr   r   )argskwargsr   r   r   wrapper   s
   z_call_once.<locals>.wrapper)	functoolswraps)r   r   r   r   r   
_call_once   s   r    r   r   c                 C   s   d| _ d| _d| _| jdu rd| _t }|dkr2| jdu r d| _| jdu r1| jdk r.d| _n!d| _n|d	krO| jdu r>d
| _| jdu rO| jdk rLd| _nd| _d| _| j	rfd| _
|  rad| _dS d| _dS dS )z7
    Set default server arguments for NPU backend.
    ascendN   i   i         @   i   i       Tkernel_ascendpage_first_kv_splitpage_first_direct)attention_backendprefill_attention_backenddecode_attention_backend	page_sizer   chunked_prefill_sizecuda_graph_max_bstp_sizedisable_custom_all_reduceenable_hierarchical_cachehicache_io_backenduse_mla_backendhicache_mem_layout)r   npu_memr   r   r   set_default_server_args'   s8   








r7   c                  C   sP   t sJ dddl} ddl}ddlm} dd tj_d|jj	_
|jjdd	 dS )
zK
    Initialize NPU backend. This function should be called only once.
    z4NPU backend initialization called on non-NPU device.r   N)transfer_to_npuc                   S   s   dS )NFr   r   r   r   r   <lambda>c   s    z"init_npu_backend.<locals>.<lambda>TF)jit_compile)_is_npusgl_kernel_npu	torch_nputorch_npu.contribr8   torchcudais_availablenpuconfigallow_internal_formatset_compile_mode)r<   r=   r8   r   r   r   init_npu_backendV   s   
rF   tensor
acl_formatreturnc                 C   s,   t s| S tj r| S ddl}|| |jS )a  
    Cast a tensor to a specific NPU ACL format.

    Args:
        tensor (torch.Tensor): The input tensor.
        acl_format (NPUACLFormat): The target NPU ACL format.

    Returns:
        torch.Tensor: The tensor cast to the specified NPU ACL format.
    r   N)r;   r   $SGLANG_NPU_DISABLE_ACL_FORMAT_WEIGHTgetr=   npu_format_castvalue)rG   rH   r=   r   r   r   rL   i   s   
rL   c                   C   s   t d u r	tj a t S )N)indexer_weight_streamr?   rB   Streamr   r   r   r   get_indexer_weight_stream   s   
rP   )r   r   )r   loggingenumr   typingr   r   r?   sglang.srt.environr   sglang.srt.utilsr   r   sglang.srt.server_argsr   	getLoggerr   r   r;   rN   r	   r    r7   rF   r   TensorrL   rP   r   r   r   r   <module>   s4    

/

