o
    پi)                     @   s   d dl mZmZ d dlZd dlmZ d dlmZmZ e Z	e	r0d dl
mZ dd Zdefdd	Zdad
d ZdZdefddZdd Zdd ZdedefddZdeeef defddZdedefddZdd Zd d! ZdS )"    )CallableUnionN)
reductions)is_nputorch_releasec                  G      t | tt} tj|  S N)_modify_tuple_REDUCE_TENSOR_ARG_DEVICE_INDEXnpu_verl_to_sglangnpu_reductions_rebuild_npu_tensor_originalargs r   P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/utils/patch_torch.py_rebuild_npu_tensor_modified      
r   devicec                 C   s   t d usJ dt S )NzKSGLANG_TP_RANK is not registered. Please call register_sgl_tp_rank() first.SGLANG_TP_RANKr   r   r   r   r      s   
r   c                   C   sZ   t sttdr	dS tjt_tjt_tt_tt_t	  dS tt
dr$dS t
jt
_tt
_dS )zTMonkey patching before Torch https://github.com/pytorch/pytorch/pull/149248 is fixed_reduce_tensor_originalNr   )_is_npuhasattrr   reduce_tensorr   rebuild_cuda_tensor_rebuild_cuda_tensor_original_reduce_tensor_modified_rebuild_cuda_tensor_modifiedinit_reductionsr   rebuild_npu_tensorr   r   r   r   r   r   monkey_patch_torch_reductions(   s   


r"      rankc                 C   s   | a d S r   r   )r$   r   r   r   register_sgl_tp_rankB   s   r%   c                  O   s(   t j| i |\}}t|tt}||fS r   )r   r   r	   r
   _device_to_uuid)r   kwargs	output_fnoutput_argsr   r   r   r   G   s
   r   c                  G   r   r   )r	   r
   _device_from_maybe_uuidr   r   r   r   r   r   r   O   r   r   returnc                 C   s   t tj| jS r   )strtorchcudaget_device_propertiesuuidr   r   r   r   r&   T   s   r&   device_maybe_uuidc                 C   sd   t | tr| S t | tr+ttj D ]}ttj|j| kr$|  S qt	d|  t	d| )NzInvalid device_uuid=z Unknown type: device_maybe_uuid=)

isinstanceintr,   ranger-   r.   device_countr/   r0   	Exception)r1   r   r   r   r   r*   X   s   

r*   indexmodifierc                 C   s.   g | d | || | | |d d  R S )N   r   )tr7   r8   r   r   r   r	   e   s   .r	   c                  C   s2   t dk rdd lm  m}  d| j_d| j_d S d S )N)      r   T)r   *torch._higher_order_ops.auto_functionalize_higher_order_opsauto_functionalizeauto_functionalized_v2
_cacheableauto_functionalized)afr   r   r   monkey_patch_torch_compilei   s
   rD   c                    s    fdd}|S )a  
    Decorator factory to conditionally register a fake for a custom op if it exists.
    Parses op_name (e.g., 'sgl_kernel::gptq_gemm'), checks if the op exists via hasattr
    on the namespace attribute of torch.ops. Registers the fake if present; otherwise,
    returns the function unchanged.
    Args:
        op_name (str): Full operator name (e.g., 'sgl_kernel::gptq_gemm').
    Returns:
        callable: Decorator for the fake function.
    Example:
        @register_fake_if_exists('sgl_kernel::gptq_gemm')
        def fake_gptq_gemm(a, b_q_weight, b_gptq_qzeros, b_gptq_scales, b_g_idx, use_shuffle, bit):
            return a.new_empty((a.shape[0], b_q_weight.shape[-1]), dtype=a.dtype)
    c                    s<     d\}}ttj|d }|rt||rtj |  | S )Nz::)splitgetattrr-   opsr   libraryregister_fake)func	namespacebare_opops_namespaceop_namer   r   	decorator   s
   z*register_fake_if_exists.<locals>.decoratorr   )rO   rP   r   rN   r   register_fake_if_existss   s   rQ   )typingr   r   r-   torch.multiprocessingr   sglang.srt.utils.commonr   r   r   torch_npu.multiprocessingr   r   r3   r   r   r"   r
   r%   r   r   r,   r&   r*   r	   rD   rQ   r   r   r   r   <module>   s(   
