o
    پi%                     @   s  d Z ddlZddlmZmZmZmZ ddlZddlm	Z	 ddl
mZ defddZdefd	d
Zdejdededeejejf fddZe	ddZ		d!dededee dee dee	 f
ddZG dd dZ	d"dddddee dee deee  dedef
dd ZdS )#z!Utility methods for model layers.    N)AnyCallableListOptional)Library)current_platformreturnc                 C   D   t | dr| jS t | drtt| dd r|  S tdt|  )N
world_sizesizeUnsupported group type: )hasattrr
   callablegetattrr   
ValueErrortypegroup r   ^/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/runtime/layers/utils.pyget_group_size   
   
r   c                 C   r	   )Nrank_in_grouprankr   )r   r   r   r   r   r   r   r   r   r   r   get_group_rank   r   r   tokens
vocab_sizenum_seqsc                 C   sT   t j||d ft j| jd}|d| t |  |d d d |f }|dk}||fS )N   )dtypedevicer   )torchzeroslongr    scatter_add_	ones_like)r   r   r   
bin_countsmaskr   r   r   get_token_bin_counts_and_mask"   s   r(   sglangFRAGMENTop_nameop_funcmutates_args	fake_impl
target_libc           
   
   C   sF  ddl }|pt}z t|jdr|jjnd}t|j|r'tt|j|| r'W dS W n ttfy3   Y nw t|j	drC|j	j
||d}nddl}|jj
||}z$|| |  || |t sadnd |durq|| | W dS W dS  ty }	 zd	t|	v rd
t|	v rn|	W Y d}	~	dS d}	~	w ty }	 z|	d}	~	ww )a  
    `torch.library.custom_op` can have significant overhead because it
    needs to consider complicated dispatching logic. This function
    directly registers a custom op and dispatches it to the CUDA backend.
    See https://gist.github.com/youkaichao/ecbea9ec9fc79a45d2adce1784d7a9a5
    for more details.

    By default, the custom op is registered to the vLLM library. If you
    want to register it to a different library, you can pass the library
    object to the `target_lib` argument.

    IMPORTANT: the lifetime of the operator is tied to the lifetime of the
    library object. If you want to bind the operator to a different library,
    make sure the library object is alive when the operator is used.

    Note: This function will silently skip registration if the operator
    with the same name is already registered to avoid RuntimeError in
    multi-engine scenarios (e.g., VERL framework).
    r   Nnamer)   infer_schema)r-   CUDAPrivateUse1zTried to register an operatorzmultiple times)torch.library
sglang_libr   mr0   opsr   AttributeErrorRuntimeErrorlibraryr1   torch._custom_op.impl
_custom_opimpldefiner   is_npu_register_fakestr)
r+   r,   r-   r.   r/   r!   my_liblib_name
schema_strerrorr   r   r   direct_register_custom_op6   sH   rF   c                   @   sR   e Zd Zdededee fddZdd Zedefd	d
Z	edefddZ
dS )CustomOpWrapperr+   r,   r-   c                 K   s"   || _ || _|| _|| _d | _d S N)r+   r,   r-   extra_kwargs_impl)selfr+   r,   r-   rI   r   r   r   __init__   s
   
zCustomOpWrapper.__init__c                 O   s   | j |i |S rH   )	real_impl)rK   argskwargsr   r   r   __call__   s   zCustomOpWrapper.__call__r   c                 C   sX   | j d u r)ttjj| jst| j| j| j| j	d t
tjj| j| _ | j d us)J | j S )N)r+   r,   r-   r.   )rJ   r   r!   r7   r)   r+   rF   r,   r-   r.   r   )rK   r   r   r   rM      s   
zCustomOpWrapper.real_implc                    sL   dj v r
j d S dj v sJ tjj d   fdd}|S )Nr.   	out_shapec                     s    d u rd S j | i |}|  ztt tr!|j  W S |j  W S  tt	fy?   t
d  dj d dw )Nz)Cannot find output argument at position `z` for custom operator `z` with signature `z`.)bindapply_defaultsr!   
empty_like
isinstanceintrN   	arguments
IndexErrorKeyErrorr9   r+   )rN   rO   boundrQ   rK   	signaturer   r   r.      s(   
z,CustomOpWrapper.fake_impl.<locals>.fake_impl)rI   inspectr\   r,   )rK   r.   r   r[   r   r.      s   


zCustomOpWrapper.fake_implN)__name__
__module____qualname__rA   r   r   rL   rP   propertyrM   r.   r   r   r   r   rG      s    
rG   T)r+   r-   eagerfnrb   c          
         s   t  }t ddh}||ksJ d||  dv }dv }|r)|r)J d|s1|s1dd< dtdtf fdd	}	| durG|	| S |	S )
a*  
    A decorator to register a custom operator.

    Example usage:
    ```python
    # inplace operator, out_shape is None by default
    @register_custom_op(mutates_args=["x"])
    def add_1_(x: torch.Tensor) -> None:
        x.add_(1)

    # operator with output, out_shape indicates the position of output
    @register_custom_op(mutates_args=["x"], out_shape=0)
    def add(x: torch.Tensor, y: torch.Tensor) -> torch.Tensor:
        return x.add_(y)
    ```

    :param fn: The function to be registered as a custom operator.
               If None, return a decorator.
    :type fn: Callable
    :param op_name: The name of the operator. If None, use the function name
    :type op_name: Optional[str]
    :param mutates_args: A list of argument names that are mutated in-place.
    :type mutates_args: List[str]
    :param out_shape: The position (int for positional, str for keyword) of the output-shape tensor.
                      It is used to generate a fake implementation for torch.compile compatibility.
                      If the operator is inplace and has no output, set to None.
    :type out_shape: Optional[List[Union[int, str]]]
    :param fake_impl: A fake implementation for the operator.
                      Only one of `out_shape` or `fake_impl` should be provided.
    :type fake_impl: Optional[Callable]
    :param eager: Whether to register the operator eagerly.
                  If False, the registration will be deferred until the first call.
                  If you met any issue with torch.compile, try to set eager=True.
                  Currently, to avoid misuse, we set eager=True by default.
    :type eager: bool
    :return: The registered JIT custom operator, or a decorator.
             NOTE: the real register will occur at the first call of the function.
    :rtype: Callable
    rQ   r.   zUnexpected extra kwargs: z:Only one of `out_shape` or `fake_impl` should be provided.Nr,   r   c                    s.   t dp| j| p
g d} r|jS |S )N)r+   r,   r-   r   )rG   r^   rM   )r,   wrapperrb   rI   r-   r+   r   r   	decorator   s   z%register_custom_op.<locals>.decorator)setkeysr   )
rc   r+   r-   rb   rI   extra_kwarg_keysexpected_kwarg_keyshas_out_shapehas_fake_implrf   r   re   r   register_custom_op   s&   /
	rm   )NNrH   )__doc__r]   typingr   r   r   r   r!   r4   r   'sglang.multimodal_gen.runtime.platformsr   rV   r   r   Tensortupler(   r5   rA   rF   rG   boolrm   r   r   r   r   <module>   s`   		


JB
