o
    پii                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlmZ d dlmZmZmZ d dlmZmZmZ d dlmZmZmZ d dlZd dlZd dlZd dlmZ d dlm Z  d dl!m"  m#Z# d dl$m%Z%m&Z& e&e'Z(ed	Z)ej*ej+ej,d
Z-dZ.e/e0d< dZ1e/e0d< de/fddZ2ej3j4Z5da6dej3j7dB ddfddZ8e8ej3_4dej3j7dB fddZ9G dd de j:Z;G dd de j<Z=de>e) de>e) fddZ?de@de@de@fd d!ZAd"e/defd#d$ZBd%d& ZCd'eDe/e/f fd(d)ZEd*ed+e/eFB eB d,eGe d-eDe/ef def
d.d/ZHdeDe/ef fd0d1ZId]d2d3ZJde/fd4d5ZKG d6d7 d7ZLd]d8d9ZMeG d:d; d;ZNe
O ZPdeNfd<d=ZQ		d^d>ejRd?ejRd@ejRdB dAe dB fdBdCZSdejRfdDdEZT				d_dFeDe/ef dB dGe@dB dHe@dB dIe@dB deUeUeUejVdB    f
dJdKZWdLe@ddfdMdNZXedOdPdeYfdQdRZZedOdPdeYfdSdTZ[	Vd`deGeUejV eUejV f fdWdXZ\dYdZ Z]d[d\ Z^dS )a    N)Callable)	dataclassfieldsis_dataclass)	lru_cachepartialwraps)AnyTypeVarcast)	RemotePdb)MixedPrecisionPolicy)SortedHelpFormatterinit_loggerT)fp32fp16bf16"SGLANG_DIFFUSION_ATTENTION_BACKENDSTR_BACKEND_ENV_VAR!SGLANG_DIFFUSION_ATTENTION_CONFIGSTR_ATTN_CONFIG_ENV_VARreturnc                  C   s|   t j} | rtd|  t| S tjjdurd} ntjjdur!d} nttjdr0tjj	dur0d} nt
dtd|  t| S )	a  
    We either use the library file specified by the `VLLM_NCCL_SO_PATH`
    environment variable, or we find the library file brought by PyTorch.
    After importing `torch`, `libnccl.so.2`, `librccl.so.1` or `libmccl.so.2`
    can be found by `ctypes` automatically.
    zEFound nccl from environment variable SGLANG_DIFFUSION_NCCL_SO_PATH=%sNzlibnccl.so.2zlibrccl.so.1musazlibmccl.so.2z0NCCL only supports CUDA, ROCm and MUSA backends.zFound nccl from library %s)envsSGLANG_DIFFUSION_NCCL_SO_PATHloggerinfotorchversioncudahiphasattrr   
ValueErrorstr)so_file r&   O/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/utils.pyfind_nccl_library3   s    r(   streamc                 C   s   | a | d urt|  d S d S N)_current_streamprev_set_stream)r)   r&   r&   r'   _patched_set_streamT   s   r-   c                  C   s@   ddl m}  |  sdS tdu r|  rtj ntj atS )a  
    replace `torch.cuda.current_stream()` with `sglang.multimodal_gen.utils.current_stream()`.
    it turns out that `torch.cuda.current_stream()` is quite expensive,
    as it will construct a new stream object at each call.
    here we patch `torch.cuda.set_stream` to keep track of the current stream
    directly, so that we can avoid calling `torch.cuda.current_stream()`.

    the underlying hypothesis is that we do not call `torch._C._cuda_setStream`
    from C/C++ code.
    r   current_platformN)	'sglang.multimodal_gen.runtime.platformsr/   is_cuda_aliker+   is_rocmr   r    Streamcurrent_streamr.   r&   r&   r'   r4   ^   s   r4   c                       s(   e Zd Zd fdd	ZdddZ  ZS )	StoreBooleanFNc              	      s   t  j||dd|||d d S )N?T)option_stringsdestnargsconstdefaultrequiredhelp)super__init__)selfr7   r8   r;   r<   r=   	__class__r&   r'   r?      s   
zStoreBoolean.__init__c                 C   s   |d u rt || jd d S t|tr8| dkr!t || jd d S | dkr0t || jd d S td| dt || jt| d S )NTtruefalseFzInvalid boolean value: z. Expected 'true' or 'false'.)setattrr8   
isinstancer$   lowerr#   bool)r@   parser	namespacevaluesoption_stringr&   r&   r'   __call__   s   

zStoreBoolean.__call__)FFNr*   )__name__
__module____qualname__r?   rM   __classcell__r&   r&   rA   r'   r5   ~   s    r5   c                       sj   e Zd ZdZd fddZ	ddejf fddZdee	 dee	 fd	d
Z
de	dee	 fddZ  ZS )FlexibleArgumentParserz=ArgumentParser that allows both underscore and dash in names.r   Nc                    s&   d|vrt |d< t j|i | d S )Nformatter_class)r   r>   r?   r@   argskwargsrA   r&   r'   r?      s   zFlexibleArgumentParser.__init__c                    s  |d u rt jdd  }tdd |D r| |}g }|D ]a}|dr[d|v rI|dd\}}d|tdd  dd }|| d|  q|d|tdd  dd  q|dry|dkryt|d	kry|d ||d	d   q|| qt	 
||}t |_d
}|t|k r|| }|drd|v r|dd
 d	d  dd}|j| |d7 }n/|d	d  dd}|j| |d t|k r||d  ds|d	7 }n	|d7 }n|d7 }|t|k s|S )N   c                 s   s    | ]}| d V  qdS )--configN)
startswith).0argr&   r&   r'   	<genexpr>       z4FlexibleArgumentParser.parse_args.<locals>.<genexpr>--=_-z-O   r   )sysargvany_pull_args_from_configrY   splitlenreplaceappendr>   
parse_argsset	_providedadd)r@   rU   rJ   processed_argsr[   keyvalueirA   r&   r'   rk      sD   

$


"

z!FlexibleArgumentParser.parse_argsrU   c                 C   s6  d}d}t |D ]\}}|dr|dkrtd|}|}q|du r$|S |d| }d|v r?|ddd }||d d }n|t|d krKtd||d  }||d d }| |}	|d	 d
kr|dkrltd|d	 }
|d }|dd }|
|g|	 | | }|S |d	 }
|dd }|
g|	 | | }|S )a  Method to pull arguments specified in the config file
        into the command-line args variable.

        The arguments in config file will be inserted between
        the argument list.

        example:
        ```yaml
            port: 12323
            tensor-parallel-size: 4
        ```
        ```python
        $: vllm {serve,chat,complete} "facebook/opt-12B"             --config config.yaml -tp 2
        $: args = [
            "serve,chat,complete",
            "facebook/opt-12B",
            '--config', 'config.yaml',
            '-tp', '2'
        ]
        $: args = [
            "serve,chat,complete",
            "facebook/opt-12B",
            '--port', '12323',
            '--tp-size', '4',
            '-tp', '2'
            ]
        ```

        Please note how the config args are inserted after the sub command.
        this way the order of priorities is maintained when these are args
        parsed by super().
        NrX   z$More than one config file specified!r_   rW   zCNo config file specified! Please check your command-line arguments.rb   r   servezANo model_tag specified! Please check your command-line arguments.)	enumeraterY   r#   rg   rh   _load_config_file)r@   rU   index
config_argrr   r[   args_before_config	file_pathargs_after_configconfig_argscommand	model_tagother_args_beforer&   r&   r'   rf      sV   "

z-FlexibleArgumentParser._pull_args_from_configrz   c              
      s   | dd }|dvrtd|g i }zt|}t|}W d   n1 s)w   Y  W n tyC } ztd| |d}~ww dd | jD d	t	d
t
t	tf f fdd  d| S )a   Loads a yaml file and returns the key value pairs as a
        flattened list with argparse like pattern
        ```yaml
            port: 12323
            tensor-parallel-size: 4
            vae_config:
                load_encoder: false
                load_decoder: true
        ```
        returns:
            processed_args: list[str] = [
                '--port': '12323',
                '--tp-size': '4',
                '--vae-config.load-encoder': 'false',
                '--vae-config.load-decoder': 'true'
            ]
        .rs   )yamlymljsonzUConfig file must be of a yaml/yml/json type.                              %s suppliedNzOUnable to read the config file at %s.                 Make sure path is correctc                 S   s   g | ]
}t |tr|jqS r&   )rF   r5   r8   )rZ   actionr&   r&   r'   
<listcomp>[  s
    
z<FlexibleArgumentParser._load_config_file.<locals>.<listcomp>prefixdc                    s   |  D ]a\}}| r|  d| n|}t|tr3|vr3|r&d|  qd|  d qt|trLd|  |D ]	}t| qAqt|trW || qd|  t| qd S )Nr   r^   rD   )itemsrF   rH   rj   listr$   dict)r   r   rp   rq   full_keyitemprocess_dictro   store_boolean_argumentsr&   r'   r   _  s"   

z>FlexibleArgumentParser._load_config_file.<locals>.process_dict )rg   r#   openr   	safe_load	Exceptionr   error_actionsr$   r   r	   )r@   rz   	extensionconfigconfig_fileexr&   r   r'   rv   2  s6   
"
z(FlexibleArgumentParser._load_config_filer   NNN)rN   rO   rP   __doc__r?   argparse	Namespacerk   r   r$   rf   rv   rQ   r&   r&   rA   r'   rR      s    6XrR   clsc                    s>   | j dtfdd td	 fdd}t| d| | S )
a-  
    A replacement for `abc.ABC`.
    When we use `abc.ABC`, subclasses will fail to instantiate
    if they do not implement all abstract methods.
    Here, we only require `raise NotImplementedError` in the
    base class, and log a warning if the method is not implemented
    in the subclass.
    r@   c              	   S   s   g }t | D ].}|drqzt| |}t|r|j}W n	 ty%   Y qw t|}d|v r4|| q|rKd	|}d| d|  }t
| d S d S )Nr`   NotImplementedError,zMethods z not implemented in )dirrY   getattrcallable__func__AttributeErrorinspect	getsourcerj   joinr   warning)r@   unimplemented_methods	attr_nameattr	attr_funcsrcmethod_namesmsgr&   r&   r'   find_unimplemented_methods  s*   




zBwarn_for_unimplemented_methods.<locals>.find_unimplemented_methodsr   Nc                    s"   | g|R i |  |  d S r*   r&   rT   r   original_initr&   r'   wrapped_init  s   z4warn_for_unimplemented_methods.<locals>.wrapped_initr?   r   )r?   objectr   type__setattr__)r   r   r&   r   r'   warn_for_unimplemented_methodsx  s   
r   rq   	alignmentc                 C   s   t t| | | S )zalign height, width according to alignment

    Args:
        value (int): height or width
        alignment (int): target alignment factor

    Returns:
        int: the aligned value
    )intmathceil)rq   r   r&   r&   r'   align_to  s   
r   qualnamec                 C   s$   |  dd\}}t|}t||S )z8
    Resolve an object by its fully qualified name.
    r   rW   )rsplit	importlibimport_moduler   )r   module_nameobj_namemoduler&   r&   r'   resolve_obj_by_qualname  s   

r   c                  C   s   ddl m  m  m}  | S )aV  
    Historical comments:

    libnvml.so is the library behind nvidia-smi, and
    pynvml is a Python wrapper around it. We use it to get GPU
    status without initializing CUDA context in the current process.
    Historically, there are two packages that provide pynvml:
    - `nvidia-ml-py` (https://pypi.org/project/nvidia-ml-py/): The official
        wrapper. It is a dependency of sglang-diffusion, and is installed when users
        install sglang-diffusion. It provides a Python module named `pynvml`.
    - `pynvml` (https://pypi.org/project/pynvml/): An unofficial wrapper.
        Prior to version 12.0, it also provides a Python module `pynvml`,
        and therefore conflicts with the official one which is a standalone Python file.
        This causes errors when both of them are installed.
        Starting from version 12.0, it migrates to a new module
        named `pynvml_utils` to avoid the conflict.
    It is so confusing that many packages in the community use the
    unofficial one by mistake, and we have to handle this case.
    For example, `nvcr.io/nvidia/pytorch:24.12-py3` uses the unofficial
    one, and it will cause errors, see the issue
    https://github.com/vllm-project/vllm/issues/12847 for example.
    After all the troubles, we decide to copy the official `pynvml`
    module to our codebase, and use it directly.
    r   N)(sglang.multimodal_gen.third_party.pynvmlmultimodal_genthird_partypynvml)r   r&   r&   r'   import_pynvml  s   r   r   c                 C   sN   |   D ] \}}|tjv rtj| |krtd|tj| | |tj|< qd S )Nz5Overwriting environment variable %s from '%s' to '%s')r   osenvironr   r   )r   kvr&   r&   r'   update_environment_variables  s   r   objmethodrU   rV   c                 C   sn   t |trtt|| }n"t |tr+zt| |}W n ty*   td|ddw t|| }||i |S )a3  
    Run a method of an object with the given arguments and keyword arguments.
    If the method is string, it will be converted to a method using getattr.
    If the method is serialized bytes and will be deserialized using
    cloudpickle.
    If the method is a callable, it will be called directly.
    zMethod z is not implemented.N)	rF   bytesr   cloudpickleloadsr$   r   r   r   )r   r   rU   rV   funcr&   r&   r'   
run_method  s   




r   c                    s&   t  std fddt D S )NzExpected dataclass instancec                    s   i | ]
}|j t |j qS r&   )namer   )rZ   fr   r&   r'   
<dictcomp>  s    z"shallow_asdict.<locals>.<dictcomp>)r   	TypeErrorr   r   r&   r   r'   shallow_asdict  s   r   c                  C   sB   d} dd l }| dkrtd}|| tj d S td d S )NrW   r   Linuxz	libc.so.6z8kill_itself_when_parent_died is only supported in linux.)	platformsystemctypesCDLLprctlsignalSIGKILLr   r   )PR_SET_PDEATHSIGr   libcr&   r&   r'   kill_itself_when_parent_died  s   
r   c                  C   s&   t  \} }}dt| ||}|S )Nr   )rc   exc_infor   	tracebackformat_exception)etyperq   tberr_strr&   r&   r'   get_exception_traceback  s   r   c                   @   s4   e Zd Zdeeeef  fddZdefddZ	dS )TypeBasedDispatchermappingc                 C   s
   || _ d S r*   )_mapping)r@   r   r&   r&   r'   r?     s   
zTypeBasedDispatcher.__init__r   c                 C   s4   | j D ]\}}t||r||  S qtd| )NzInvalid object: )r   rF   r#   )r@   r   tyfnr&   r&   r'   rM     s
   
zTypeBasedDispatcher.__call__N)
rN   rO   rP   r   tupler   r   r?   r	   rM   r&   r&   r&   r'   r     s    r   c                  C   sn   t  t jt j%} | t jt jd | d |  d }td|d	  W d    d S 1 s0w   Y  d S )NrW   )	localhostr   r   )hostport)
socketAF_INETSOCK_STREAM
setsockopt
SOL_SOCKETSO_REUSEADDRbindgetsocknamer   	set_trace)sr   r&   r&   r'   remote_breakpoint&  s   
"r  c                   @   sf   e Zd ZU dZejdB ed< dZejdB ed< dZejdB ed< dZ	ejdB ed< dZ
edB ed< dS )MixedPrecisionStateNparam_dtypereduce_dtypeoutput_dtypecompute_dtype	mp_policy)rN   rO   rP   r  r   dtype__annotations__r  r  r  r	  r   r&   r&   r&   r'   r  .  s   
 r  c                   C   s   t tds	tdtttjS )z&Get the current mixed precision state.statezMixed precision state not set)r"   _mixed_precision_stater#   r   r  r  r&   r&   r&   r'   get_mixed_precision_state;  s   
r  r  r  r  r	  c                 C   s   t | |||d}|t_dS )zSet mixed precision policy globally.

    Args:
        param_dtype: Parameter dtype used for training
        reduce_dtype: Reduction dtype used for gradients
        output_dtype: Optional output dtype
    )r  r  r  r	  N)r  r  r  )r  r  r  r	  r  r&   r&   r'   set_mixed_precision_policyB  s   
r  c                  C   s   t tds	t S t } | jS )z:Get the current compute dtype from mixed precision policy.r  )r"   r  r   get_default_dtyper  r  )r  r&   r&   r'   get_compute_dtypeX  s   
r  mask_strategyt_maxl_maxh_maxc                    sr  | du r |durdur dusJ d fddt |D S dd | D }|du rUdu rU du rUtdd |D d }td	d |D d td
d |D d n|duradura duseJ d|} fddt |D }|  D ];\}}tt|d\}	}
}d|	  kr|k rn q{d|
  krk rn q{d|  krk rn q{|||	 |
 |< q{|S )aN  
    Convert a dictionary of mask indices to a 3D list of tensors.
    Args:
        mask_strategy: keys are "t_l_h", values are torch.Tensor masks.
        t_max, l_max, h_max: if provided (all three), force the output shape to (t_max, l_max, h_max).
                            If all three are None, infer shape from the data.
    NzBIf mask_strategy is None, you must provide t_max, l_max, and h_maxc                    "   g | ]} fd dt D qS )c                       g | ]}d d t  D qS )c                 S      g | ]}d qS r*   r&   rZ   r`   r&   r&   r'   r   t      9dict_to_3d_list.<locals>.<listcomp>.<listcomp>.<listcomp>ranger  )r  r&   r'   r   t      .dict_to_3d_list.<locals>.<listcomp>.<listcomp>r  r  )r  r  r&   r'   r   s  s    z#dict_to_3d_list.<locals>.<listcomp>c                 S   s    g | ]}t tt|d qS )r`   )r   mapr   rg   )rZ   rp   r&   r&   r'   r   x  s     c                 s   s    | ]\}}}|V  qd S r*   r&   )rZ   tr`   r&   r&   r'   r\   }  r]   z"dict_to_3d_list.<locals>.<genexpr>rW   c                 s   s    | ]\}}}|V  qd S r*   r&   )rZ   r`   lr&   r&   r'   r\   ~  r]   c                 s   s    | ]\}}}|V  qd S r*   r&   )rZ   r`   hr&   r&   r'   r\     r]   zfEither supply none of (t_max, l_max, h_max) to infer dimensions, or supply all three to fix the shape.c                    r  )c                    r  )c                 S   r  r*   r&   r  r&   r&   r'   r     r  r  r  r  )max_head_idxr&   r'   r     r  r  r  r  )r$  max_layer_idxr&   r'   r     s    r`   r   )r  maxr   r   r   rg   )r  r  r  r  indicesmax_timesteps_idxresultrp   rq   r!  r"  r#  r&   )r  r  r$  r%  r'   dict_to_3d_lista  s:   r*  seedc                 C   s   ddl m} ||  d S )Nr   r.   )r0   r/   seed_everything)r+  r/   r&   r&   r'   set_random_seed  s   r-  rW   )maxsizec                   C   s   t jdd uS )Nvsa)r   util	find_specr&   r&   r&   r'   is_vsa_available  s   r2  c                  C   s>   t jdd u r
dS z
dd l} | jdkW S  ty   Y dS w )Nz!kernel.csrc.attn.vmoba_attn.vmobaFr   z2.7.4)r   r0  r1  
flash_attn__version__r   )r3  r&   r&   r'   is_vmoba_available  s   r5  F皙?c           	      C   sl  t | tsJ dd | D }dd | D }|r|durt||ddD ]]\}}tjd||jd }||k rctjd	d
d|j|d|dddf 	 |dddf< t
|dddf |dddf< q"|dddf |dddf< |dddf |dddf< q"||fS t||ddD ]&\}}t
|dddf |dddf< t
|dddf |dddf< q||fS )a  
    Generate binary masks for Text-to-Image-to-Video (TI2V) tasks.

    Creates masks to control which frames should be preserved vs replaced.
    Primarily used to fix the first frame to the input image while generating other frames.

    Args:
        tensors: List of tensors with shape [C, T, H, W]
        zero: If True, set first frame (dim 1, index 0) to zero. Default: False
        generator: Optional random generator for stochastic masking
        p: Probability of applying special noise when generator is provided. Default: 0.2

    Returns:
        Tuple of two lists of tensors:
        - When zero=False: Both lists contain all-ones tensors
        - When zero=True (no generator): First frame set to 0, others to 1
        - When zero=True (with generator): First frame set to small random values with probability p

    Example:
        >>> latent = torch.randn(48, 69, 96, 160)  # [C, T, H, W]
        >>> _, mask = masks_like([latent], zero=True)
        >>> # mask[0][:, 0] == 0 (first frame)
        >>> # mask[0][:, 1:] == 1 (other frames)
        >>> blended = (1.0 - mask[0]) * image + mask[0] * latent
        >>> # Result: first frame = image, other frames = latent
    c                 S   "   g | ]}t j|j|j|jd qS )r
  devicer   onesshaper
  r9  rZ   ur&   r&   r'   r        " zmasks_like.<locals>.<listcomp>c                 S   r7  r8  r:  r=  r&   r&   r'   r     r?  NF)strictrW   )	generatorr9  g            ?)rW   )meanstdsizer9  rA  r   )rF   r   zipr   randr9  r   normal	expand_asexp
zeros_like)	tensorszerorA  pout1out2r>  r   
random_numr&   r&   r'   
masks_like  s<   $"$rR  c                 C   s   | | }|| d }|| }t || | }t || | | }	|| dkr2|	| dkr2||	 |ks4J ||	 }
t || | }t || | | }|| dkr\|| dkr\|| |ks^J || }t||
 |
| t|| || k rx||	fS ||fS )NrB  r   )r   r&  )wr#  dwdhexpected_arearatioowohow1oh1ratio1oh2ow2ratio2r&   r&   r'   best_output_size  s   (($r`  c                 C   s@   t | | }|| }t|d d }t|d d }||d fS )N    )r   sqrtround)target_arearW  widthheightr&   r&   r'   calculate_dimensions  s
   
rg  r   r   )NNNN)FNr6  )_r   r   r   importlib.utilr   r   r   r   r   rc   	threadingr   collections.abcr   dataclassesr   r   r   	functoolsr   r   r   typingr	   r
   r   r   r   r   
remote_pdbr   torch.distributed.fsdpr   sglang.multimodal_gen.envsr   r   1sglang.multimodal_gen.runtime.utils.logging_utilsr   r   rN   r   r   float32float16bfloat16PRECISION_TO_TYPEr   r$   r  r   r(   r    
set_streamr,   r+   r3   r-   r4   Actionr5   ArgumentParserrR   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r  localr  r  r
  r  r  r   Tensorr*  r-  rH   r2  r5  rR  r`  rg  r&   r&   r&   r'   <module>   s   
  ^+





	



=
B