o
    ÔÙ¾i
  ã                   @   sL   d dl mZmZ d dlmZmZmZmZmZm	Z	 d dl
Z
G dd„ deƒZdS )é    )ÚABCÚabstractmethod)ÚDictÚIteratorÚListÚOptionalÚTupleÚUnionNc                +   @   s&  e Zd ZdZe																		d,deeee ef  deeee	 e	f  deeeee
  ee
 f  deeee ef  deeee ef  d	eeee
 e
f  d
eeee
 e
f  deeeee
  ee
 f  deeeee  ee f  deeee ef  dee dee deeee ef  deeee
 e
f  deeee
 e
f  dee
 deeee ef  dee
 dee	ee	 f f&dd„ƒZedd„ ƒZe		d-deeeejf  dee defdd „ƒZd!edefd"d#„Zd!efd$d%„Zed&d'„ ƒZed(d)„ ƒZed*d+„ ƒZdS ).Ú
EngineBasezÅ
    Abstract base class for engine interfaces that support generation, weight updating, and memory control.
    This base class provides a unified API for both HTTP-based engines and engines.
    NFÚpromptÚsampling_paramsÚ	input_idsÚ
image_dataÚreturn_logprobÚlogprob_start_lenÚtop_logprobs_numÚtoken_ids_logprobÚ	lora_pathÚcustom_logit_processorÚreturn_hidden_statesÚstreamÚbootstrap_hostÚbootstrap_portÚbootstrap_roomÚdata_parallel_rankÚridÚpriorityÚreturnc                 C   ó   dS )z'Generate outputs based on given inputs.N© )Úselfr   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   úU/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/entrypoints/EngineBase.pyÚgenerate   s   zEngineBase.generatec                 C   r   )zFlush the cache of the engine.Nr   ©r    r   r   r!   Úflush_cache&   ó   zEngineBase.flush_cacheTÚnamed_tensorsÚload_formatr$   c                 C   r   )z0Update model weights with in-memory tensor data.Nr   )r    r&   r'   r$   r   r   r!   Úupdate_weights_from_tensor+   s   z%EngineBase.update_weights_from_tensorÚ	lora_namec                 C   r   )z8Load a new LoRA adapter without re-launching the engine.Nr   )r    r)   r   r   r   r!   Úload_lora_adapter5   ó   zEngineBase.load_lora_adapterc                 C   r   )z6Unload a LoRA adapter without re-launching the engine.Nr   )r    r)   r   r   r!   Úunload_lora_adapter9   r+   zEngineBase.unload_lora_adapterc                 C   r   )z*Release GPU memory occupation temporarily.Nr   r#   r   r   r!   Úrelease_memory_occupation=   r%   z$EngineBase.release_memory_occupationc                 C   r   )z:Resume GPU memory occupation which is previously released.Nr   r#   r   r   r!   Úresume_memory_occupationB   r%   z#EngineBase.resume_memory_occupationc                 C   r   )z+Shutdown the engine and clean up resources.Nr   r#   r   r   r!   ÚshutdownG   r%   zEngineBase.shutdown)NNNNFNNNNNNNNNNNNN)NT)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r   r   r	   r   Ústrr   ÚintÚboolr   r"   r$   r   ÚtorchÚTensorr(   r*   r,   r-   r.   r/   r   r   r   r!   r
      s     íþýüûúùø	÷
öõôóòñðïîíì
üþýü	

r
   )Úabcr   r   Útypingr   r   r   r   r   r	   r7   r
   r   r   r   r!   Ú<module>   s     