o
    پi                     @  s   d dl mZ d dlZd dlZd dlmZmZ d dlZd dlm	Z	m
Z
mZmZ d dlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZm Z m!Z! erZd dl"m#Z# e$e%Z&G dd dZ'd	d
 Z(dd Z)dS )    )annotationsN)TYPE_CHECKINGTuple)GPU_MEMORY_ALL_TYPESGPU_MEMORY_TYPE_CUDA_GRAPHGPU_MEMORY_TYPE_KV_CACHEGPU_MEMORY_TYPE_WEIGHTS)CheckWeightsReqInputCheckWeightsReqOutput!DestroyWeightsUpdateGroupReqInput"DestroyWeightsUpdateGroupReqOutputGetWeightsByNameReqInputGetWeightsByNameReqOutputInitWeightsUpdateGroupReqInputInitWeightsUpdateGroupReqOutputReleaseMemoryOccupationReqInput ReleaseMemoryOccupationReqOutputResumeMemoryOccupationReqInputResumeMemoryOccupationReqOutputUpdateWeightFromDiskReqInputUpdateWeightFromDiskReqOutput$UpdateWeightsFromDistributedReqInput%UpdateWeightsFromDistributedReqOutputUpdateWeightsFromIPCReqInputUpdateWeightsFromIPCReqOutputUpdateWeightsFromTensorReqInput UpdateWeightsFromTensorReqOutput)	Schedulerc                   @  s   e Zd Zd)ddZd*dd	Zd+ddZd,ddZd-ddZd.ddZd/ddZ	d0ddZ
d1dd Zd2d"d#Zd3d$d%Zd3d&d'Zd(S )4SchedulerUpdateWeightsMixinselfr   recv_reqr   c                 C  sF   | j |\}}|r|jr|  }|sJ dnt| t||dS )z)In-place update of the weights from disk.)Cache flush failed after updating weightsr   )	tp_workerupdate_weights_from_diskflush_cacheloggererrorr   r   r    successmessageflush_cache_success r+   f/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/scheduler_update_weights_mixin.pyr#   .   s   
z4SchedulerUpdateWeightsMixin.update_weights_from_diskr   c                 C     | j |\}}t||S )z3Initialize the online model parameter update group.)r"   init_weights_update_groupr   r   r    r(   r)   r+   r+   r,   r.   ;      
z5SchedulerUpdateWeightsMixin.init_weights_update_groupr   c                 C  r-   )z0Destroy the online model parameter update group.)r"   destroy_weights_update_groupr   r/   r+   r+   r,   r1   B   r0   z8SchedulerUpdateWeightsMixin.destroy_weights_update_groupr   returnTuple[bool, str]c                 C  sD   | j |\}}|r|jr|  }|sJ dnt| t||S )z"Update the online model parameter.r!   )r"   update_weights_from_distributedr$   r%   r&   r   r'   r+   r+   r,   r4   I   s   

z;SchedulerUpdateWeightsMixin.update_weights_from_distributedr   c                 C  s^   | j p| j}||\}}|r|jr|  }|sJ dnt| tjj| j	d t
||S )z/Update the online model parameter from tensors.r!   group)draft_workerr"   update_weights_from_tensorr$   r%   r&   torchdistributedbarriertp_cpu_groupr   )r   r    workerr(   r)   r*   r+   r+   r,   r8   W   s   

z6SchedulerUpdateWeightsMixin.update_weights_from_tensorr   c                 C  sT   | j |\}}|r|jr|  }|sJ dnt| tjj| jd t	||S )zMUpdate the online model parameter from IPC for checkpoint-engine integration.r!   r5   )
r"   update_weights_from_ipcr$   r%   r&   r9   r:   r;   r<   r   r'   r+   r+   r,   r>   g   s   

z3SchedulerUpdateWeightsMixin.update_weights_from_ipcr   c                 C  s   | j |}t|S )N)r"   get_weights_by_namer   )r   r    	parameterr+   r+   r,   r?   u   s   z/SchedulerUpdateWeightsMixin.get_weights_by_namer   c                 C  s   |   sJ d|j}|d u st|dkrt}|D ]}| j| qt|v r0| jt | 	  t
|v rIt| jjj| _tj| j | jt
 t|v rS| jt t   t S )NzHrelease_memory_occupation should be called only when no ongoing request.r   )_is_no_requesttagslenr   offload_tagsaddr   memory_saver_adapterpauser$   r   _export_static_stater"   model_runnermodelstashed_model_static_stater9   r:   r;   r<   r   get_device_modulesynchronizer   r   r    rB   tagr+   r+   r,   release_memory_occupationy   s,   z5SchedulerUpdateWeightsMixin.release_memory_occupationr   c                 C  s   |j }|d u st|dkrt}|D ]}| j| qt|v r$| jt t|v r@| jt t	j
| j t| jjj| j | `t|v rJ| jt t S )Nr   )rB   rC   r   rD   remover   rF   resumer   r9   r:   r;   r<   _import_static_stater"   rI   rJ   rK   r   r   rN   r+   r+   r,   resume_memory_occupation   s$   z4SchedulerUpdateWeightsMixin.resume_memory_occupationr	   c              
   C  sl   z| j jj|jd tdddW S  ty5 } ztd|  t	  td| dW  Y d }~S d }~ww )N)actionTzSuccess.)r(   r)   zcheck_weights see error: F)
r"   rI   check_weightsrU   r
   	Exceptionr%   warning	traceback	print_exc)r   r    er+   r+   r,   rV      s   z)SchedulerUpdateWeightsMixin.check_weightsc                 C  sR   |d }| j j| | jd ur'|dd }|d usJ d| jj| d S d S )Nurl	draft_urlz6draft_url must be provided when draft model is enabled)r"   rI   save_remote_modelr7   get)r   paramsr\   r]   r+   r+   r,   r^      s   

z-SchedulerUpdateWeightsMixin.save_remote_modelc                 C  s$   | j jj|d |d |d d d S )Npathpatternmax_size)ra   rb   rc   )r"   rI   save_sharded_model)r   r`   r+   r+   r,   rd      s
   
z.SchedulerUpdateWeightsMixin.save_sharded_modelN)r   r   r    r   )r   r   r    r   )r   r   r    r   )r    r   r2   r3   )r   r   r    r   )r   r   r    r   )r   r   r    r   )r   r   r    r   )r   r   r    r   )r   r   r    r	   )r   r   )__name__
__module____qualname__r#   r.   r1   r4   r8   r>   r?   rP   rT   rV   r^   rd   r+   r+   r+   r,   r   ,   s    








!

	r   c                 C  s   t dd |  D dS )Nc                 S  s    g | ]\}}||   fqS r+   )detachclone).0namebufferr+   r+   r,   
<listcomp>   s    z(_export_static_state.<locals>.<listcomp>)buffersdictnamed_buffers)rJ   r+   r+   r,   rH      s
   rH   c                 C  s.   t |  }|d D ]
\}}||| d< q
d S )Nrn   .ro   )rJ   static_paramsself_named_buffersrk   tensorr+   r+   r,   rS      s   rS   )*
__future__r   loggingrY   typingr   r   r9   sglang.srt.constantsr   r   r   r   sglang.srt.managers.io_structr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   sglang.srt.managers.schedulerr   	getLoggerre   r%   r   rH   rS   r+   r+   r+   r,   <module>   s    X
 (