o
    پiIE                     @   sn  d Z ddlZddlZddlZddlZddlmZmZ ddlm	Z	m
Z
mZmZmZ ddlZddlZddlZddlZddlmZ ddlmZmZmZmZmZ ddlmZ ddlmZ dd	lmZm Z  dd
l!m"Z"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z- e.e/Z0e1ej23ddZ4ej5G dd dZ6G dd deZ7G dd deZ8e7fde defddZ9dS )?DetokenizerManager is a process that detokenizes the token ids.    N)OrderedDictdefaultdict)DictListOptionalTupleUnion)envs)BatchEmbeddingOutputBatchMultimodalDecodeReqBatchStrOutputBatchTokenIDOutputFreezeGCReq)MultiHttpWorkerDetokenizerMixin)start_cpu_monitor_thread)PortArgs
ServerArgs)configure_logger	freeze_gcget_zmq_socketkill_itself_when_parent_died)get_tokenizer)Watchdog)TypeBasedDispatcherfind_printable_textget_exception_tracebackSGLANG_DETOKENIZER_MAX_STATESi   c                   @   sB   e Zd ZU dZeed< ee ed< eed< eed< dZeed< dS )	DecodeStatusz)Store the status of incremental decoding.decoded_text
decode_idssurr_offsetread_offsetr   sent_offsetN)	__name__
__module____qualname____doc__str__annotations__r   intr#    r+   r+   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/detokenizer_manager.pyr   >   s   
 r   c                	   @   s*  e Zd ZdZdedefddZedee	 de
fdd	Zdefd
dZdefddZdefddZdd Zdd Zdee	ee f dede
fddZdefddZdeee  dee
 dee
 dee	 fdd Zdefd!d"Zdedee	d#B  d#B fd$d%Zdefd&d'Zdefd(d)Zd*e fd+d,Z!d#S )-DetokenizerManagerr   server_args	port_argsc                 C   s8   |  | | | | | |jrtd |   d S )Ndetokenizer)init_ipc_channelsinit_tokenizerinit_running_statusenable_metricsr   init_request_dispatcher)selfr.   r/   r+   r+   r,   __init__M   s   


zDetokenizerManager.__init__ridreturnc                 C   s   t | to	| dS )NHEALTH_CHECK)
isinstancer(   
startswith)r8   r+   r+   r,   is_health_check_requesta   s   z*DetokenizerManager.is_health_check_requestc                 C   s6   t d}t|t j|jd| _t|t j|jd| _d S )N   TF)	zmqContextr   PULLdetokenizer_ipc_namerecv_from_schedulerPUSHtokenizer_ipc_namesend_to_tokenizer)r6   r/   contextr+   r+   r,   r1   e   s   

z$DetokenizerManager.init_ipc_channelsc                 C   s.   |j rd | _d S t|j|j|j|jd| _d S )N)tokenizer_modetrust_remote_coderevision)skip_tokenizer_init	tokenizerr   tokenizer_pathrH   rI   rJ   r6   r.   r+   r+   r,   r2   n   s   
z!DetokenizerManager.init_tokenizerc                 C   sF   t td| _d| _|jdk| _|j| _tjd|j	dt
j d| _d S )N)capacityFzgpt-ossr-   T)
debug_namewatchdog_timeoutsofttest_stuck_time)LimitedCapacityDictDETOKENIZER_MAX_STATESdecode_statusis_dummytool_call_parseris_tool_call_parser_gpt_ossdisable_tokenizer_batch_decoder   createsoft_watchdog_timeoutr
   SGLANG_TEST_STUCK_DETOKENIZERgetsoft_watchdogrN   r+   r+   r,   r3   y   s   z&DetokenizerManager.init_running_statusc                 C   s.   t t| jft| jft| jft| jfg| _	d S N)
r   r   handle_batch_embedding_outr   handle_batch_token_id_outr   handle_multimodal_decode_reqr   handle_freeze_gc_req_request_dispatcherr6   r+   r+   r,   r5      s   
z*DetokenizerManager.init_request_dispatcherc                 C   s`   	 | j   | j }W d   n1 sw   Y  | |}|dur*| j| | j   q)z$The event loop that handles requestsTN)r_   disablerC   
recv_pyobjre   rF   
send_pyobjfeed)r6   recv_objoutputr+   r+   r,   
event_loop   s   

zDetokenizerManager.event_looprl   finished_reasonno_stop_trimc                 C   s   |s|s|S | dd }|s|S t|tr+t|tr+||}|dkr)|d | S |S t|trNt|trN|d dkr@| jr@|S t|dksHJ |d d S |S )NmatchediL r   )r^   r;   r(   findr*   listrY   len)r6   rl   rn   ro   rp   posr+   r+   r,   trim_matched_stop   s   
z$DetokenizerManager.trim_matched_stoprk   c                 C   s   |S r`   r+   r6   rk   r+   r+   r,   ra      s   z-DetokenizerManager.handle_batch_embedding_outids_list	skip_list
space_listc                    s   | j dusJ |d |d  t fdd|D r/tfdd|D r/| j j dS tt}tt||D ]\}\}}|||f | q:dgt }|	 D ]$\\}}}	| j jfdd	|	D ||d}
t|	|
D ]\}}|||< qpqU|S )
zSBatch decode with grouping by (skip_special_tokens, spaces_between_special_tokens).Nr   c                 3       | ]}| kV  qd S r`   r+   ).0s)
first_skipr+   r,   	<genexpr>   s    z;DetokenizerManager._grouped_batch_decode.<locals>.<genexpr>c                 3   r{   r`   r+   )r|   sp)first_spacer+   r,   r      s    
skip_special_tokensspaces_between_special_tokens c                    s   g | ]} | qS r+   r+   )r|   idx)rx   r+   r,   
<listcomp>   s    z<DetokenizerManager._grouped_batch_decode.<locals>.<listcomp>)
rL   allbatch_decoder   rs   	enumeratezipappendrt   items)r6   rx   ry   rz   groupsr   skipspaceresultsindicesdecodedtextr+   )r~   r   rx   r,   _grouped_batch_decode   s0   "
z(DetokenizerManager._grouped_batch_decodec              
      s  t |j}g g }}t|D ]X}|j| }| jvr6t|j| |j| d|j| d} |s5| j|< n j| }|j	|j|  |
 |j|jd  |j| |j|  |
|j|j|j  q js js ||j|j} ||j|j}	n-dd |D }dd |D }	n fddt||j|jD } fddt||j|jD }	g }
t|D ]}|j| } |rt|j| |j| d|j| d}nz j| }W n ty   td| d	t d
w |	| t || d  }|j| d u r$t |dkr|ds|j| |_|j|_t |j|_d}nt|}n
| jv r. j|=  |j| |j| |j| }||jd  }t ||_|

| q|
S )Nr   )r   r    r!   r"   c                 S      g | ]}d qS )dogr+   r|   _r+   r+   r,   r         zDDetokenizerManager._decode_batch_token_id_output.<locals>.<listcomp>c                 S   r   )catr+   r   r+   r+   r,   r     r   c                    $   g | ]\}}} j j|||d qS r   rL   decode)r|   surrr   r   rf   r+   r,   r         c                    r   r   r   )r|   readr   r   rf   r+   r,   r     r   z$Decode status not found for request a  . It may be due to the request being evicted from the decode status due to memory pressure. Please increase the maximum number of requests by setting the SGLANG_DETOKENIZER_MAX_STATES environment variable to a bigger value than the default value. The current value is zJ. For more details, see: https://github.com/sgl-project/sglang/issues/2812u   �r   )rt   ridsrangerV   r   decoded_textsr    read_offsetsr=   extendr   rv   r!   finished_reasonsro   r"   rZ   rW   r   r   r   r   KeyErrorRuntimeErrorrU   endswithr   r   r#   )r6   rk   bsread_idssurr_idsir8   r}   
surr_texts
read_textsoutput_strsnew_text
output_strincremental_outputr+   rf   r,   _decode_batch_token_id_output   s   











	

z0DetokenizerManager._decode_batch_token_id_outputNc                 C   s"   d }|j d urdd |j D }|S )Nc                 S   s0   g | ]}|d urt |  dnd qS )Nzutf-8)pybase64	b64encodenumpytobytesr   )r|   routed_expertsr+   r+   r,   r   ^  s    z>DetokenizerManager._extract_routed_experts.<locals>.<listcomp>)r   )r6   rk   r   r+   r+   r,   _extract_routed_expertsY  s   
z*DetokenizerManager._extract_routed_expertsc                 C   sX  t |jdkr| |ng }| |}td(i d|jd|jd|jd|d|jd|jd|j	d	|j
d
|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|jd|d|jdd dd d |jd!|jd"|j d#|j!d$|j"d%|j#d&|j$d'|j%S ))Nr   r   http_worker_ipcsr   r   
output_idsprompt_tokenscompletion_tokenscached_tokenscached_tokens_detailsspec_verify_ctspec_accepted_tokensspec_acceptance_histograminput_token_logprobs_valinput_token_logprobs_idxoutput_token_logprobs_valoutput_token_logprobs_idxinput_top_logprobs_valinput_top_logprobs_idxoutput_top_logprobs_valoutput_top_logprobs_idxinput_token_ids_logprobs_valinput_token_ids_logprobs_idxoutput_token_ids_logprobs_valoutput_token_ids_logprobs_idxoutput_token_entropy_valoutput_hidden_statesr   customized_infoplaceholder_tokens_idxplaceholder_tokens_valretraction_countstoken_stepsload
queue_timeforward_entry_timeprefill_launch_delayprefill_launch_latencyprefill_finished_tsr+   )&rt   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   )r6   rk   r   r   r+   r+   r,   rb   h  s   

	
 !"#$%&z,DetokenizerManager.handle_batch_token_id_outc                 C   s   t  r`   )NotImplementedErrorrw   r+   r+   r,   rc     s   z/DetokenizerManager.handle_multimodal_decode_reqrecv_reqc                 C   s   t d d S )NzDetokenizer Manager)r   )r6   r   r+   r+   r,   rd     s   z'DetokenizerManager.handle_freeze_gc_req)"r$   r%   r&   r'   r   r   r7   staticmethodr   r(   boolr=   r1   r2   r3   r5   rm   r	   r   r*   r   rv   r   ra   r   r   r   rs   r   rb   r   rc   r   rd   r+   r+   r+   r,   r-   J   sP    
	




(x
2r-   c                       s.   e Zd Zdef fddZ fddZ  ZS )rT   rO   c                    s   t  j|i | || _d S r`   )superr7   rO   )r6   rO   argskwargs	__class__r+   r,   r7     s   
zLimitedCapacityDict.__init__c                    s,   t | | jkr| jdd t || d S )NF)last)rt   rO   popitemr   __setitem__)r6   keyvaluer   r+   r,   r     s   zLimitedCapacityDict.__setitem__)r$   r%   r&   r*   r7   r   __classcell__r+   r+   r   r,   rT     s    rT   r.   r/   c                 C   s   t   td t|  t  }z|| |}| jdkr$|  W d S |  W d S  t	yI   t
 }td|  |  |tj Y d S w )Nzsglang::detokenizer   z%DetokenizerManager hit an exception: )r   setproctitler   psutilProcessparenttokenizer_worker_numrm   multi_http_worker_event_loop	Exceptionr   loggererrormaybe_clear_socket_mappingsend_signalsignalSIGQUIT)r.   r/   detokenizer_manager_classparent_processmanager	tracebackr+   r+   r,   run_detokenizer_process  s   


r   ):r'   dataclassesloggingosr   collectionsr   r   typingr   r   r   r   r	   r   r   r   r?   sglang.srt.environr
   sglang.srt.managers.io_structr   r   r   r   r   )sglang.srt.managers.multi_tokenizer_mixinr   sglang.srt.metrics.cpu_monitorr   sglang.srt.server_argsr   r   sglang.srt.utilsr   r   r   r   &sglang.srt.utils.hf_transformers_utilsr   sglang.srt.utils.watchdogr   sglang.utilsr   r   r   	getLoggerr$   r   r*   environr^   rU   	dataclassr   r-   rT   r   r+   r+   r+   r,   <module>   sD   
  Z