o
    پi                     @   s   d dl Z d dlmZmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZ d dlZd dlZd dlZer6d dlmZ edddefdd	ZG d
d deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZdS )    N)ABCabstractmethod)	lru_cache)TYPE_CHECKINGAnyDictListOptionalSet)Req)maxsizejson_strc                 C   s   t | }tt|d S )ztDeserialize a json string to a Callable object.
    This function is cached to avoid redundant deserialization.
    callable)orjsonloadsdillbytesfromhex)r   data r   ^/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/sampling/custom_logit_processor.py_cache_from_str   s   
r   c                
   @   sf   e Zd ZdZe	ddejdeee	e
ef   dejfddZede
fdd	Zed
e
fddZdS )CustomLogitProcessorz+Abstract base class for callable functions.Nlogitscustom_param_listreturnc                 C   s   t )zDefine the callable behavior.)NotImplementedErrorselfr   r   r   r   r   __call__   s   zCustomLogitProcessor.__call__c                 C   s   t dt|  iS )z<Serialize the callable function to a JSON-compatible string.r   )jsondumpsr   hex)clsr   r   r   to_str#   s   zCustomLogitProcessor.to_strr   c                 C   s
   t | S )z3Deserialize a callable function from a JSON string.)r   )r#   r   r   r   r   from_str(   s   
zCustomLogitProcessor.from_strN)__name__
__module____qualname____doc__r   torchTensorr	   r   r   strr   r   classmethodr$   r%   r   r   r   r   r      s    r   c                	   @   s:   e Zd Z	ddejdeeeee	f   dejfddZ
dS )DisallowedTokensLogitsProcessorNr   r   r   c                    sF   |d d  t  fdd|D sJ d|td |d f< |S )Nr   	token_idsc                 3   s    | ]	} |d  kV  qdS )r0   Nr   ).0cdisallowed_token_idsr   r   	<genexpr>5   s    
z;DisallowedTokensLogitsProcessor.__call__.<locals>.<genexpr>zcustom_param_list=inf.)allfloatr   r   r3   r   r   /   s   
z(DisallowedTokensLogitsProcessor.__call__r&   )r'   r(   r)   r+   r,   r	   r   r   r-   r   r   r   r   r   r   r/   .   s    r/   c                   @   sD   e Zd ZU dZeed< eed< eed< deeee	f  fddZ
dS )	ThinkingBudgetLogitProcessorz7A logit processor that controls the length of thinking.THINKING_START_TOKEN_IDTHINKING_END_TOKEN_IDNEW_LINE_TOKEN_IDr   c           
      C   s  |d u s|s|S t |D ]w\}}|d u rq|d}|d u s't|tr'|dk r(q|d}g |j|j}| j|vs?| j|v r@q|| j}t	|| d }	|	|k rSq|jr^|jd | j
krqtd ||d d f< d||| j
f< qtd ||d d f< d||| jf< q|S )Nthinking_budgetr   __req__   r6   g        )	enumerateget
isinstanceintorigin_input_ids
output_idsr:   r;   indexlenr<   r8   )
r   r   r   i
param_dictr=   reqcur_idsstart_indexnum_tokens_after_startr   r   r   r   C   s6   



z%ThinkingBudgetLogitProcessor.__call__N)r'   r(   r)   r*   rD   __annotations__listdictr-   r   r   r   r   r   r   r9   <   s   
 r9   c                   @   6   e Zd ZU dZdZeed< dZeed< dZeed< dS )	#Glm4MoeThinkingBudgetLogitProcessorzjA logit processor that controls the length of thinking for GLM-4.5 / GLM-4.6 / GLM-4.5V / GLM-4.6V models.i6O r:   i7O r;      r<   N	r'   r(   r)   r*   r:   rD   rO   r;   r<   r   r   r   r   rS   s   
   
 rS   c                   @   rR   )	!Qwen3ThinkingBudgetLogitProcessorzHA logit processor that controls the length of thinking for Qwen3 models.isP r:   itP r;   rT   r<   NrU   r   r   r   r   rW   {   rV   rW   c                   @   rR   )	&DeepSeekR1ThinkingBudgetLogitProcessorzNA logit processor that controls the length of thinking for DeepSeek-R1 models.i r:   i r;      r<   NrU   r   r   r   r   rX      rV   rX   c                	   @   s>   e Zd ZdZ	ddejdeeee	e
f   dejfddZdS )	&DeepseekOCRNoRepeatNGramLogitProcessorzJBlock n-gram repetitions within a sliding window for DeepSeek-OCR outputs.Nr   r   r   c              
   C   s  |s|S t |D ]\}}|sq|d}|d u rqzt|dp!d}t|dp*d}W n ttfy8   Y qw |dksA|dkrBq|j|j }t||k rOqtdt|| }	t|| d }
|
|	kreq|dkrut	||d  d  }nt	 }t
 }t|	|
D ]}||||  }|dkst	|d d |kr||d  q|dpg }z	dd	 |D }W n ttfy   t
 }Y nw || |sqt|}td
 |||f< q|S )Nr>   
ngram_sizer   window_sizer?   r@   whitelist_token_idsc                 S   s   h | ]}t |qS r   )rD   )r1   token_idr   r   r   	<setcomp>   s    zBDeepseekOCRNoRepeatNGramLogitProcessor.__call__.<locals>.<setcomp>r6   )rA   rB   rD   	TypeError
ValueErrorrE   rF   rH   maxtuplesetrangeadddifference_updaterP   r8   )r   r   r   	batch_idxparamsrK   r[   r\   sequencesearch_start
search_endcurrent_prefixbanned_tokensidxngramwhitelist_ids	whitelistindicesr   r   r   r      sX   


z/DeepseekOCRNoRepeatNGramLogitProcessor.__call__r&   )r'   r(   r)   r*   r+   r,   r	   r   r   r-   r   r   r   r   r   r   rZ      s    rZ   )r    abcr   r   	functoolsr   typingr   r   r   r   r	   r
   r   r   r+   "sglang.srt.managers.schedule_batchr   r-   r   r   r/   r9   rS   rW   rX   rZ   r   r   r   r   <module>   s$     7	