o
    پi                     @   s   d dl mZmZmZ d dlZd dlZd dlm  m	Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ G dd	 d	eZeZdS )
    )ListTupleUnionN)DllmAlgorithm)
DllmConfig)LogitsProcessorOutput)ForwardBatch)ModelRunnerc                	       sR   e Zd Zdef fddZdededeee	e
jf ee
j ef fddZ  ZS )	LowConfidenceconfigc                    s    t  | |jdd| _d S )N	thresholdgffffff?)super__init__algorithm_configgetr   )selfr   	__class__ \/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/dllm/algorithm/low_confidence.pyr      s   zLowConfidence.__init__model_runnerforward_batchreturnc                    sZ  |j }g |j| jk}t| dkr)|j|d d}|j|j}}g  | |fS t	|D ]'}|| j
 }	|	| j
 }
|j|	|
 }|| jk}| j
t|  }| q-t	| j
D ]}|j| jk}t| dkrm n|j|d d}|j|j}}||jjd | j
 ksJ t	|D ]w}|| j
 }|| j
 }|j||f }|| jk}t| dkrq|j||f }tj|dd}ttjtj|dddt|ddd}t|||}t||tj }|| jk}|  dkrtj|dd\}}d||< || ||< qqZ|j|d d}|j|j}}t|j|df  fd	d
t	|D }|||fS )Nr   )pp_proxy_tensors)dim)r   index   )kTc                    s    g | ]} || d f qS )Nr   ).0inext_token_ids
start_listr   r   
<listcomp>a   s    z%LowConfidence.run.<locals>.<listcomp>)
batch_size	input_idsmask_idtorchsumitemforwardlogits_outputcan_run_graphrange
block_sizeappendshapefull_logitsargmaxsqueezegatherFsoftmax	unsqueezewherenpinfr   topkreshape)r   r   r   r%   
mask_indexoutr,   can_run_cuda_graphblock_idblock_start	block_endblock_input_idsblock_mask_indexstart_batch_idcurr_block_startcurr_block_endcurr_logitsxp
confidencetransfer_indexselect_indexnext_token_ids_listr   r!   r   run   st   








!
zLowConfidence.run)__name__
__module____qualname__r   r   r	   r   r   r   r   r(   Tensorr   boolrR   __classcell__r   r   r   r   r
      s    r
   )typingr   r   r   numpyr:   r(   torch.nn.functionalnn
functionalr6   sglang.srt.dllm.algorithm.baser   sglang.srt.dllm.configr   "sglang.srt.layers.logits_processorr   ,sglang.srt.model_executor.forward_batch_infor   &sglang.srt.model_executor.model_runnerr	   r
   	Algorithmr   r   r   r   <module>   s    Z