o
    پi>                     @  s   d dl mZ d dlZd dlZd dlmZmZmZmZm	Z	m
Z
mZ d dlZd dlm  m  mZ d dlmZ d dlmZ d dlmZ erJd dlmZ eeZejG dd	 d	ZdddZdS )    )annotationsN)TYPE_CHECKINGAnyCallableDictListOptionalTuple)CustomLogitProcessor)	TOP_K_ALL)get_global_server_args)ScheduleBatchc                   @  sR  e Zd ZU ded< ded< ded< ded< ded< ded< ded	< ded
< ded< dZded< dZded< dZded< dZded< dZded< dZ	ded< dZ
ded< dZded< dZded< dZded < dZded!< edFd$d%ZdFd&d'ZdGd*d+Zd,d- Zd.d/ Zd0d1 ZdHd3d4ZdId8d9ZdId:d;ZedJd@dAZdGdBdCZdDdE ZdS )KSamplingBatchInfotorch.Tensortemperaturestop_pstop_ksmin_psboolis_all_greedyneed_top_p_samplingneed_top_k_samplingneed_min_p_samplingint
vocab_sizeNzOptional[List]grammarsOptional[torch.Tensor]
vocab_maskz6Optional[Callable[[torch.Tensor, torch.Tensor], None]]apply_mask_funcz1Optional[penaltylib.BatchedPenalizerOrchestrator]penalizer_orchestratoracc_linear_penaltiesFhas_custom_logit_processorz(Optional[List[Optional[Dict[str, Any]]]]custom_params>Optional[Dict[int, Tuple[CustomLogitProcessor, torch.Tensor]]]custom_logit_processorsampling_seedcudastrdevice
logit_biasbatchr   c                   s  t  }|j}|j|j tjdd D tj ddd}tjdd D tj d}tjdd D tj d}tjdd D tj d}|rXtjd	d D tj	 dnd }	d }
t
d
d D rtjt| d}
tD ]\}}|jjd ur|jj D ]\}}||
|t|f< qqs|jot
dd D }|ri }tD ]\}}|jd u rq|j}||vrg ||< || | q fdd| D }dd D }nd }d }tj||tjtjtjhd}| d&i d|d|d|d|d|	dtdd D dt
dd D dt
dd D dt
dd D d|d |d!|d"|d#|d$ d%|
}||| |S )'Nc                 S     g | ]}|j jqS  )sampling_paramstemperature.0rr,   r,   [/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/sampling/sampling_batch_info.py
<listcomp>M       z9SamplingBatchInfo.from_schedule_batch.<locals>.<listcomp>dtyper(      c                 S  r+   r,   r-   top_pr/   r,   r,   r2   r3   R   r4   c                 S  r+   r,   r-   top_kr/   r,   r,   r2   r3   U   r4   c                 S  r+   r,   r-   min_pr/   r,   r,   r2   r3   X   r4   c                 S  s$   g | ]}|j jd ur|j jndqS )N*   )r-   r%   r/   r,   r,   r2   r3   \   s    c                 s  s    | ]	}|j jd uV  qd S N)r-   r)   r/   r,   r,   r2   	<genexpr>l       z8SamplingBatchInfo.from_schedule_batch.<locals>.<genexpr>)r(   c                 s  s    | ]}|j V  qd S r@   )r$   r/   r,   r,   r2   rA   v   s    c              
     sN   i | ]#\}}t |t|tjttjd dt|dj	 ddfqS ))r6   r   T)non_blocking)
hashr
   from_strtorchzeroslenr   scatter_tensorto)r0   processor_strtrue_indicesr(   reqsr,   r2   
<dictcomp>   s    	z9SamplingBatchInfo.from_schedule_batch.<locals>.<dictcomp>c                 S  r+   r,   )r-   r"   r/   r,   r,   r2   r3      r4   )r   r*   
penalizersr   r   r   r   r%   r   c                 s  s    | ]	}|j jd kV  qdS )r8   Nr;   r/   r,   r,   r2   rA      rB   r   c                 s  s    | ]	}|j jd kV  qdS )g      ?Nr9   r/   r,   r,   r2   rA      rB   r   c                 s  s    | ]	}|j jtkV  qd S r@   )r-   r<   r   r/   r,   r,   r2   rA      rB   r   c                 s  s    | ]	}|j jd kV  qdS )r   Nr=   r/   r,   r,   r2   rA      rB   r   r   r!   r"   r$   r(   r)   r,   )r   enable_deterministic_inferencerO   r(   rF   rJ   floatviewint32int64anyrG   rH   	enumerater-   r)   itemsr   enable_custom_logit_processorr$   append
penaltylibBatchedPenalizerOrchestratorBatchedFrequencyPenalizerBatchedMinNewTokensPenalizerBatchedPresencePenalizeralladjusted_from_schedule_batch)clsr*   r   global_server_argsenable_deterministicr   r   r   r   r%   r)   ir1   keyvaluer!   processor_dictrL   merged_custom_logit_processorr"   r   retr,   rN   r2   from_schedule_batchE   s   
		
	
z%SamplingBatchInfo.from_schedule_batchc                 C     d S r@   r,   )selfr*   r   r,   r,   r2   rb         z.SamplingBatchInfo.adjusted_from_schedule_batchother'SamplingBatchInfo'c                 C  rm   r@   r,   )rn   rp   r,   r,   r2   adjusted_merge_batch   ro   z&SamplingBatchInfo.adjusted_merge_batchc                 C  s
   t | jS r@   )rH   r   rn   r,   r,   r2   __len__   s   
zSamplingBatchInfo.__len__c                 C  s   | j sd | _d | _d S tdd | j D }|j| jt| j| jd| _|j	| _t
| j D ]\}}|r@|js@| s@|| j| q,|| j| j| _d S )Nc                 s  s    | ]}|r|V  qd S r@   r,   )r0   grammarr,   r,   r2   rA      s    z<SamplingBatchInfo.update_regex_vocab_mask.<locals>.<genexpr>)r   
batch_sizer(   )r   r   r   nextallocate_vocab_maskr   rH   r   r(   apply_vocab_maskrX   finishedis_terminatedfill_vocab_maskmove_vocab_mask)rn   first_grammarrf   ru   r,   r,   r2   update_regex_vocab_mask   s"   z)SamplingBatchInfo.update_regex_vocab_maskc                 C  sH   | j jrtjt| j| jftj| jjd| _	| j 
| j	 d S d | _	d S )Nr5   )r   is_requiredrF   rG   rH   r   r   float32r(   r    applyrs   r,   r,   r2   update_penalties   s   
z"SamplingBatchInfo.update_penaltieslogitsc                 C  sh   | j d ur|| j  | jr| jjr| j| | jd ur%| j|| jd | jd ur2|| j d S d S )N)r   r   )r    add_r   r   r   r   r   r)   )rn   r   r,   r,   r2   apply_logits_bias   s   


z#SamplingBatchInfo.apply_logits_biaskeep_indices	List[int]keep_indices_devicec                 C  sj   | j | | jr| || dD ]}t| |d }|d ur%t| |||  q| jd ur3| j| | _d S d S )Nr   r   r   r   r%   )r   filterr!   $_filter_batch_custom_logit_processorgetattrsetattrr)   )rn   r   r   itemrh   r,   r,   r2   filter_batch   s   
zSamplingBatchInfo.filter_batchc                   sV    fddj  D _ fdd|D _tj dkr)d_ d_d_dS dS )z3Filter the custom logit processor and custom paramsc                   s0   i | ]\}\}}t |  r|||  fqS r,   )rF   rW   )r0   kpmask)r   r,   r2   rP     s    
zJSamplingBatchInfo._filter_batch_custom_logit_processor.<locals>.<dictcomp>c                   s   g | ]} j | qS r,   )r"   )r0   rf   rs   r,   r2   r3     s    zJSamplingBatchInfo._filter_batch_custom_logit_processor.<locals>.<listcomp>r   NF)r$   rY   r"   rH   r!   )rn   r   r   r,   )r   rn   r2   r     s   

z6SamplingBatchInfo._filter_batch_custom_logit_processorlhsrhsbs1bs2c                 C  s8  | d u r
|d u r
d S | pi |pi } }t |  t | }i }|D ]u}|| v r0| | d n|| d }|| v r@| | d ntj|tj|d}	||v rS|| d ntj|tj|d}
|t|	|
gf||< || d jd || ksJ d|| d jd  d||  d|	d|
d|d	|d
| d|q$|S )Nr   r8   r5   zThe batch size of merged mask (z>) does not match the sum of the batch sizes of the two masks (z)
left_mask=z
right_mask=z
bs1=z
bs2=z
lhs=z
rhs=)setkeysunionrF   rG   r   catshape)r   r   r   r   r(   r   merged_dictr   	processor	left_mask
right_maskr,   r,   r2   merge_custom_logit_processor"  sD    z.SamplingBatchInfo.merge_custom_logit_processorc                 C  s4  | j |j  | js|jr>t| j|jt| t|| j| _| jp'd gt|  | _|jp2d gt| |_| j	|j d| _t
| j|jt| t|| jd| _dD ]!}t| |d }t||d }|d urr|d urrt| |t||g qQ|  j|jM  _|  j|jO  _|  j|jO  _|  j|jO  _| | d S )NTg        r   )r   merger!   r   r   r$   rH   r(   r"   extendmerge_bias_tensorr)   r   r   rF   r   r   r   r   r   rr   )rn   rp   r   self_val	other_valr,   r,   r2   merge_batchJ  s8   
zSamplingBatchInfo.merge_batchc                 C  s   |    tj| d dS )N)r   )r   dataclassesreplacers   r,   r,   r2   copy_for_forward}  s   z"SamplingBatchInfo.copy_for_forward)r*   r   r   r   )rp   rq   )r   r   )r   r   r   r   )
r   r#   r   r#   r   r   r   r   r(   r'   )__name__
__module____qualname____annotations__r   r   r   r   r    r!   r"   r$   r%   r(   r)   classmethodrl   rb   rr   rt   r   r   r   r   r   staticmethodr   r   r   r,   r,   r,   r2   r      sH   
 

u




'3r   r   r   r   r   r   r   r(   r'   defaultrS   c                 C  s   | du r
|du r
dS | dur|durt | |gS | dur)| jdd | j}}n|jdd |j}}| du rGt j|g|R ||d|} |du rZt j|g|R ||d|}t | |gS )a  Merge two bias tensors for batch merging.

    Args:
        lhs: Left-hand side tensor
        rhs: Right-hand side tensor
        bs1: Batch size of left-hand side tensor
        bs2: Batch size of right-hand side tensor
        device: Device to place the merged tensor on
        default: Default value for missing tensor elements

    Returns:
        Merged tensor or None if both inputs are None
    Nr8   )r(   r6   )rF   r   r   r6   emptyfill_)r   r   r   r   r(   r   r   r6   r,   r,   r2   r     s   r   )r   r   r   r   r   r   r   r   r(   r'   r   rS   )
__future__r   r   loggingtypingr   r   r   r   r   r   r	   rF   sglang.srt.sampling.penaltylibsrtsamplingr\   *sglang.srt.sampling.custom_logit_processorr
   #sglang.srt.sampling.sampling_paramsr   sglang.srt.server_argsr   "sglang.srt.managers.schedule_batchr   	getLoggerr   logger	dataclassr   r   r,   r,   r,   r2   <module>   s"    $
  o