o
    پij7                     @  s.  d dl mZ d dlZd dlmZmZ d dlmZmZm	Z	m
Z
 d dlZd dlmZ erDd dlmZmZ d dlmZ d dlmZ d d	lmZ G d
d deZejG dd dZ		dEdFddZ	dGdHddZdId!d"ZdJd#d$Z		%dKdLd*d+Z	%dMdNd,d-Zd.d/ Z dOd7d8Z!	 dPdQd;d<Z"	dGdRdCdDZ#dS )S    )annotationsN)Enumauto)TYPE_CHECKINGListOptionalUnion)envs)LogitsMetadataLogitsProcessorOutput)ScheduleBatch)EagleVerifyOutput)NgramVerifyInputc                   @  s   e Zd Ze Ze ZdS )LogprobStageN)__name__
__module____qualname__r   PREFILLDECODE r   r   S/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/layers/utils/logprob.pyr      s    
r   c                   @  sF   e Zd ZU ded< dZded< dZded< dZded< dZded< dS )	InputLogprobsResulttorch.Tensorinput_token_logprobsNzOptional[List]input_top_logprobs_valinput_top_logprobs_idxinput_token_ids_logprobs_valinput_token_ids_logprobs_idx)r   r   r   __annotations__r   r   r   r   r   r   r   r   r      s   
 r   last_logitsr   logits_metadatar
   top_pOptional[torch.Tensor]temperaturereturnc                 C  s~   |du r|j }|du r|j}|jr| | } |jr6|dk r6ddlm} tj| dd}~ |||}t	|S tj
jj| ddS )z
    compute logprobs for the output token from the given logits.

    Returns:
        torch.Tensor: logprobs from logits
    Ng      ?r   )top_p_normalize_probs_torchdim)r!   r#   temp_scaled_logprobstop_p_normalized_logprobsanysglang.srt.layers.samplerr%   torchsoftmaxlognn
functionallog_softmax)r   r    r!   r#   r%   probsr   r   r   &compute_temp_top_p_normalized_logprobs    s   

r4   logprobstop_logprobs_nums	List[int]stageextend_logprob_pruned_lens_cpuOptional[List[int]]c           	        s  t |}| j|dd\     g }g }|tjkrAt|D ]\}|| d   | | d   q"||fS dt||D ]7\}|dkr[|g  |g  qH|fddt|D  | fddt|D  |7 qH||fS )Nr&   r'   r   c                   s    g | ]}|  d   qS Nr   .0j)kptvaluesr   r   
<listcomp>\        z(get_top_logprobs_raw.<locals>.<listcomp>c                   s    g | ]} |  d  qS r;   r   r<   )indicesr?   r@   r   r   rB   ]   rC   )	maxtopktolistr   r   	enumerateappendziprange)	r5   r6   r8   r9   max_ktop_logprobs_valtop_logprobs_idxi
pruned_lenr   )rD   r?   r@   rA   r   get_top_logprobs_rawB   s*   


  
rQ   all_logprobsc                 C  s   t | |jtj|jdS )N)r8   r9   )rQ   r6   r   r   r9   )rR   r    r   r   r   get_top_logprobs_prefillc   s   rS   c                 C     t | |tjdS N)r8   )rQ   r   r   )r5   r6   r   r   r   get_top_logprobsn   s   rV   Ftoken_ids_logprobsList[Optional[List[int]]]delay_cpu_copyboolc                   s   g g }}|t jkr6t|D ]#\}  d u r!|g  |g  q|| | f   |  q||fS d}t||D ]:\ }	|	dkrP|g  |g  q=| |||	  f }
||r`|
n|
  | fddt|	D  ||	7 }q=||fS )Nr   c                   s   g | ]} qS r   r   )r=   _	token_idsr   r   rB      s    z.get_token_ids_logprobs_raw.<locals>.<listcomp>)r   r   rH   rI   rG   rJ   rK   )r5   rW   r8   r9   rY   valsidxsrO   r@   rP   pos_logprobsr   r\   r   get_token_ids_logprobs_rawu   s,   





ra   c                 C  s   t | |jtj|j|dS )N)r8   r9   rY   )ra   rW   r   r   r9   )rR   r    rY   r   r   r   get_token_ids_logprobs_prefill   s   rb   c                 C  rT   rU   )ra   r   r   )r5   rW   r   r   r   get_token_ids_logprobs   s   rc   
top_k_numspruned_lensr   r   r   split_pruned_lenintc                 C  sN  | j d dkr	dS t|j}| j|dd}|j }	|j }
d}d}tt||D ]z\}\}}|dkr9||8 }nd}|dkrJ|	g  |	g  q*g }g }t
|D ]*}|| t|	krb|| } n|	|	||  d|  |	|
||  d|  qRt|dkr|dkr|d | |d | n
|	| |	| ||7 }q*|S )aj  Get top-k logprobs for each sequence in the chunk.

    Args:
        logprobs: Log probabilities tensor of shape [seq_len, vocab_size]
        logits_metadata: Metadata containing top-k and pruned length info
        top_k_nums: List of top-k numbers for each sequence
        pruned_lens: List of pruned lengths for each sequence
        input_top_logprobs_val: List to store top-k logprob values
        input_top_logprobs_idx: List to store top-k token indices
        split_pruned_len: Length of pruned tokens from previous chunk

    Returns:
        int: Number of remaining tokens to process in next chunk
    r      r'   Nr&   )shaperE   r6   rF   rA   rG   rD   rH   rJ   rI   rK   lenextend)r5   r    rd   re   r   r   rf   rL   retrA   rD   r@   next_split_pruned_lennr?   rP   validxr>   r   r   r   get_top_logprobs_chunk   s@   








rq   r   r   c                 C  s  | j d dkr	dS d}d}tt||D ]w\}\}	}
|dkr#|
|8 }
nd}|
dkr4|g  |g  qg }g }t|
D ]'}|| | j d krM|| } n|	durc|| || |	f   ||	 q<t|dkr|dkr}|d | |d | n
|| || ||
7 }q|S )a|  Get token_ids logprobs for each sequence in the chunk.

    Args:
        logprobs: Log probabilities tensor of shape [seq_len, vocab_size]
        logits_metadata: Metadata containing token IDs and pruned length info
        token_ids_logprobs: List of token IDs for each sequence
        pruned_lens: List of pruned lengths for each sequence
        input_token_ids_logprobs_val: List to store token logprob values
        input_token_ids_logprobs_idx: List to store token indices
        split_pruned_len: Length of pruned tokens from previous chunk

    Returns:
        int: Number of remaining tokens to process in next chunk
    r   Nr&   )ri   rH   rJ   rI   rK   rG   rj   rk   )r5   rW   re   r   r   rf   r@   rm   rn   r]   rP   ro   rp   r>   r   r   r   get_token_ids_logprobs_chunk   sF   






rr   batchr   res*Union[EagleVerifyOutput, NgramVerifyInput]logits_outputOptional[LogitsProcessorOutput]c                 C  s  |d u r|j }t|dr|j}n|j }| j}| j}|j}t|t|j	ks)J | j
j}| jj}|||  }tj rGtjjj|j	dd}	ntjjj|j	| dd}	|j}
dd |D }dd t||D }dd t||D }tdd	 |D }td
d	 |D }|rt|	|\|_|_|rt|	|\|_|_|	tjt|
| j
jd|
f |_ d}|j  }|
 }|j}|j}t| j!|ddD ]=\}}t"|D ]4}|j#r|j$%||  |j&%||  |j'dkr|sJ d|j(%||  |j)%||  |d7 }qqd S )Naccept_length_per_req_cpur&   r'   c                 S  s   g | ]}|d  qS )rh   r   )r=   acceptr   r   r   rB   \  s    z3add_output_logprobs_for_spec_v1.<locals>.<listcomp>c                 S  "   g | ]\}}t |D ]}|q
qS r   rK   )r=   num
num_tokensr[   r   r   r   rB   _      c                 S  rz   r   r{   )r=   r]   r}   r[   r   r   r   rB   e  r~   c                 s  s    | ]}|d kV  qdS )r   Nr   r=   xr   r   r   	<genexpr>l      z2add_output_logprobs_for_spec_v1.<locals>.<genexpr>c                 s  s    | ]}|d uV  qd S r;   r   r   r   r   r   r   m  r   )devicer   T)strictz0Inconsistent state: should_top_logprobs is Falserh   )*rv   hasattrrx   accept_lengthrG   r6   rW   accepted_indicesrj   next_token_logitssampling_infotemperatures	spec_infodraft_token_numr	   SGLANG_RETURN_ORIGINAL_LOGPROBgetr-   r0   r1   r2   verified_idrJ   r+   rV   next_token_top_logprobs_valnext_token_top_logprobs_idxrc   !next_token_token_ids_logprobs_val!next_token_token_ids_logprobs_idxaranger   next_token_logprobsreqsrK   return_logproboutput_token_logprobs_valrI   output_token_logprobs_idxtop_logprobs_numoutput_top_logprobs_valoutput_top_logprobs_idx)rs   rt   rv   rx   r6   rW   r   r   num_draft_tokensr5   batch_next_token_idsnum_tokens_per_req$top_logprobs_nums_repeat_interleaved%token_ids_logprobs_repeat_interleavedshould_top_logprobsshould_token_ids_logprobsr@   r   verified_idstoken_top_logprobs_valtoken_top_logprobs_idxreqr}   r[   r   r   r   add_output_logprobs_for_spec_v1:  s   






r   )NN)
r   r   r    r
   r!   r"   r#   r"   r$   r   r;   )r5   r   r6   r7   r8   r   r9   r:   )rR   r   r    r
   )r5   r   r6   r7   )NF)
r5   r   rW   rX   r8   r   r9   r:   rY   rZ   )F)r    r
   )r5   r   r    r
   rd   r7   re   r7   r   r   r   r   rf   rg   r$   rg   )r   )r5   r   rW   r7   re   r7   r   r   r   r   rf   rg   )rs   r   rt   ru   rv   rw   )$
__future__r   dataclassesenumr   r   typingr   r   r   r   r-   sglang.srt.environr	   "sglang.srt.layers.logits_processorr
   r   "sglang.srt.managers.schedule_batchr   !sglang.srt.speculative.eagle_infor   !sglang.srt.speculative.ngram_infor   r   	dataclassr   r4   rQ   rS   rV   ra   rb   rc   rq   rr   r   r   r   r   r   <module>   s@    &
!
!
PL