o
    
۾i                     @   s   d dl Zd dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ G dd dZdS )    N)LogprobsMode)SamplingParams)apply_top_k_top_p)get_num_nans)apply_temperaturegumbel_sample)LogitBiasState)compute_topk_logprobs)apply_min_p)SamplerOutput)PenaltiesState)NO_LOGPROBSSamplingStatesc                   @   s   e Zd Z		d!dededejdedef
dd	Zd
edededdfddZ	dej
dejdejddfddZdej
dej
dejdejdej
dej
dej
defddZdej
dej
dejdej
dej
dej
deej
ej
f fdd ZdS )"Samplerraw_logprobs   max_num_reqs
vocab_sizedevicelogprobs_modenum_speculative_tokensc                 C   sT   |dvrt d| || _tj| _t||| _t|||| _t	||| _
|| _d S )N)processed_logprobsr   zUnsupported logprobs_mode: )NotImplementedErrorr   envsVLLM_COMPUTE_NANS_IN_LOGITScompute_nansr   sampling_statesr   penalties_stater   logit_bias_stater   )selfr   r   r   r   r    r    U/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/sample/sampler.py__init__   s   
zSampler.__init__req_idx
prompt_lensampling_paramsreturnNc                 C   s0   | j || | j|| | j||| d S N)r   add_requestr   r   )r   r#   r$   r%   r    r    r!   r(   (   s   zSampler.add_requestprefill_token_idsprefill_lensprompt_lensc                 C   s(   | j   | j||| | j  d S r'   )r   apply_staged_writesr   r   )r   r)   r*   r+   r    r    r!   r,   /   s
   
zSampler.apply_staged_writeslogitsidx_mappingidx_mapping_npcu_num_logits_nppos	input_idsexpanded_local_posc                 C   s   | j rt|nd }| ||||||\}	}
| j|}|tkr@| jdkr&|
}|jd |jd k}|r6| nd }t	|||	|}nd }t
|	dd||d}|S )Nr   r   r   )sampled_token_idslogprobs_tensorsnum_nans)r   r   sampler   max_num_logprobsr   r   shapetolistr	   r   view)r   r-   r.   r/   r0   r1   r2   r3   r7   sampledprocessed_logitsr9   expanded_logitscu_num_logitsr6   sampler_outputr    r    r!   __call__;   s2   	

zSampler.__call__c                 C   s   t j|t jd|}| j|||| | j|||||| j t	||| j
jj | j
|}|r:t||| j
jj | j
|}|rI| j
jj| nd }	| j
|}
|
rZ| j
jj| nd }|s`|
rft||	|}t||| j
jj| j
jj|dd}||fS )N)dtypeF)r   )torch
empty_likefloat32copy_r   apply_logit_biasr   apply_penaltiesr   r   r   temperaturegpudo_min_pr
   min_pdo_top_ktop_kdo_top_ptop_pr   r   seeds)r   r-   r.   r/   r1   r2   r3   rL   rN   rO   rP   rQ   r=   r    r    r!   r8   h   s:   

zSampler.sample)r   r   )__name__
__module____qualname__intrD   r   r   r"   r   r(   Tensornpndarrayr,   r   rB   tupler8   r    r    r    r!   r      s    


	
-r   )numpyrX   rD   	vllm.envsr   vllm.config.modelr   vllm.sampling_paramsr   $vllm.v1.sample.ops.topk_topp_samplerr   !vllm.v1.worker.gpu.metrics.logitsr    vllm.v1.worker.gpu.sample.gumbelr   r   $vllm.v1.worker.gpu.sample.logit_biasr   !vllm.v1.worker.gpu.sample.logprobr	   vllm.v1.worker.gpu.sample.min_pr
    vllm.v1.worker.gpu.sample.outputr   #vllm.v1.worker.gpu.sample.penaltiesr    vllm.v1.worker.gpu.sample.statesr   r   r   r    r    r    r!   <module>   s   