o
    
۾i                     @   s   d dl Z d dlmZmZ d dlmZ ejdejdejfddZejdejfdd	Z	d
e j
de j
de j
fddZ	dd
e j
dede j
dee dB def
ddZdS )    N)tltriton)LogprobsTensors
BLOCK_SIZEPADDED_TOPKc                 C   sv  t d}|||  }	td}
td||D ]!}|t d| }t j|	| ||k tdd}t t ||
}
q|
t j	}
d}td||D ]3}|t d| }t j|	| ||k dd}|t j	}t 
||
 }t ||k |d}|t |7 }qEt |}t d|}||k }t j|||  | |dd}t j|	| |d}|t j	}||
 | }t j| ||  | ||d d S )Nr   -infmaskotherg        )r	   )r   
program_idfloatrangearangeloadmaxmaximumtofloat32expwheresumlogstore)
output_ptr
logits_ptrlogits_stridetopk_ids_ptrtopk
vocab_sizer   r   req_idxrow_ptrmax_valiblocklogitsseelsek_offsetk_masktopk_idso r,   U/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/sample/logprob.py_topk_log_softmax_kernel
   s0   

 r.   c                 C   s   t d}|||  }t || }t || }	d}
td||D ]%}|t d| }t j|| ||k tdd}|
t ||	kt j7 }
q!t 	| | |
 d S )Nr   r   r   )
r   r   r   r   r   r   r   r   int32r   )r   r   r   token_ids_ptrr   r   r   r    token_idxnr"   r#   r$   r,   r,   r-   _ranks_kernel4   s   
	r4   r$   	token_idsreturnc                 C   sb   | j \}}|tj}|j d }| j||ftjd}t|f || | d|||dt	|d |S )N   )dtyper   i   )r   r   )
shaper   torchint64	new_emptyr   r.   strider   next_power_of_2)r$   r5   
batch_sizer   num_logprobslogprobsr,   r,   r-   compute_token_logprobsK   s   


rB   r@   sampled_token_idscu_num_logitsc           
      C   s   |dksJ | j \}}|d}|dkr&tj| |ddj}tj||fdd}t| |}tj|tj| j	d}	t
|f |	| | d||dd t|||	|dS )	Nr   )dimr7   )r8   devicei    )r   )logprob_token_idsrA   selected_token_rankscu_num_generated_tokens)r9   	unsqueezer:   r   indicescatrB   emptyr;   rG   r4   r=   r   )
r$   r@   rC   rD   r?   r   rH   topk_indicesrA   token_ranksr,   r,   r-   compute_topk_logprobs_   s,   


rQ   )N)r:   vllm.triton_utilsr   r   vllm.v1.outputsr   jit	constexprr.   r4   TensorrB   intlistrQ   r,   r,   r,   r-   <module>   s@   )

