o
    
۾i<                     @   s   d dl Z d dlmZmZ ejdejfddZde jde jde jd	dfd
dZejdejdejfddZ	de jde jde jde jde jde
d	e jfddZdS )    N)tltriton
BLOCK_SIZEc                 C   s   t d}t || }t || t j}|dks|dkr!d S t d}	|	| t d| }
|
|k }t j| ||  |
 |d}|t j}|| }t j| ||  |
 ||d d S )Nr           g      ?   )mask)r   
program_idloadtofloat32arangestore)
logits_ptrlogits_strideidx_mapping_ptrtemperature_ptr
vocab_sizer   	batch_idxreq_state_idxtemperature	block_idxblockr   logits r   T/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/sample/gumbel.py_temperature_kernel   s   
	
 r   r   idx_mappingr   returnc                 C   s@   | j \}}d}t||}t||f | | d||||d d S )Ni    r   )r   )shaper   cdivr   stride)r   r   r   num_reqsr   r   
num_blocksr   r   r   apply_temperature"   s   


r#   APPLY_TEMPERATUREc                 C   sj  t d}t || }t d}|| t d| }||
k }t j|||  | |tdd}|t j}t |	| t j}|dkrt || }t || }t ||}t ||t j	}t 
t 
|d  d  }|t j}|r||| }t ||| td}t j|dd}|| | }t j|dd}t | ||  | | t |||  | | d S )Nr   r   z-inf)r   otherr   g#B;)axis)r   r   r	   r   floatr
   r   randintrandfloat64logwhereargmaxmaxr   )local_argmax_ptrlocal_argmax_stridelocal_max_ptrlocal_max_strider   r   r   	seeds_ptrpos_ptrtemp_ptrr   r   r$   r   r   r   r   r   r   tempseedposgumbel_seedrgumbel_noiseidxtoken_idvaluer   r   r   _gumbel_sample_kernel4   s6   

r?   r7   r8   c                 C   s   | j \}}d}t||}	tj||	tj| jd}
tj||	tj| jd}t||	f |
|
	d||	d| | 	d|||||||d |j
ddd}|
jd|dd}|S )	Ni   )dtypedevicer   )r   r$   T)dimkeepdim)rC   index)r   r   r   torchemptyint64rA   r   r?   r    r-   gatherview)r   r   r   r7   r8   r#   r!   r   r   r"   local_argmax	local_maxmax_block_idxsampledr   r   r   gumbel_samplem   sB   

rO   )rF   vllm.triton_utilsr   r   jit	constexprr   Tensorr#   r?   boolrO   r   r   r   r   <module>   sH   
8