o
    
۾i                     @   sl   d dl Zd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ G dd dZejdejfd	d
ZdS )    N)tltriton)cdiv)async_copy_to_gpu)
InputBatchc                
   @   sJ   e Zd ZdededejfddZdejdede	e
 d	ejd
df
ddZdS )StructuredOutputsWorkermax_num_logits
vocab_sizedevicec                 C   sH   t j|t j|d| _t j|t|dft j|d| _|| _t j | _	d S )N)dtyper
       )
torchzerosint32logits_indicesr   grammar_bitmaskr
   cudaStreamcopy_stream)selfr   r	   r
    r   Y/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/structured_outputs.py__init__   s   z StructuredOutputsWorker.__init__logitsinput_batchgrammar_req_idsr   returnNc              	   C   s  |sd S t j| j t|| jd |jd  d}W d    n1 s$w   Y  g }|j}|j	 }dd t
|D }	|D ]}
|	|
 }|| }||d  }|t|| q>t j| j  t j|t jddd}| jd t| j|dd	}W d    n1 sw   Y  t j }|| j |jd }|t|ksJ |jd
 }d}|t||f}t| ||d|||d||d | j| d S )Nr   )outc                 S   s   i | ]\}}||qS r   r   ).0ireq_idr   r   r   
<dictcomp>+   s    zAStructuredOutputsWorker.apply_grammar_bitmask.<locals>.<dictcomp>   cpuT)r   r
   
pin_memory)non_blockingi    )
BLOCK_SIZE)r   r   streamr   r   r   shapereq_idscu_num_logits_nptolist	enumerateextendrangetensorr   r   lencopy_current_streamwait_streamr   r   _apply_grammar_bitmask_kernelstride)r   r   r   r   r   bitmaskmappingr*   cu_num_logitsreq_id_to_idxgrammar_req_idreq_idxlogits_start_idxlogits_end_idxr   r3   	num_masksr	   r'   gridr   r   r   apply_grammar_bitmask   sR   


	

z-StructuredOutputsWorker.apply_grammar_bitmask)__name__
__module____qualname__intr   r
   r   Tensorr   liststrnpndarrayrA   r   r   r   r   r      s    
r   r'   c                 C   s   t d}t || }t d}	|	| d t d|d  }
t j|||  |
 |
|k d}|d d d f t ddd d d f ? d@ dk}||}|	| t d| }t j| ||  | td |||k @ d d S )Nr   r"   r   )maskinf)r   
program_idloadarangereshapestorefloat)
logits_ptrlogits_stridelogits_indices_ptrbitmask_ptrbitmask_strider	   r'   bitmask_idx
logits_idxblock_idbitmask_offsetpacked_bitmaskr7   block_offsetr   r   r   r5   U   s    


0


r5   )numpyrI   r   vllm.triton_utilsr   r   vllm.utils.math_utilsr   vllm.v1.worker.gpu.buffer_utilsr   vllm.v1.worker.gpu.input_batchr   r   jit	constexprr5   r   r   r   r   <module>   s   I