o
    
۾i.!                     @   s   d dl mZ d dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZmZ G dd dZejd	ejfd
dZejdejdejfddZejdd ZdS )    )IterableN)tltriton)cdiv)PAD_SLOT_ID)StagedWriteTensorUvaBackedTensorc                
   @   s   e Zd Zdee dedededejf
ddZdeej	 d	ej	fd
dZ
dedeee df ded	dfddZd"ddZdej	d	eej	df fddZded	eej	df fddZdej	dej	dej	d	ej	fddZded	ej	fd d!ZdS )#BlockTablesblock_sizesmax_num_reqsmax_num_batched_tokensmax_model_lendevicec           
      C   s"  || _ || _|| _|| _|| _t| j | _g | _t| jD ]}| j | }t	| j|}t
| j|ftj|d}	| j|	 q| dd | jD | _tjdd | jD tj| jd| _tj| j tj| jd| _t| j| jftjd| _dd | jD | _| | j| _tj| j| jtj| jd| _d S )Ndtyper   c                 S   s   g | ]}|j qS  )gpu.0br   r   R/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/block_table.py
<listcomp>)   s    z(BlockTables.__init__.<locals>.<listcomp>c                 S   s   g | ]}|j d qS )r   )r   strider   r   r   r   r   ,   s    )r   c                 S   s   g | ]}t |jqS r   )torch
zeros_liker   r   r   r   r   r   ;   s    )r
   r   r   r   r   lennum_kv_cache_groupsblock_tablesranger   r   r   int32append_make_ptr_tensorblock_table_ptrstensorint64block_table_stridesblock_sizes_tensorr   
num_blocksinput_block_tablesinput_block_table_ptrszerosslot_mappings)
selfr
   r   r   r   r   i
block_sizemax_num_blocksblock_tabler   r   r   __init__   sR   

zBlockTables.__init__xreturnc                 C   s   t jdd |D t j| jdS )Nc                 S   s   g | ]}|  qS r   )data_ptr)r   tr   r   r   r   J   s    z0BlockTables._make_ptr_tensor.<locals>.<listcomp>r   )r   r#   uint64r   )r,   r2   r   r   r   r!   G   s   zBlockTables._make_ptr_tensor	req_indexnew_block_ids.	overwriteNc                 C   s`   t | jD ](}|s| jj||f nd}|| }| j| ||| |t| | jj||f< qd S )Nr   )r   r   r'   npr   stage_writer   )r,   r7   r8   r9   r-   start	block_idsr   r   r   append_block_idsM   s   zBlockTables.append_block_idsc                 C   s"   | j D ]}|  q| j  d S N)r   apply_writer'   copy_to_uva)r,   r0   r   r   r   apply_staged_writesY   s   

zBlockTables.apply_staged_writesidx_mappingc              	      sV   |j d  t| j f || j| j| j| jj| jjddd t	 fdd| j
D S )Nr      )
BLOCK_SIZEc                 3       | ]	}|d   V  qd S r?   r   r   r0   num_reqsr   r   	<genexpr>m       z2BlockTables.gather_block_tables.<locals>.<genexpr>)shape_gather_block_tables_kernelr   r"   r)   r%   r'   r   r   tupler(   )r,   rC   r   rH   r   gather_block_tables`   s   
	zBlockTables.gather_block_tablesrI   c                    s   t  fdd| jD S )Nc                 3   rF   r?   r   rG   rH   r   r   rJ   p   rK   z5BlockTables.get_dummy_block_tables.<locals>.<genexpr>)rN   r(   )r,   rI   r   rH   r   get_dummy_block_tableso   s   z"BlockTables.get_dummy_block_tablesquery_start_loc	positionsc                 C   sn   |j d }|j d }| j}t||d f || j|||| j| j| j| j| jdt	dd | jd d d |f S )Nr      rD   )PAD_IDTRITON_BLOCK_SIZE)
rL   r   _compute_slot_mappings_kernelr   r"   r%   r&   r+   r   r   )r,   rC   rQ   rR   rI   
num_tokens
num_groupsr   r   r   compute_slot_mappingsr   s$   


z!BlockTables.compute_slot_mappingsrW   c                 C   s"   | j t | j d d d |f S r?   )r+   fill_r   )r,   rW   r   r   r   get_dummy_slot_mappings   s   z#BlockTables.get_dummy_slot_mappings)r3   N)__name__
__module____qualname__listintr   r   r1   r   Tensorr!   rN   boolr>   rB   rO   rP   rY   r[   r   r   r   r   r	      sN    
9



r	   rE   c                 C   s   t d}t d}t | | }	|||  }
t |
|	 }t || }t|| t j}||	|  }t|| t j}|||  }t d||D ]!}|t d| }t j|| ||k d}t j|| |||k d qHd S )Nr   rS   mask)r   
program_idload	_load_ptrr   r   arangestore)batch_idx_to_req_idxsrc_block_table_ptrsdst_block_table_ptrsr%   num_blocks_ptrnum_blocks_striderE   group_id	batch_idxreq_idxgroup_num_blocks_ptrr'   r   src_block_table_ptrsrc_row_ptrdst_block_table_ptrdst_row_ptrr-   offsetr=   r   r   r   rM      s   

rM   rT   rU   c                 C   sN  t d}t d}|||	  }|t dd kr8t| ||D ]}|t d| }t j|| |
||k d qd S t|| t j}t || }t || }t || }t || }t || d }t|||D ]9}|t d| }t j|| ||k dd}|| }t |||  | }|| ||  }t j|| |||k d qkd S )Nr   rS   rc   )rd   other)	r   re   num_programsr   rh   ri   rg   r   rf   )rW   max_num_tokensrC   rQ   posr"   r%   r
   slot_mappings_ptrslot_mappings_striderT   rU   ro   rp   slot_mapping_ptrr-   rw   block_table_ptrblock_table_strider.   req_state_idx	start_idxend_idxrR   block_indicesblock_numbersslot_idsr   r   r   rV      s0   

rV   c                 C   s(   t | }t |t |}t |dS )N   )r   rf   castpointer_typemultiple_of)
ptr_to_ptr
elem_dtypeptrr   r   r   rg      s   
rg   )collections.abcr   r   vllm.triton_utilsr   r   vllm.utils.math_utilsr    vllm.v1.attention.backends.utilsr   vllm.v1.worker.gpu.buffer_utilsr   r   r	   jit	constexprrM   rV   rg   r   r   r   r   <module>   s(    ,