o
    i3                     @   sx   d dl Zd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ eeZG dd dZG d	d
 d
ZdS )    N)get_dcp_groupget_pcp_group)init_loggercdiv)CpuGpuBuffer)get_total_cp_world_sizec                   @   sV  e Zd Zdedededededejdedefd	d
Zdee deddfddZ	dee deddfddZ
dededdfddZdededdfddZdejdejddfddZdeddfddZd eddfd!d"Zd4d#d$Zed%ejd&ed'ejdejfd(d)Zdedejfd*d+Zdejfd,d-Zdejfd.d/Zd0eejB d1ejdefd2d3ZdS )5
BlockTable
block_sizemax_num_reqsmax_num_blocks_per_reqmax_num_batched_tokens
pin_memorydevicekernel_block_sizecp_kv_cache_interleave_sizec	           	      C   sV  || _ || _|| _|| _||kr|| _d| _d| _n|| dkr+td| d| d|| _|| | _d| _|| j | _| j	| j | jt
jd| _tj|tjd| _| j	| jt
jd| _| jrktd| jdd	| _nd
| _zt j| _t j| _W n ty   d| _d| _Y nw zt j| _t j| _W n ty   d| _d| _Y nw || _d
S )a  
        Args:
            block_size: Block size used for KV cache memory allocation
            max_num_reqs: Maximum number of concurrent requests supported.
            max_num_blocks_per_req: Maximum number of blocks per request.
            max_num_batched_tokens: Maximum number of tokens in a batch.
            pin_memory: Whether to pin memory for faster GPU transfers.
            device: Target device for the block table.
            kernel_block_size: The block_size of underlying attention kernel.
                Will be the same as `block_size` if `block_size` is supported
                by the attention kernel.
           Fr   zkernel_block_size z( must divide kv_manager_block_size size z evenlyT)dtypeN)r   r   r   r   r
   blocks_per_kv_blockuse_hybrid_blocks
ValueErrorr   _make_buffertorchint32block_tablenpzerosnum_blocks_per_rowint64slot_mappingarangereshape_kernel_block_aranger   
world_sizepcp_world_sizerank_in_grouppcp_rankAssertionErrorr   dcp_world_sizedcp_rankr   )	selfr
   r   r   r   r   r   r   r    r,   P/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/worker/block_table.py__init__   sZ   





zBlockTable.__init__	block_idsrow_idxreturnNc                 C   sf   |sd S | j r| t|| j| j}t|}| j| }| j|  |7  < || jj|||| f< d S N)	r   map_to_kernel_blocksr   arrayr   r#   lenr   r   )r+   r/   r0   
num_blocksstartr,   r,   r-   
append_rowd   s   
zBlockTable.append_rowc                 C   s   d| j |< | || d S Nr   )r   r8   )r+   r/   r0   r,   r,   r-   add_rowv   s   
zBlockTable.add_rowsrctgtc                 C   s<   | j | }| jj}||d |f ||d |f< || j |< d S r2   r   r   r   )r+   r;   r<   r6   block_table_npr,   r,   r-   move_rowz   s   
zBlockTable.move_rowc                 C   s:   ||g||g}}| j | | j |< | jj| | jj|< d S r2   r=   )r+   r;   r<   src_tgttgt_srcr,   r,   r-   swap_row   s   zBlockTable.swap_rowreq_indices	positionsc                 C   s  | j | j }| j| j | j }|dkr^| j| }|| j ||  }| jj | }|| }|| j	 | |k}	||| j	  | j	 || j	  }
|| j |
 }t
|	|d| jjd |jd < d S || j || j  }| jj | }|| j }
tj|| j |
| jjd |jd  d d S )Nr   r   r   )out)r%   r)   r'   r*   r
   r   r   r   ravelr   wherer    shapeadd)r+   rC   rD   total_cp_world_sizetotal_cp_rankvirtual_block_sizeblock_table_indicesblock_numbersvirtual_block_offsetsmaskblock_offsetsr    r,   r,   r-   compute_slot_mapping   sN   	


zBlockTable.compute_slot_mappingnum_reqsc                 C      | j | d S r2   )r   copy_to_gpur+   rS   r,   r,   r-   commit_block_table      zBlockTable.commit_block_table
num_tokensc                 C   rT   r2   )r    rU   )r+   rY   r,   r,   r-   commit_slot_mapping   rX   zBlockTable.commit_slot_mappingc                 C   s    | j jd | j jd d S r9   )r   gpufill_cpur+   r,   r,   r-   clear   s   zBlockTable.clearkv_manager_block_idsr   kernel_block_arangec                 C   s*   |dkr| S |  dd| | }| dS )u5  Convert kv_manager_block_id IDs to kernel block IDs.

        Example:
            # kv_manager_block_ids: 32 tokens,
            # Kernel block size: 16 tokens
            # blocks_per_kv_block = 2
            >>> kv_manager_block_ids = np.array([0, 1, 2])
            >>> Result: [0, 1, 2, 3, 4, 5]

            # Each kv_manager_block_id maps to 2 kernel block id:
            # kv_manager_block_id 0 → kernel block id [0, 1]
            # kv_manager_block_id 1 → kernel block id [2, 3]
            # kv_manager_block_id 2 → kernel block id [4, 5]
        r   r   )r"   )r`   r   ra   kernel_block_idsr,   r,   r-   r3      s   
zBlockTable.map_to_kernel_blocksc                 C   s   | j jd| S )z-Returns the device tensor of the block table.N)r   r[   rV   r,   r,   r-   get_device_tensor   s   zBlockTable.get_device_tensorc                 C      | j jS )z*Returns the CPU tensor of the block table.)r   r]   r^   r,   r,   r-   get_cpu_tensor      zBlockTable.get_cpu_tensorc                 C   rd   )z+Returns the numpy array of the block table.)r   r   r^   r,   r,   r-   get_numpy_array   rf   zBlockTable.get_numpy_arraysizer   c                G   s   t ||| j| jdS )N)r   r   r   )r   r   r   )r+   r   rh   r,   r,   r-   r      s   zBlockTable._make_bufferr1   N)__name__
__module____qualname__intboolr   r   r.   listr8   r:   r?   rB   r   ndarrayrR   rW   rZ   r_   staticmethodr3   Tensorrc   re   rg   SymIntr   r   r   r,   r,   r,   r-   r	      st    	
S

<
r	   c                   @   s$  e Zd ZdZ		d-dededededejd	ee d
ee dee dB deddfddZ	de
ee df deddfddZde
ee df deddfddZdededdfddZdededdfddZdejdejddfdd Zd!eddfd"d#Zd$eddfd%d&Zd.d'd(Zd)edd*fd+d,ZdS )/MultiGroupBlockTablez(The BlockTables for each KV cache group.Nr   r   max_model_lenr   r   r   block_sizeskernel_block_sizesmax_num_blocksr   r1   c
           
         s   t |t |krtdt | dt | d|d u r(t fdd|D }t |t |kr?tdt | dt | d fddt|||D | _d S )Nzkernel_block_sizes length (z!) must match block_sizes length ()c                    s   g | ]	}t  | qS r,   r   ).0r
   )ru   rJ   r,   r-   
<listcomp>  s    z1MultiGroupBlockTable.__init__.<locals>.<listcomp>zmax_num_blocks length (c                    s(   g | ]\}}}t ||| qS r,   )r	   )rz   r
   r   r   )r   r   r   r   r   r,   r-   r{   "  s    )r5   r   r   zipblock_tables)
r+   r   ru   r   r   r   rv   rw   rx   r   r,   )r   r   ru   r   r   r   rJ   r-   r.      s,   zMultiGroupBlockTable.__init__r/   .r0   c                 C   (   t | jD ]\}}||| | qd S r2   )	enumerater}   r8   r+   r/   r0   ir   r,   r,   r-   r8   2     zMultiGroupBlockTable.append_rowc                 C   r~   r2   )r   r}   r:   r   r,   r,   r-   r:   6  r   zMultiGroupBlockTable.add_rowr;   r<   c                 C      | j D ]}||| qd S r2   )r}   r?   r+   r;   r<   r   r,   r,   r-   r?   :     
zMultiGroupBlockTable.move_rowc                 C   r   r2   )r}   rB   r   r,   r,   r-   rB   >  r   zMultiGroupBlockTable.swap_rowrC   rD   c                 C   r   r2   )r}   rR   )r+   rC   rD   r   r,   r,   r-   rR   B  s   
z)MultiGroupBlockTable.compute_slot_mappingrS   c                 C      | j D ]}|| qd S r2   )r}   rW   )r+   rS   r   r,   r,   r-   rW   H     
z'MultiGroupBlockTable.commit_block_tablerY   c                 C   r   r2   )r}   rZ   )r+   rY   r   r,   r,   r-   rZ   L  r   z(MultiGroupBlockTable.commit_slot_mappingc                 C   s   | j D ]}|  qd S r2   )r}   r_   )r+   r   r,   r,   r-   r_   P  s   

zMultiGroupBlockTable.clearidxr	   c                 C   s
   | j | S )z3Returns the BlockTable for the i-th KV cache group.)r}   )r+   r   r,   r,   r-   __getitem__T  s   
z MultiGroupBlockTable.__getitem__)Nr   ri   )rj   rk   rl   __doc__rm   rn   r   r   ro   r.   tupler8   r:   r?   rB   r   rp   rR   rW   rZ   r_   r   r,   r,   r,   r-   rt      sP    
	

"2"

rt   )numpyr   r   vllm.distributedr   r   vllm.loggerr   vllm.utils.math_utilsr   vllm.v1.utilsr   vllm.v1.worker.cp_utilsr   rj   loggerr	   rt   r,   r,   r,   r-   <module>   s    n