o
    TiM                     @   s*   d dl mZmZ d dlZG dd dZdS )    )IterableUnionNc                   @   s   e Zd ZU dZeed< ejed< eed< eed< deddfd	d
ZdedejfddZ	de
ee ef ddfddZedefddZdS )BlockedAllocatora  
    Allocator class for managing which blocks are free/used in the
    blocked KV-cache. This is a simple allocator that uses a linked list
    to keep track of which blocks are free/used. The cost of allocation/deallocation
    is O(blocks), where blocks is the number of blocks to allocate/deallocate.

    TODO(cmikeh2): Evaluate performance of this allocator and migrate
    to C++ if necessary.
    _num_blocks_blocks_head_free_blocks
num_blocksreturnNc                 C   sH   |dk rt d| || _tjd|d tjddd| _d| _|| _dS )z
        Initialize an allocator with `num_blocks` blocks. This requires at least
        `num_blocks` * 4 bytes of host memory.

        Parameters:
            num_blocks (int): The number of blocks to allocate.
           z6Blocked KV-cache must have at least 1 block, provided cpuT)dtypedevice
pin_memoryr   N)
ValueErrorr   torcharangeint32r   r   r   )selfr	    r   c/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/ragged/blocked_allocator.py__init__!   s   	
zBlockedAllocator.__init__c                 C   st   || j krtd| dtj|tjd}t|D ]}| j||< | j| j  | _d| j|| < |  j d8  _ q|S )a  
        Allocate a list of blocks from the associated KV-caches. This will
        return `num_blocks` blocks from the KV-cache if they are available,
        or raise an exception if there are not enough free blocks.

        Parameters:
            num_blocks (int): The number of blocks to allocate.

        Returns:
            List[int]: The list of blocks allocated.
        z3Not enough free blocks in the KV-cache to allocate z blocks)r   r   )	r   r   r   zerosr   ranger   r   item)r   r	   allocated_blocksir   r   r   allocate2   s   

zBlockedAllocator.allocateblocksc                 C   s   t |tr|g}|D ]"}|dk s|| jkrtd| d| j| dkr,td| dq
|D ]}| j| j|< || _|  jd7  _q/dS )	a  
        Return a list of blocks to the free pool. If a single invalid block is provided (i.e.,
        one that is out of range of the allocator or is already free), then an exception is raised
        and no blocks are freed.

        Parameters:
            blocks (Union[Iterable[int], int]): The list of blocks to free. If only one block
                is to be freed, this can be alone as an integer.
        r   zInvalid block z provided to freer   zBlock z is already freer   N)
isinstanceintr   r   r   r   r   )r   r   blockr   r   r   freeJ   s   

zBlockedAllocator.freec                 C   s   | j S )zC
        Return the number of free blocks in the KV-cache.
        )r   )r   r   r   r   free_blocksd   s   zBlockedAllocator.free_blocks)__name__
__module____qualname____doc__r!   __annotations__r   Tensorr   r   r   r   r#   propertyr$   r   r   r   r   r      s   
 

r   )typingr   r   r   r   r   r   r   r   <module>   s   