o
    Ti                     @   s   d dl Z d dlmZmZmZmZ d dlmZ d dlm	Z	 d dl
mZ ddlmZ ddlmZ dd	lmZmZ dd
lmZ G dd dZdS )    N)AnyDictOptionalTuple)get_accelerator)RaggedUtilsBuilder)logger   )BlockedAllocator)BlockedKVCache)DSStateManagerConfigKVCacheConfig)DSSequenceDescriptorc                	   @   s  e Zd ZU dZeed< 	 ee ed< 	 eed< 	 e	e
ef ed< 	 eed< eejdf ed< eejdf ed	< 	
d,dedeedf dee dd
fddZd-de
de
dejfddZde
dd
fddZde
dee fddZde
defddZde
defddZede	e
ef fdd Zede
fd!d"Zede
fd#d$Zede
fd%d&Zedejfd'd(Zd-d)e
de
dejfd*d+Zd
S ).DSStateManagerzu
    Base abstract class for managing blocked KV caches. Will probably have a single
    implementation for now.
    _config_kv_configs	_kv_cache_seqs_tracking_allocator._all_block_ids_all_block_ids_shadowNconfig
kv_configsbase_mp_groupreturnc                 C   s   || _ || _t  | _t| j j| _g }g }| jD ]%}| j j|j|j	f}|
tj|tjt  d |
| j|d  qt|| _t|| _i | _t| j| j j|| j jd| _dS )z|
        The key

        Parameters:
            block_size (int): The number of tokens to allocate in each block.
        )dtypedevice)mp_groupoffloadN)r   r   r   load_ragged_utilsr
   max_tracked_sequencesr   num_allocation_groupsmax_blocks_per_allocation_groupappendtorchzerosint32r   current_deviceallocate_fast_host_buffertupler   r   r   r   memory_configr   r   )selfr   r   r   all_block_idsall_block_ids_shadowcache_config	ids_shape r2   `/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/ragged/ragged_manager.py__init__7   s*   



zDSStateManager.__init__r   cache_idcache_groupc                 C      | j j||dS )z
        Return the Tensor associated with the given cache id in the specified cache group.

        Arguments:
            cache_group (str): The KV cache group.
            cache_id (int): The cache id within that group.
        r6   )r   	get_cache)r-   r5   r6   r2   r2   r3   r9   d   s   zDSStateManager.get_cacheuidc                 C   sl   || j vrtd| d dS | j | }t| jD ]}| jj|j|d|d q| j|j	 | j |= dS )zK
        Free all resources associated with the given sequence id.
        zAttempting to flush sequence z which does not exist.Nr8   )
r   r   warningrangen_kv_cache_groupsr   freer.   r   tracking_id)r-   r:   seqir2   r2   r3   flush_sequencen   s   

zDSStateManager.flush_sequencec                 C   s   | j |dS )z
        Get the sequence descriptor for the given sequence id. If the sequence does not exist,
        then None is returned.
        N)r   get)r-   r:   r2   r2   r3   get_sequence}   s   zDSStateManager.get_sequencec                 C   s    |  |}|dur|S | |S )aA  
        Get the existing sequence descriptor for a given uid or initialize one if
        it does not exist. NOTE: This will always return a valid sequence descriptor
        if one may be allocated and should not be used from APIs that are attempting
        to test the schedulability of a hypothetical batch.
        N)rD   _create_sequence)r-   r:   r@   r2   r2   r3   get_or_create_sequence   s   

z%DSStateManager.get_or_create_sequencec                    s   || j v rtd| dz
| jd  W n ty&   td| dw t fdd| jD }t fdd| jD }t	 ||| j
jd	| j |< td
| d  d | j | S )zM
        Create a new sequence descriptor for the given sequence id.
        z	Sequence z already exists.r	   z,Unable to create tracking slot for sequence z% since the metadata buffers are full.c                 3       | ]}|  V  qd S Nr2   ).0r.   tracking_slotr2   r3   	<genexpr>   s    z2DSStateManager._create_sequence.<locals>.<genexpr>c                 3   rG   rH   r2   )rI   r/   rJ   r2   r3   rL      s    )max_contextzCreated sequence z with tracking slot .)r   
ValueErrorr   allocateitemRuntimeErrorr+   r   r   r   r   rM   r   debug)r-   r:   seq_block_idsseq_block_ids_shadowr2   rJ   r3   rE      s(   


zDSStateManager._create_sequencec                 C   s   | j S )z/
        Return the tracked sequences.
        )r   r-   r2   r2   r3   tracked_sequences   s   z DSStateManager.tracked_sequencesc                 C   s
   t | jS )zC
        Return the number of sequences currently tracked.
        )lenr   rV   r2   r2   r3   n_tracked_sequences   s   
z"DSStateManager.n_tracked_sequencesc                 C      | j jS )z8
        Return the block size of the KV cache.
        )
_kv_config
block_sizerV   r2   r2   r3   kv_block_size      zDSStateManager.kv_block_sizec                 C   rZ   )z1
        Return the number of KV caches.
        )r   
num_cachesrV   r2   r2   r3   r=      r^   z DSStateManager.n_kv_cache_groupsc                 C   rZ   )zC
        Return the number of free blocks in the KV cache.
        )r   free_blocksrV   r2   r2   r3   r`      r^   zDSStateManager.free_blocksn_blocksc                 C   r7   )Nr8   )r   reserve)r-   ra   r6   r2   r2   r3   allocate_blocks   s   zDSStateManager.allocate_blocksrH   )r   )__name__
__module____qualname____doc__r   __annotations__r   r   r   r   intr   r
   r&   Tensorr   r   r4   r9   rB   rD   rF   rE   propertyrW   rY   r]   r=   r`   rc   r2   r2   r2   r3   r      sN   
 

-
r   )r&   typingr   r   r   r   deepspeed.acceleratorr   deepspeed.ops.op_builderr   deepspeed.utils.loggingr   blocked_allocatorr
   kv_cacher   manager_configsr   r   sequence_descriptorr   r   r2   r2   r2   r3   <module>   s   