o
    پi                     @   s   d dl Z d dlmZ d dlmZ d dlmZmZ d dlZd dl	m
Z
 G dd de jZdejfd	d
ZeddG dd dZG dd deZdS )    N)OrderedDict)	dataclass)ListOptional)BaseTokenToKVPoolAllocatorc                	   @   s   e Zd Zejdd Zedee de	e fddZ
ej	ddee de	e de	ej fd	d
ZejdedejdedefddZejdedefddZejdededefddZejdd Zejdd ZdS )MultimodalCachec                 C   s   d S N selfr	   r	   Y/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/mem_cache/multimodal_cache.py__init__   s   zMultimodalCache.__init__	mm_hashesreturnc                 C   s   | sdS t t| S )zD
        Get a combined hash from individual mm item hashes
        N)hashtuple)r   r	   r	   r   combine_hashes   s   zMultimodalCache.combine_hashesNcombined_hashc                 C      t  )z
        Extract the embedding with the hash-ids of the queried items. Try combined hash first, if missed, fallback to individual hashes
        The returned tensor may not be contiguous
        NotImplementedError)r   r   r   r	   r	   r   get   s   zMultimodalCache.getmm_hash	embeddingmm_embedding_allocatorc                 C   r   )zQ
        Set the embedding to the pre-allocated locations with a hash id
        r   )r   r   r   r   r	   r	   r   set$   s   
zMultimodalCache.setc                 C   r   r   r   r   r   r	   r	   r   has0      zMultimodalCache.hasc                 C   r   r   r   )r   r   r   r	   r	   r   free4   s   zMultimodalCache.freec                 C   r   r   r   r
   r	   r	   r   clear:   r   zMultimodalCache.clearc                 C   r   r   r   r
   r	   r	   r   available_size>   r   zMultimodalCache.available_sizer   )__name__
__module____qualname__abcabstractmethodr   staticmethodr   intr   r   torchTensorr   r   boolr   r   r   r    r!   r	   r	   r	   r   r      sL    
	
r   r   c                 C   s   |   |   S r   )element_sizenumel)r   r	   r	   r   _get_tensor_sizeC   s   r.   T)kw_onlyc                   @   s   e Zd ZU ejed< dS )EmbeddingResultr   N)r"   r#   r$   r)   r*   __annotations__r	   r	   r	   r   r0   G   s   
 r0   c                	       s   e Zd ZdZdef fddZ	ddee dee dee fd	d
Z		ddededee
j defddZdedefddZdededefddZdd Zdd Zdd Z  ZS )MultiModalStaticCachez
    A server-level cache for multimodal embedding.
    Embeddings are computed prior, and this cache does not really pre-alloc
    max_sizec                    s"   t    || _t | _d| _d S Nr   )superr   r3   r   mm_cachecurrent_size)r   r3   	__class__r	   r   r   R   s   

zMultiModalStaticCache.__init__Nr   r   r   c                 C   s.   |  |}| j|}|d ur| j| |S r   )r   r6   r   move_to_end)r   r   r   r   r	   r	   r   r   [   s
   
zMultiModalStaticCache.getr   r   locc                 C   s   t |ts	J ||| jv r| j| dS t|j}| j| | jkrC| js(dS | jjdd\}}|  jt|j8  _| j| | jks#|| j|< |  j|7  _dS )NTF)last)	
isinstancer0   r6   r:   r.   r   r7   r3   popitem)r   r   r   r;   	data_sizelru_hashlru_embeddingr	   r	   r   r   f   s   


zMultiModalStaticCache.setc                 C   s
   || j v S r   )r6   r   r	   r	   r   r   {      
zMultiModalStaticCache.hasr   c                 C   s2   || j vrdS | j |}|  jt|j8  _dS )NFT)r6   popr7   r.   r   )r   r   r   old_embeddingr	   r	   r   r   ~   s
   
zMultiModalStaticCache.freec                 C   s   | j   d| _d S r4   )r6   r    r7   r
   r	   r	   r   r       s   

zMultiModalStaticCache.clearc                 C   s
   t | jS r   )lenr6   r
   r	   r	   r   __len__   rB   zMultiModalStaticCache.__len__c                 C   s   |   S r   )rF   r
   r	   r	   r   r!      s   z$MultiModalStaticCache.available_sizer   )r"   r#   r$   __doc__r(   r   r   r   r0   r   r)   r*   r+   r   r   r   r   r    rF   r!   __classcell__r	   r	   r8   r   r2   L   sD    



	r2   )r%   collectionsr   dataclassesr   typingr   r   r)   sglang.srt.mem_cache.allocatorr   ABCr   r*   r.   r0   r2   r	   r	   r	   r   <module>   s    8