o
    ©Ì³i<  ã                   @   s6   d dl mZ d dlZd dlmZ G dd„ dejƒZdS )é    )ÚTupleN)Únnc                       s~   e Zd ZdZdededededejddf‡ fd	d
„Zddd„Ze	defdd„ƒZ
dejdejdeejejf fdd„Z‡  ZS )ÚKVCachea   
    Standalone ``nn.Module`` containing a kv-cache to cache past key and values during inference.

    Args:
        batch_size (int): batch size model will be run with
        max_seq_len (int): maximum sequence length model will be run with
        num_kv_heads (int): number of key/value heads.
        head_dim (int): per-attention head embedding dimension
        dtype (torch.dtype): dtype for the caches
    Ú
batch_sizeÚmax_seq_lenÚnum_kv_headsÚhead_dimÚdtypeÚreturnNc                    sp   t ƒ  ¡  ||||f}| jdtj||ddd | jdtj||ddd | jdt d|d ¡dd || _d S )	NÚk_cache)r	   F)Ú
persistentÚv_cacheÚ	cache_posr   é   )ÚsuperÚ__init__Úregister_bufferÚtorchÚzerosÚaranger   )Úselfr   r   r   r   r	   Úcache_shape©Ú	__class__© úN/home/ubuntu/.local/lib/python3.10/site-packages/torchtune/modules/kv_cache.pyr      s   
ÿÿÿ
zKVCache.__init__c                 C   s(   | j  ¡  | j ¡  |  j| j8  _dS )zReset the cache to zero.N)r   Úzero_r   r   Úsize©r   r   r   r   Úreset.   s   

zKVCache.resetc                 C   s   | j d  ¡ S )Nr   )r   Úitemr   r   r   r   r   4   s   zKVCache.sizeÚk_valÚv_valc                 C   sÀ   |j \}}}}|| jj d kr!td| jj d › d|j d › dƒ‚| jd | | jj d ks0J ‚| j}| j}||dd…dd…| jd|… f< ||dd…dd…| jd|… f< | j |¡ ||fS )aP  Update KV cache with the new ``k_val``, ``v_val`` and return the updated cache.

        Note:
            When updating the KV cache, it is assumed that subsequent updates should update key-value
            positions in consecutive sequence positions. If you wish to update cache values which have
            already been filled, use ``.reset()``, which will reset the cache to the zero-th position.

        Example:
            >>> cache = KVCache(batch_size=2, max_seq_len=16, num_kv_heads=4, head_dim=32, dtype=torch.bfloat16)
            >>> keys, values = torch.ones((2, 4, 8, 32)), torch.ones((2, 4, 8, 32))
            >>> cache.update(keys, values)
            >>> # now positions 0 through 7 are filled
            >>> cache.size
            >>> 8
            >>> keys, values = torch.ones((2, 4, 1, 32)), torch.ones((2, 4, 1, 32))
            >>> cache.update(keys, values)
            >>> # this will fill at position 8
            >>> cache.size
            >>> 9

        Args:
            k_val (torch.Tensor): Current key tensor with shape [B, H, S, D]
            v_val (torch.Tensor): Current value tensor with shape [B, H, S, D]

        Returns:
            Tuple[torch.Tensor, torch.Tensor]: Updated key and value cache tensors, respectively.

        Raises:
            ValueError: if the batch size of the new key (or value) tensor is greater than the batch size
                used during cache setup.

        Note:
            This function will raise an ``AssertionError`` if the sequence length of ``k_val``
                is longer than the maximum cache sequence length.

        r   z6The current cache has been setup with a batch size of z,, but found new key tensors with batch size ú!r   N)Úshaper   Ú
ValueErrorr   r   Úadd_)r   r!   r"   ÚbszÚ_Úseq_lenÚk_outÚv_outr   r   r   Úupdate8   s   'ÿÿ  	zKVCache.update)r
   N)Ú__name__Ú
__module__Ú__qualname__Ú__doc__Úintr   r	   r   r   Úpropertyr   ÚTensorr   r,   Ú__classcell__r   r   r   r   r      s2    þýüûúù
ÿÿþr   )Útypingr   r   r   ÚModuler   r   r   r   r   Ú<module>   s   