o
    پik"                     @   s  d dl Z d dlZd dlmZmZmZmZmZmZm	Z	 er&d dl
Z
d dlmZ G dd deZe jddG dd	 d	Zd
ededed dddddddee dddee fddZded dee dddee fddZded dee deeee   fddZded dee defddZ								d/d d!Z						d0d"d#Zded ddded fd$d%Zdee dddee fd&d'Zd(ee	 dddee	 fd)d*Zd+dd,eddfd-d.ZdS )1    N)TYPE_CHECKINGCallableIterableIteratorListOptionalTuple)Guidec                   @   s   e Zd ZdS )ContextLengthExceededErrorN)__name__
__module____qualname__ r   r   O/home/ubuntu/.local/lib/python3.10/site-packages/outlines/generate/generator.pyr
      s    r
   T)frozenc                   @   s:   e Zd ZU ded< ded< ded< ded< ee ed< dS )GenerationStatetorch.Tensor	token_idskv_cachelogitsweights
fsm_statesN)r   r   r   __annotations__r   intr   r   r   r   r      s   
 r   modelsamplerfsmsr	   r   r   sequence_weightsattention_masksr   rngztorch.Generatorreturnc                 c   s    ddl }|du r| }d}		 z
| |||	\}
}	W n ty%   tdw t||}t|
|}||||\}}}t|||}t||}t|	|}	t	|dkrYt
||}t||}t|||}t||}|rqt||	|
||V  dS t||	|
||V  q)a2  Generates sequences of tokens.

    Parameters
    ----------
    model
        A callable that generates a probability distribution over the
        vocabulary when passed a tensor of token ids.
    sampler
        A callable that returns the next token ids, their ancestor sequence and
        the updated sequence weights when passed a distribution over the
        vocabulary.
    token_ids
        A tensor of token ids on which the sequence distribution is conditioned, of
        shape ``(n_seqs, n_prompt_tokens)``
    sequence_weights
        A tensor that contains the initial weights of the sequences, of shape
        ``(n_seqs,)``
    attention_masks
        A tensor of tensors that represent the tokens considered at the attention
        layer, of shape ``(n_seqs, n_prompt_tokens)``.
    fsms
        List of finite-state machines that drive the text generation,
        one for each sequence in the batch.
    fsm_states
        The initial states of the finite-state machine for each sequence in the batch.

    Yields
    ------
    A new sequence.

    r   NTz9The input length exceeds the context length of the model.   )torch	Generator
IndexErrorr
   get_allowed_tokensbias_logitsupdate_token_idsupdate_attention_masksreorder_kv_cachelenreorder_fsmsreorder_fsm_statesget_next_fsm_statesis_generation_finishedr   )r   r   r   r   r   r   r   r   r"   r   r   allowed_tokensbiased_logitsnext_token_ids	ancestorsis_finishedr   r   r   sequence_generator   sV   )







r4   r1   c                 C   s   dd t | ||D S )a  

    Parameters
    ----------
    fsm
        The finite-state machine used to monitor this batch.
    next_token_ids
        The tokens that were just generated.

    Returns
    -------
    A `torch.Tensor` object that represents the next logit mask.

    c                 S   s&   g | ]\}}}| |t|d  qS )r   )get_next_stater   ).0fsm	fsm_statetoken_idr   r   r   
<listcomp>   s    z'get_next_fsm_states.<locals>.<listcomp>zip)r   r   r1   r   r   r   r-   s   s   
r-   c                 C   s   dd t | |D S )a^  Get the new instructions for each sequence from the finite-state machine.

    Parameters
    ----------
    fsm
        The finite-state machine used to monitor this batch.
    fsm_states
        The FSM states corresponding to each sequence in the batch.

    Returns
    -------
    A nested list that contains the ids of the logits to keep.

    c                 S   s   g | ]
\}}| |jqS r   )get_next_instructiontokensr6   r7   stater   r   r   r:      s    z&get_allowed_tokens.<locals>.<listcomp>r;   r   r   r   r   r   r%      s   r%   c                 C   s   t dd t| |D S )a   Determine if the generation is finished.

    A generation is considered finished if the FSM of every sequence in the
    batch is in a final state.

    A better solution is to return finished sequences as soon as their FSM
    is in a final state.

    Parameters
    ----------
    fsm
        The finite-state machine used to monitor this batch.
    fsm_states
        The FSM states corresponding to each sequence in the batch.

    Returns
    -------
    Whether all sequences are finished sampling.

    c                 S   s   g | ]	\}}| |qS r   )is_final_stater?   r   r   r   r:      s    z*is_generation_finished.<locals>.<listcomp>)allr<   rA   r   r   r   r.      s   r.   r2   c                 C   s(   ddl }|| d|} |j| |gddS )a  Append the sampled tokens to the running sequence of tokens.

    Parameters
    ----------
    token_ids
        The current token sequences
    next_token_ids
        The tokens that were just generated and that we need to append
        to the existing sequences.
    ancestors
        The sequences to which the token ids need to be added.

    Returns
    -------
    A new sequence of token ids that contains the tokens that were
    just generated.

    r   N)dim)r"   index_selectconcatenate)r   r1   r2   r"   r   r   r   r'      s   r'   c                 C   sB   ddl }|| d|} |j| |j| jdd d | jdgddS )a  Expand the attention masks.

    Parameters
    ----------
    attention_masks
        The attention masks for each sequence in the batch.
    ancestors
        The sequences to which the token ids need to be added.

    Returns
    -------
    The attention masks padded with 1s.

    r   NrD   )r!   device)axis)r"   rF   rG   onesshaperI   )r   r2   r"   r   r   r   r(      s   r(   c                 C   s$   g }|D ]}| | |   q|S N)appendcopy)r   r2   reordered_fsmsancestorr   r   r   r+      s   r+   c                 C   s    g }|D ]	}| | |  q|S rM   )rN   )r   r2   reordered_statesrQ   r   r   r   r,      s   r,   r   c              	   C   sb   ddl }| du r
dS t }| D ]}t }|D ]}||d||j}||f7 }q||f7 }q|S )aM  Re-order the KV-cache based on the ancestors.

    In transformers, the object that stores the KV-cache is a tuple who elements
    are the key cache and the value cache. Each of these caches are tuples where
    each element correpond to a layer. To each layer corresponds a tensor whose
    first dimension is the batch size.

    r   N)r"   tuplerF   torI   )r   r2   r"   new_kv_cache
cache_itemnew_cache_itemlayerr   r   r   r)     s   r)   r   allowed_token_idsc                 C   s^   ddl }|j| tj | jd}t|D ]\}}|dur&| ||f |||f< q| | ||< q|S )a  Mask the logits.

    The function iterates over a nested list where each list corresponds to the
    indices that need to be masked for each row in the array.

    Parameters
    ----------
    logits
        Two dimensional tensor that contains the next-token probability
        distribution.
    allowed_token_ids
        A list that contains the tokens that can be generated by the model.

    Returns
    -------
    A view of the original logits tensor where some values are masked.

    r   NrH   )r"   	full_likemathinfrI   	enumerate)r   rY   r"   r0   iidsr   r   r   r&     s   r&   )r   r   r1   r   r2   r   r    r   )r   r   r2   r   r    r   )dataclassesr[   typingr   r   r   r   r   r   r   r"   outlines.fsm.guider	   	Exceptionr
   	dataclassr   r   r4   r-   r%   boolr.   r'   r(   r+   r,   r)   r&   r   r   r   r   <module>   s    $
	
[




