o
    پi"                     @   s$  d dl Z d dlZd dlZd dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ er^d dlmZ eeef ZG dd deZG dd deZdd Z G dd deZe !dddgZ"G dd deZ#dS )    N)TYPE_CHECKINGAny	GeneratorUnion)DedentError)UnexpectedCharactersUnexpectedToken)Generate)Guide)
RegexGuide)Write)create_states_mapping)grammars)PartialLarkPartialParserState)	Tokenizerc                   @   s   e Zd ZU dZeed< dS )r
   ak  Base definition of a generation guide.

    A generation guide defines the behavior of a finite-state machine that guides
    a text generation procedure. Unlike the DFAs built from regular expressions
    guides can also emit a `Write` instructions which tells the model that it can
    append a sequence of tokens (or token word) instead of generating it.

    initial_stateN)__name__
__module____qualname____doc__r   __annotations__ r   r   F/home/ubuntu/.local/lib/python3.10/site-packages/outlines/fsm/guide.pyr
      s   
 	r
   c                   @   sd   e Zd ZdZdZdZdZdddZded	e	fd
dZ
deded	efddZdefddZdd ZdS )StopAtEOSGuidez@Guide to generate tokens until the EOS token has been generated.   r   	tokenizerr   c                 C   s   |j | _ |j | _dS )zzInitialize the generation guide.

        model
            The logit generator used to generate the next token.

        N)eos_token_id
vocabularyvalues)selfr   r   r   r   __init__/   s   zStopAtEOSGuide.__init__statereturnc                 C   s   |  |rt| jgS td S N)is_final_stater   r   r	   r    r"   r   r   r   get_next_instruction9   s   
z#StopAtEOSGuide.get_next_instructiontoken_idc                 C   s    || j ks
|| jkr| jS | jS r$   )r   final_stater   )r    r"   r(   r   r   r   get_next_state>   s   zStopAtEOSGuide.get_next_statec                 C   s
   || j kS r$   )r)   r&   r   r   r   r%   D   s   
zStopAtEOSGuide.is_final_statec                 C   s   | S r$   r   r    r   r   r   copyG   s   zStopAtEOSGuide.copyN)r   r   )r   r   r   r   r)   start_stater   r!   intInstructionr'   r*   r%   r,   r   r   r   r   r   (   s    

r   c                 O   s   t | |g|R i |S r$   )uncached_create_states_mapping)regex_stringr   argskwargsr   r   r   cached_create_states_mappingK   s   r4   c                       s*   e Zd ZdZedef fddZ  ZS )r   zp
    Guide to generate text in the language of a regular expression.
    CoreRegexGuide with outlines cache
    r1   c                    s   t  j||fdti|S )N_create_states_mapping)super
from_regexr4   )clsr1   r   r3   	__class__r   r   r7   U   s   zRegexGuide.from_regex)r   r   r   r   classmethodstrr7   __classcell__r   r   r9   r   r   O   s    r   CFGStateparser_state
prev_tokenc                	   @   s   e Zd ZdZdefddZdedefddZded	e	de
ed
d
f fddZdededefddZdededefddZdedefddZdedefddZdedefddZdddZd
S )CFGGuidezNGuide to generate text that is in the language of a context-free Lark grammar.
cfg_stringc                 C   sN   t d || _|| _| jj| _t|dtjgd| _t	| j
ddd| _dS )zg
        Construct the PartialLark parser and set the empty initial_state (PartialParserState)
        zOutlines' public *community-contributed* CFG structured generation is experimental. Please review https://dottxt-ai.github.io/outlines/latest/reference/generation/cfg#disclaimerlalr)parserimport_paths Nr?   r@   )warningswarnrB   r   r   r   r   GRAMMAR_PATHrD   r>   parser   )r    rB   r   r   r   r   r!   j   s   
zCFGGuide.__init__r"   r#   c                 C   s\   |j du rtt| jgS t| || jj	 }t
|dkr'tt|S tt|S )aW  Return the next instruction for guided generation.

        Current lazy approach:
        - For each token in the vocabulary
          - create a copy of the parsers state
          - add the tokens to the parsers input text
          - if valid, add token to returned tokens

        Further refinements are necessary for performant text processing.

        Parameters
        ----------
        state
            The guides current PartialParserState, or None if complete

        Returns
        -------
        A `Generate` instance that contains the model and the allowed token ids.

        Nr   )r?   r   torchtensorr   listiter_valid_token_idsr   r   r   lenr	   )r    r"   valid_tokensr   r   r   r'      s   
zCFGGuide.get_next_instructioncandidate_token_idsNc                 c   sx    |j du r| jV  dS |D ]+}|| jkr| |r|V  qz| |t| |V  W q ttttt	fy9   Y qw dS )a  
        Iterate over the given token_ids and yield those that are valid for the current parser state.

        Parameters
        ----------
        parser_state
            The current state of the parser, or None if complete.
        token_ids
            The list of token ids to check for validity.

        Yields
        ------
        int
            Valid token ids.
        N)
r?   r   can_terminate_state_get_parser_state_token_appliedr.   
ValueErrorEOFErrorr   r   r   )r    r"   rR   r(   r   r   r   rO      s,   




zCFGGuide.iter_valid_token_idsr(   c                 C   s6   |j du s
|| jkrd}n| |t|}t||dS )a  
        Update the state of the guide.
        Decode the token_id, and calculate the new parser_state with the token applied.

        Parameters
        ----------
        state
            The guides current PartialParserState, or None if complete
        token_id
            The id of the token that was just generated.

        Returns
        -------
        The guides new PartialParserState

        NrG   )r?   r   rT   r.   r>   )r    r"   r(   r?   r   r   r   r*      s   zCFGGuide.get_next_statec                 C   s   t  |j}|jdu r| j|gd }n| j|jggd }| j|j|ggd }|t|d }|dkr<td|jj j	|7  _	| j
j|dd |S )a(  
        Don't mutate `parser_state`, copy to protect

        Get the token string
          - if first token in generation: tokenizer.decode (no leading whitespace)
          - else: normalized (with possibly leading whitespace)

        Don't allow empty ("") tokens, raise ValueError
        Nr   rF   zempty next tokenF)is_end)r,   r?   r@   r   decoderP   rU   lexerr"   textrD   parse_from_state)r    r"   r(   r?   new_token_strprev_token_strcombined_token_strr   r   r   rT      s   
z(CFGGuide._get_parser_state_token_appliedc                 C   s
   |  |S r$   )rS   r&   r   r   r   r%      s   
zCFGGuide.is_final_statec                 C   s:   |j durzt|j   W dS  ty   Y dS w dS )z"Generation is allowed to terminateNFT)r?   r,   feed_eofr   r&   r   r   r   rS     s   
zCFGGuide.can_terminate_statec                 C   s    |j du pt|j  dhS )z1Generation must terminate, no legal continuationsNz$END)r?   setacceptsissubsetr&   r   r   r   must_terminate_state  s   zCFGGuide.must_terminate_statec                 C   s   t | j| jS )zCreate a copy of the Guide.)rA   rB   r   r+   r   r   r   r,     s   zCFGGuide.copy)r#   rA   )r   r   r   r   r<   r!   r>   r/   r'   rN   r   r.   rO   r*   r   rT   boolr%   rS   rc   r,   r   r   r   r   rA   g   s.     
'
!	rA   )$collectionsr,   rH   typingr   r   r   r   rL   lark.indenterr   
lark.lexerr   r   outlines_core.fsm.guider	   r
   	CoreGuider   CoreRegexGuider   r   r0   outlinesr   outlines.fsm.parsingr   r   outlines.models.tokenizerr   r/   r   r4   
namedtupler>   rA   r   r   r   r   <module>   s.    #