o
    پi0                      @   s   d Z ddlZddlmZmZmZmZmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZ erFdd
lmZ G dd deZG dd deZG dd deZG dd deZdS )a3  
 _______________________________
/ Don't want to self-host?       \ Try .json at http://dottxt.co /
 -------------------------------
       \   ^__^
        \  (oo)\_______
            (__)\       )\/                ||----w |
                ||     ||

Copyright 2024- the Outlines developers

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
    N)TYPE_CHECKINGAnyDictListOptionalTypeUnion)build_regex_from_schema)	BaseModel)CFGGuideGuide
RegexGuide)convert_json_schema_to_str   )OutlinesLogitsProcessor)	Tokenizerc                   @   sv   e Zd ZU dZded< eed< eeef ed< e	e ed< dddefddZ
d	ejd
ejdejfddZdddZdS )GuideLogitsProcessorzBias generation using a finite

    Attributes
    ----------
    tokenizer
        The tokenizer used to convert tokens to ids.
    guide
        The `outlines.fsm.Guide` which is used to bias the logits.
    r   	tokenizerguide_guide_states_seq_start_idxc                 C   s,   || _ || _ttg | jji| _d| _dS )zA Guide-based logits processor.

        Parameters
        ----------
        tokenizer
            The tokenizer used to convert tokens to ids.
        guide
            The `outlines.fsm.Guide. which is used to bias the logits.
        N)r   r   hashtupleinitial_stater   r   )selfr   r    r   R/home/ubuntu/.local/lib/python3.10/site-packages/outlines/processors/structured.py__init__:   s   

zGuideLogitsProcessor.__init__	input_idslogitsreturnc                 C   s0  | j du rt|d | _ g }|D ]=}|| j d }tt| }|| jvrE| jtt|dd   }| j||d  }|| j|< |	| j|  qt
j|t
jd}	g }
g }t|D ]\}}| j|jj|	jdd}|
	| |	t
|| q^t
|
}t
|}d|	||f< ||	td |S )	a  Use the Guide to bias the logits before sampling the next token.

        Parameters
        ----------
        input_ids
            The input token ids.
        logits
            The logits.

        Returns
        -------
        torch.Tensor
            The biased logits.
        Nr   )dtypeT)non_blockingFz-inf)r   lenr   r   tolistr   r   get_next_stateitemappendtorch	ones_likebool	enumerateget_next_instructiontokenstodevice	full_likecatmasked_fill_float)r   r   r   sequence_statesseq_idsgen_idscurr_state_key
prev_state
curr_statemaskallowed_tokens_batchbatch_indicesiguide_stateallowed_tokensallowed_tokens_concatbatch_indices_concatr   r   r   process_logitsI   s6   






z#GuideLogitsProcessor.process_logitsc                 C   s   t | j| j dS )z&Return a copy of the logits processor.r   r   )r   r   r   copy)r   r   r   r   rE      s   zGuideLogitsProcessor.copyN)r    r   )__name__
__module____qualname____doc____annotations__r   r   intr   r   r   r)   
LongTensorFloatTensorTensorrC   rE   r   r   r   r   r   *   s   
 

6r   c                       s*   e Zd ZdZdeddf fddZ  ZS )RegexLogitsProcessorzBias generation based on a regular expression.

    Attributes
    ----------
    tokenizer
        The tokenizer used to convert tokens to ids.
    guide
        The `outlines.fsm.RegexGuide. which is used to bias the logits.
    regex_stringr   r   c                    s    t ||}t j||d dS )zCompile the RegexGuide that drives the regex-guided generation.

        Parameters
        ----------
        regex_string
            A string that represents a regular expression
        tokenizer
            An Outlines tokenizer
        rD   N)r   
from_regexsuperr   )r   rP   r   r   	__class__r   r   r         
zRegexLogitsProcessor.__init__)rF   rG   rH   rI   strr   __classcell__r   r   rS   r   rO      s    
rO   c                       sD   e Zd ZdZ	d	deeee ef ddde	e f fddZ
  ZS )
JSONLogitsProcessorzBias generation based on a JSON schema.

    Attributes
    ----------
    tokenizer
        The tokenizer used to convert tokens to ids.
    guide
        The `outlines.fsm.RegexGuide. which is used to bias the logits.
    Nschemar   r   whitespace_patternc                    s(   t |d}t||}t j||d dS )a  Compile the Guide that drives the JSON-guided generation.

        Parameters
        ----------
        schema
            A JSON schema that encodes the structure we want the model to generate.
        tokenizer
            The tokenizer used to convert tokens to ids.
        whitespace_pattern
            Pattern to use for JSON syntactic whitespace (doesn't impact string
            literals). For example, to allow only a single space or newline with
            `whitespace_pattern=r"[
 ]?"`
        )json_schema)rP   r   N)r   r	   rR   r   )r   rY   r   rZ   
schema_strrP   rS   r   r   r      s   

zJSONLogitsProcessor.__init__)N)rF   rG   rH   rI   r   dictr   r
   rV   r   r   rW   r   r   rS   r   rX      s    rX   c                       sP   e Zd ZU dZeed< deddf fddZdej	d	ej
d
ej
fddZ  ZS )CFGLogitsProcessorzBias generation based on a context-free grammar.

    Attributes
    ----------
    tokenizer
        The tokenizer used to convert tokens to ids.
    guide
        The `outlines.fsm.CFGGuide. which is used to bias the logits.
    r   cfg_strr   r   c                    s    t ||d}t j||d dS )zCompile the CFGGuide that drives the CFG-guided generation.

        Parameters
        ----------
        cfg_str
            A string that represents a grammar
        tokenizer
            The tokenizer used to convert tokens to ids.
        )
cfg_stringr   rD   N)r   rR   r   )r   r_   r   	cfg_guiderS   r   r   r      rU   zCFGLogitsProcessor.__init__r   r   r    c              	   C   s   | j du rt|d | _ g }|D ]=}|| j d }tt| }|| jvrE| jtt|dd   }| j||d  }|| j|< |	| j|  qt
|tj }	t|D ] \}
}t| j|t
j||
 dd}||
|gf |	|
|gf< qZ|	S )zBSame behavior as GuideLogitsProcessor, but uses rejection samplingNr   r!   T)
descending)r   r$   r   r   r%   r   r   r&   r'   r(   r)   r1   mathinfr,   nextiter_valid_token_idsargsort)r   r   r   r5   r6   r7   r8   r9   r:   r;   r>   r?   first_legal_tokenr   r   r   rC      s(   


z!CFGLogitsProcessor.process_logits)rF   rG   rH   rI   r   rJ   rV   r   r)   rL   rN   rC   rW   r   r   rS   r   r^      s   
 
r^   )rI   rc   typingr   r   r   r   r   r   r   r)   outlines_core.fsm.json_schemar	   pydanticr
   outlines.fsm.guider   r   r   outlines.fsm.json_schemar   base_logits_processorr   outlines.models.tokenizerr   r   rO   rX   r^   r   r   r   r   <module>   s    $Z#