o
    
۾ibA                     @  s  d dl mZ d dlZd dlZd dlZd dlZd dlmZ d dl	Z
d dlZd dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZ erqd dlZd dlmZ d dlm Z  d dl!Z"d d	l#m$Z$ d d
l%m&Z& nede' dZ"ede' dZede' dZ ede' dZee(Z)dZ*d<ddZ+G dd  d Z,d=d"d#Z-d$d% Z.e/d&Z0e/d'Z1d>d-d.Z2d?d0d1Z3d@d4d5Z4dAd6d7Z5dBd:d;Z6dS )C    )annotationsN)TYPE_CHECKING)LRUCache)Cache)init_logger)
LazyLoader)GrammarOutputSchedulerOutput)TokenizerLike)
InputBatchxgrxgrammarocoutlines_core
file_utilsztransformers.file_utilsconvert_slow_tokenizerz#transformers.convert_slow_tokenizerscheduler_outputr	   grammar_outputr   input_batchr   logitstorch.TensorreturnNonec                 C  s^  |j }i }d}| j}t|j}t|jD ]\}	}
|	| }|t||
d7 }|
|v r.|||
< qg }tj	|j
d |j
d fd|jd}d}|jD ]3}
t||
d}||
 }durttd| D ]}|| }|||  ||< || q`|d| 7 }qGt|j|jdd}t||j
d k}d}|stj|tjd	dd
}|j|jdd}tj|||d dS )a-  
    Apply grammar bitmask to output logits of the model with xgrammar function.

    Args:
        scheduler_output (SchedulerOutput): The result of engine scheduling.
        input_batch (InputBatch): The input of model runner.
        logits (torch.Tensor): The output logits of model forward.
    r       )shape
fill_valuedtypeNT)non_blockingcpu)r   device
pin_memory)indices)grammar_bitmaskscheduled_spec_decode_tokenssetstructured_output_request_ids	enumeratereq_idslengetnpfullr   r   rangeappendtorch
from_numpytor!   tensorint32r   apply_token_bitmask_inplace)r   r   r   r   r$   struct_out_req_batch_indicescumulative_offsetspec_tokensstruct_out_req_idsbatch_indexreq_idlogit_indexout_indicessorted_bitmaskcumulative_indexnum_spec_tokens	logit_idxibitmask_indexskip_out_indicesindex_tensorr   r   S/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/structured_output/utils.pyapply_grammar_bitmask,   sJ   



rG   c                   @  s   e Zd ZdZd	ddZdS )
OutlinesVocabularyzo
    Wrapper class for `outlines_core.Vocabulary`,
    which allows us to store a hash with the vocabulary
    
vocabularyoc.Vocabularyr   r   c                 C  s2   || _ t| d }t|d}|| _d S )Nutf-8   )innerhashlibsha256__repr__encode	hexdigestint_hash)selfrI   hex_strhash_intr   r   rF   __init__   s   

zOutlinesVocabulary.__init__N)rI   rJ   r   r   )__name__
__module____qualname____doc__rX   r   r   r   rF   rH   z   s    rH   strc                  C  sx   t d} t d}t jd}| r| S |rt j|ddS t j|r0|dkr0t j|ddS t }t j|ddS )zFGet the context object that contains previously-computed return valuesOUTLINES_CACHE_DIRXDG_CACHE_HOME~z.cacheoutlines/)osgetenvpath
expanduserjoinisdirtempfile
gettempdir)outlines_cache_dirxdg_cache_homehome_dirtempdirr   r   rF   get_outlines_cache_path   s   

ro   c                  C  sf   t  } tjr.td t| ddd}tjd}|	dd}||kr&|
  |d| |S tdd	S )
z3Get the Cache instance to be used for index cachingzEnabling outlines cache. This is an unbounded on-disk cache. It may consume a lot of disk space and should not be used with untrusted clients.noner   )eviction_policy
cull_limitr   __version__N   )maxsize)ro   envsVLLM_V1_USE_OUTLINES_CACHEloggerwarningr   	importlibmetadataversionr+   clearr&   r   )	cache_dircacheoutlines_versioncached_versionr   r   rF   get_outlines_cache   s   
r   z^<0x[0-9A-F]{2}>$u   ^.{0,6}�+.{0,6}$	tokenizerr
   eos_token_idrS   dict[bytes, list[int]]c           
        s*  dd t   D d fdd}i }g }   D ]v\}}| jv r&q||}|rt|ttfr8t|}nG|dkr@|dksId|v rzt	|szt
	|r[tt|d	d
 dg}n$fdd|D }	d|	v rutd| d| d| t|	}n|d}||kr||g | q|| q|S )zCreate a map from vocabulary tokens to lists of equivalent token ids.

    Returns:
        A Dict of token string -> equivalent token ids
    c                 S  s   i | ]\}}||qS r   r   ).0kvr   r   rF   
<dictcomp>   s    
z'_reduced_vocabulary.<locals>.<dictcomp>tokenr]   r   c                   s8     | g}t| tu r| tjs| dkrd| S |S )Nz<0x20> )convert_tokens_to_stringtyper]   
startswithr   SPIECE_UNDERLINE)r   string)r   r   rF   convert_token_to_string   s   
z4_reduced_vocabulary.<locals>.convert_token_to_stringu   �      rL   c                   s   g | ]}  |qS r   )r+   r   c)unicode_to_bytesr   rF   
<listcomp>   s    z'_reduced_vocabulary.<locals>.<listcomp>NzCannot convert token `z` (z) to bytes: rK   )r   r]   r   r]   )r   bytes_to_unicodeitems	get_vocaball_special_tokens
isinstancebytes	bytearrayre_replacement_seqmatchre_llama_byte_tokenrS   RuntimeErrorrQ   
setdefaultr/   )
r   r   r   rI   empty_token_idsr   	token_idx	token_strtoken_bytes	byte_valsr   )r   r   rF   _reduced_vocabulary   sB   	





r   rJ   c              
   C  s   t | dr| jS z+t | dr| jdur| j}n
tdt|  dt| |}tt||}|| _|W S  t	yJ } ztdt|  d|d}~ww )z2Get the `Vocabulary` object for a given tokenizer._outlines_vocabularyr   Nz?Error during structured outputs setup for outlines: Tokenizer (zi) has no `eos_token_id` property, but `eos_token_id` is required for structured outputs to work properly.z,Cannot get the vocabulary of the tokenizer (z0). The tokenizer should have a get_vocab method.)
hasattrr   r   
ValueErrorr   r   rH   r   
VocabularyAttributeError)r   r   reduced_vocabrI   er   r   rF   get_outlines_vocabulary  s6   
r   grammar_strboolc                 C  sL   | rt | ts	dS | dD ]}tdd| }|sqd|v r# dS qdS )aV  
    Check if grammar appears to use Lark syntax.

    Args:
        grammar_str: Input grammar string

    Returns:
        bool: True if grammar appears to be in Lark format, False otherwise

    Examples:
        >>> grammar_is_likely_lark("rule: 'abc'")
        True
        >>> grammar_is_likely_lark("rule ::= 'abc'")
        False
    F
	(#|//).*$ z::=T)r   r]   splitresubstrip)r   liner   r   rF   grammar_is_likely_lark!  s   r   c                   s  t | tstdt|  |  stdt }t }g }d.dd d/dd}d0dd} fdd| dD }d}t|dD ]F\}}	|	rK|	drLq@d|	v rz |	ddd  d}
|	|
 |du ri|
}|
dkrod}W q@ t
y } z	td| d|d}~ww q@|std|d|  d}g }t|dD ]\}}	|	sqz|d|	v r|	ds|r|| d d!|  |	dd\}
}|
 d}||d"| d#| td$d%|}||| | g}n5|	dr |std&| d'|	dd  }||d(| d#| td$d%|}||| || W q ty; } ztd)| d*t| |d}~ww |rL|| d d!|  || d+h }|rbtd,d-t| d|S )1a  
    Convert a Lark grammar string to EBNF format.

    EBNF reference:
    https://github.com/ggerganov/llama.cpp/blob/master/grammars/README.md
    Lark grammar reference:
    https://lark-parser.readthedocs.io/en/latest/grammar.html

    Args:
        grammar_str: Input grammar in Lark format

    Returns:
        str: Converted grammar in EBNF format

    Examples:
        >>> print(convert_lark_to_ebnf("rule: 'hello'"))
        root ::= rule
        rule ::= "hello"
    zGrammar must be a string, got zGrammar string cannot be emptyr   r]   r   c                 S  s   t dd|  S )z)Remove comments and whitespace from line.r   r   )r   r   r   )r   r   r   rF   
clean_line^  s   z(convert_lark_to_ebnf.<locals>.clean_linetext	rule_nameline_numrS   r   c                 S  s<   |  dd dks|  dd dkrtd| d| dS )z Validate quote matching in text.'   r   "zMismatched quotes in z	 on line N)countr   )r   r   r   r   r   rF   check_quotesb  s   $z*convert_lark_to_ebnf.<locals>.check_quotesset[str]c                 S  s,   t dd| } t dd| } tt d| S )z"Extract rule references from text.z"[^"]*"r   z[+*?()|\[\]{}]r   z\b[a-zA-Z_][a-zA-Z0-9_]*\b)r   r   r&   findall)r   r   r   rF   extract_referencesg  s   z0convert_lark_to_ebnf.<locals>.extract_referencesc                   s   g | ]} |qS r   r   )r   r   r   r   rF   r   o  s    z(convert_lark_to_ebnf.<locals>.<listcomp>r   Nr   |:r   ?startzInvalid rule format on line z". Expected 'rule_name: definition'zNo valid rules found in grammar	root ::= z ::=  | zrule 'r   z	'([^']*)'z"\1"zAlternative '|' on line z$ without a preceding rule definitionzalternative for rule 'zError on line z: rootz"Referenced rules are not defined: z, )r   r]   r   r]   )r   r]   r   r]   r   rS   r   r   )r   r]   r   r   )r   r]   r   r   r   r&   r   r(   r   add
IndexErrorr/   rg   r   r   updatesorted)r   defined_rulesreferenced_rulesoutput_linesr   r   lines
first_ruler   r   namer   current_rulecurrent_definition
definitionalt_defundefined_rulesr   r   rF   convert_lark_to_ebnfA  s   








r   choice	list[str]c                   s8   ddd  fdd| D }dd	 d
d |D  }|S )Nsr]   r   c                 S  s   t dd| S )z+Escape special characters in a EBNF string.z(["\\])z\\\1)r   r   )r   r   r   rF   escape_ebnf_string  s   z-choice_as_grammar.<locals>.escape_ebnf_stringc                 3  s    | ]} |V  qd S )Nr   r   r   r   rF   	<genexpr>  s    z$choice_as_grammar.<locals>.<genexpr>r   r   c                 s  s    | ]	}d | d V  qdS )r   Nr   r   r   r   rF   r     s    )r   r]   r   r]   )rg   )r   escaped_choicesgrammarr   r   rF   choice_as_grammar  s   
r   )
r   r	   r   r   r   r   r   r   r   r   )r   r]   )r   r
   r   rS   r   r   )r   r
   r   rJ   )r   r]   r   r   )r   r]   r   r]   )r   r   r   r]   )7
__future__r   rN   importlib.metadatarz   rc   ri   typingr   numpyr,   regexr   r0   
cachetoolsr   	diskcacher   	vllm.envsrv   vllm.loggerr   vllm.utils.import_utilsr   vllm.v1.core.sched.outputr   r	   r   r   #transformers.convert_slow_tokenizerr   transformers.file_utilsr   r   r   vllm.tokenizersr
   vllm.v1.worker.gpu_input_batchr   globalsrY   rx   CACHErG   rH   ro   r   compiler   r   r   r   r   r   r   r   r   r   rF   <module>   sT   
N




F

  