o
    ]۷i!                     @   s   d dl Z d dlZd dlmZmZ d dlmZmZmZm	Z	m
Z
 ddlmZmZmZmZmZmZmZmZmZ dedefdd	Zd
edefddZeG dd dZG dd de jZG dd deZG dd deZG dd deZG dd deZdS )    N)	dataclassfield)AnyHashableListOptionalTypeVar   )	COMPLETE_ALPHABETCONFIG_ENV_VAR_DEFAULT_ALPHABETWHITESPACE_CHARACTERS#DEFAULT_MAX_CONSECUTIVE_WHITESPACESDEFAULT_FORCE_JSON_FIELD_ORDER*CONFIG_ENV_VAR_MAX_CONSECUTIVE_WHITESPACES&CONFIG_ENV_VAR_STRICT_JSON_FIELD_ORDER$CONFIG_ENV_VAR_MAX_JSON_ARRAY_LENGTHDEFAULT_MAX_JSON_ARRAY_LENGTHsreturnc                 C   s   | o	|    dv S )N)true1)striplower)r    r   [/home/ubuntu/vllm_env/lib/python3.10/site-packages/lmformatenforcer/characterlevelparser.py_parse_bool      r   env_varc                    s2   t  }|tkr
tn| fdd}t|dS )Nc                      s   t jt S N)osenvirongetstrr   default_valr   parser_funcr   r   factory_func   s   z+_env_or_default_field.<locals>.factory_func)default_factory)typeboolr   r   )r   r$   default_val_typer&   r   r#   r   _env_or_default_field   s   
r+   c                   @   sZ   e Zd ZU eeeZeed< ee	e
Zeed< 	 eeeZeed< 	 eeeZeed< dS )CharacterLevelParserConfigalphabetmax_consecutive_whitespacesforce_json_field_ordermax_json_array_lengthN)__name__
__module____qualname__r+   r   r
   r-   r"   __annotations__r   r   r.   intr   r   r/   r)   r   r   r0   r   r   r   r   r,      s    
 r,   c                   @   s   e Zd ZdZddee fddZejde	dd fdd	Z
ejde	fd
dZejdefddZdee fddZdee fddZedefddZejdefddZdS )CharacterLevelParserzCharacterLevelParser is an interface for classes that can parse strings one character at a time, and determine which characters are allowed at any specific timeNconfigc                 C   s   |pt  | _d S r   )r,   _config)selfr7   r   r   r   __init__+   s   zCharacterLevelParser.__init__new_characterr   c                 C      t  )zAdd a character to the parser, and return a new parser that represents the state of the parser after the character has been added. This has to be
        an immutable operation - the original CharacterLevelParser (self) must not be modified.NotImplementedErrorr9   r;   r   r   r   add_character.   s   z"CharacterLevelParser.add_characterc                 C   r<   )zgReturn a string containing all characters that are allowed at the current point in the parsing process.r=   r9   r   r   r   get_allowed_characters4      z+CharacterLevelParser.get_allowed_charactersc                 C   r<   )zReturn True if the parser is in a state where it can end (potentially finished parsing the desired structure), and False otherwise.r=   rA   r   r   r   can_end9   rC   zCharacterLevelParser.can_endc                 C      dS )zpOptional. Return a key that denotes that this state is a repeating state, full tree traversal should be avoided.Nr   rA   r   r   r   shortcut_key>      z!CharacterLevelParser.shortcut_keyc                 C   rE   )z|Optional. Return a key that denotes that this state is a repeating state, and if it is visited again, results can be cached.Nr   rA   r   r   r   	cache_keyB   rG   zCharacterLevelParser.cache_keyc                 C   s   | j S r   r8   rA   r   r   r   r7   F   s   zCharacterLevelParser.config
new_configc                 C   s
   || _ | S r   rI   )r9   rJ   r   r   r   r7   J   s   r   )r1   r2   r3   __doc__r   r,   r:   abcabstractmethodr"   r@   rB   r)   rD   r   rF   rH   propertyr7   setterr   r   r   r   r6   (   s    r6   c                   @   sL   e Zd ZdZdefddZdedefddZdefd	d
Zde	fddZ
dS )StringParsera  RegexParser is an example CharacterLevelParser that only allows an exact string. It is a debugging / learning tool
    to show how CharacterLevelParser works together with TokenizerPrefixTree to filter the allowed tokens (some of whom may contain multiple characters)stringc                 C   
   || _ d S r   
target_str)r9   rQ   r   r   r   r:   S      
zStringParser.__init__r;   r   c                 C   s>   | j |rt| j t|d  S td| j d  d| d)Nz
Expected 'r   z' but got '')rT   
startswithrP   len
ValueErrorr?   r   r   r   r@   V   s   zStringParser.add_characterc                 C   s   | j r| j d S dS )Nr    rS   rA   r   r   r   rB   \   r   z#StringParser.get_allowed_charactersc                 C   s   | j  S r   rS   rA   r   r   r   rD   _   s   zStringParser.can_endN)r1   r2   r3   rK   r"   r:   r6   r@   rB   r)   rD   r   r   r   r   rP   P   s    rP   c                   @   sN   e Zd ZdZddefddZdedefdd	Zdefd
dZ	defddZ
dS )ForceStopParserzbA simple parser that forbids any characters except the stop token. Used to force stop LM operationFallow_whitespacec                 C   rR   r   )r\   )r9   r\   r   r   r   r:   e   rU   zForceStopParser.__init__r;   r   c                 C   s   | S r   r   r?   r   r   r   r@   g      zForceStopParser.add_characterc                 C   s   | j rtS dS NrZ   )r\   r   rA   r   r   r   rB   i   s   z&ForceStopParser.get_allowed_charactersc                 C   rE   )NTr   rA   r   r   r   rD   k   r]   zForceStopParser.can_endN)F)r1   r2   r3   rK   r)   r:   r"   r6   r@   rB   rD   r   r   r   r   r[   c   s    r[   c                   @   st   e Zd ZdZdee fddZdedefddZdefd	d
Z	de
fddZdee fddZdee fddZdS )UnionParserzWA parser that allows a string that would be allowed by any of several different parsersparsersc                 C   rR   r   r`   r9   r`   r   r   r   r:   q   rU   zUnionParser.__init__r;   r   c                    sB    fdd| j D } fdd|D }t|dkr|d S t|S )Nc                    s   g | ]
} |  v r|qS r   rB   .0parserr;   r   r   
<listcomp>v   s    z-UnionParser.add_character.<locals>.<listcomp>c                    s   g | ]}|  qS r   )r@   rd   rg   r   r   rh   w   s    r	   r   )r`   rX   r_   )r9   r;   relevant_parsersnext_parsersr   rg   r   r@   t   s
   zUnionParser.add_characterc                 C   s$   d dd | jD }d t|S )NrZ   c                 S      g | ]}|  qS r   rc   rd   r   r   r   rh   }       z6UnionParser.get_allowed_characters.<locals>.<listcomp>)joinr`   set)r9   allowedr   r   r   rB   |   s   z"UnionParser.get_allowed_charactersc                 C      t dd | jD S )Nc                 S   rk   r   rD   rd   r   r   r   rh      rl   z'UnionParser.can_end.<locals>.<listcomp>)anyr`   rA   r   r   r   rD      r   zUnionParser.can_endc                 C   s0   t dd | jD }t|dkrtt|S d S )Nc                 s       | ]}|  V  qd S r   )rF   rd   r   r   r   	<genexpr>       z+UnionParser.shortcut_key.<locals>.<genexpr>r	   )rn   r`   rX   nextiter)r9   unique_shortcut_keysr   r   r   rF      s   zUnionParser.shortcut_keyc                 C   2   t dd | jD }tdd |D rd|fS d S )Nc                 s   rs   r   rH   rd   r   r   r   rt      ru   z(UnionParser.cache_key.<locals>.<genexpr>c                 s       | ]}|d uV  qd S r   r   re   keyr   r   r   rt      ru   uniontupler`   allr9   all_cache_keysr   r   r   rH         zUnionParser.cache_keyN)r1   r2   r3   rK   r   r6   r:   r"   r@   rB   r)   rD   r   r   rF   rH   r   r   r   r   r_   o   s    r_   c                   @   st   e Zd ZdZdee fddZdedefddZdefd	d
Z	de
fddZdee fddZdee fddZdS )SequenceParserz0A parser that is a sequence of multiple parsers.r`   c                 C   rR   r   ra   rb   r   r   r   r:      rU   zSequenceParser.__init__r;   r   c                 C   s   g }t | jD ]6\}}|| v r7||}|g| j|d d   }t|dkr0||d  n|t| | s= nqt|dkrH|d S t|S Nr	   r   )		enumerater`   rB   r@   rX   appendr   rD   r_   )r9   r;   legal_parsersidxrf   updated_parserrj   r   r   r   r@      s   
zSequenceParser.add_characterc                 C   s6   t  }| jD ]}||  | s nqd|S r^   )rn   r`   updaterB   rD   rm   )r9   allowed_charactersrf   r   r   r   rB      s   

z%SequenceParser.get_allowed_charactersc                 C   rp   )Nc                 S   rk   r   rq   rd   r   r   r   rh      rl   z*SequenceParser.can_end.<locals>.<listcomp>)r   r`   rA   r   r   r   rD      r   zSequenceParser.can_endc                 C   s    t | jdkr| jd  S d S r   )rX   r`   rF   rA   r   r   r   rF      s    zSequenceParser.shortcut_keyc                 C   ry   )Nc                 s   rs   r   rz   rd   r   r   r   rt      ru   z+SequenceParser.cache_key.<locals>.<genexpr>c                 s   r{   r   r   r|   r   r   r   rt      ru   sequencer   r   r   r   r   rH      r   zSequenceParser.cache_keyN)r1   r2   r3   rK   r   r6   r:   r"   r@   rB   r)   rD   r   rF   r   rH   r   r   r   r   r      s    r   ) rL   r   dataclassesr   r   typingr   r   r   r   r   constsr
   r   r   r   r   r   r   r   r   r"   r)   r   r+   r,   ABCr6   rP   r[   r_   r   r   r   r   r   <module>   s    ,(!