o
    V۷i@                     @   sx  d dl mZ d dlmZmZmZmZ d dlmZm	Z	 d dl
Z
eG dd dZG dd deZG d	d
 d
eZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZeG dd deZG dd dZdeeef ddfddZdedefd d!Zdedefd"d#Zdedefd$d%Zd)d&d'Zed(kre  dS dS )*    )	dataclass)OptionalListTupleIterator)ABCabstractmethodNc                   @   sf   e Zd ZU dZeed< eed< ddedd fddZdefd	d
ZddedefddZ	defddZ
dS )Positionz2Tracks position in source text for error reportingtextpos   nreturnc                 C   s   t | j| j| S N)r	   r
   r   selfr    r   M/home/ubuntu/vllm_env/lib/python3.10/site-packages/llguidance/gbnf_to_lark.pyadvance      zPosition.advancec                 C   s    | j t| jk r| j| j  S dS N )r   lenr
   r   r   r   r   current   s    zPosition.currentc                 C   s   | j | j| j|  S r   )r
   r   r   r   r   r   peek      zPosition.peekc                 C   sf   | j dd| jd }| j td| jd | j }| j | j| jd  }d| dt| dt| S )N
r   r      zline z, z ^ )r
   countr   maxrepr)r   line_noprefsuffr   r   r   __str__   s   zPosition.__str__N)r   )__name__
__module____qualname____doc__str__annotations__intr   r   r   r%   r   r   r   r   r	   	   s   
 r	   c                       s&   e Zd Zdedef fddZ  ZS )GbnfToLarkErrorr   messagec                    s    || _ t | d|  d S )Nz at )r   super__init__)r   r   r.   	__class__r   r   r0   !   s   zGbnfToLarkError.__init__)r&   r'   r(   r	   r*   r0   __classcell__r   r   r1   r   r-       s    r-   c                   @   sd   e Zd ZedefddZdefddZdefddZdefdd	Z	dd
dZ
ded  fddZdS )ASTNoder   c                 C   s   d S r   r   r   r   r   r   r%   (   s   zASTNode.__str__c                 C      dS )NTr   r   r   r   r   	is_atomic,      zASTNode.is_atomicc                 C   s   t dd |  D S )Nc                 s   s    | ]}|  V  qd S r   )is_terminal).0cr   r   r   	<genexpr>0       z&ASTNode.is_terminal.<locals>.<genexpr>)allchildrenr   r   r   r   r8   /   r   zASTNode.is_terminalc                 C   s   |   S r   )r%   r   r   r   r   top_str2      zASTNode.top_strc                 C   s   | S r   r   r   r   r   r   simplify5   r7   zASTNode.simplifyc                 C   s   g S r   r   r   r   r   r   r>   8   r7   zASTNode.childrenN)r   r4   )r&   r'   r(   r   r*   r%   boolr6   r8   r?   rA   listr>   r   r   r   r   r4   '   s    
r4   c                   @   $   e Zd ZU eed< defddZdS )LiteralNodevaluer   c                 C      d| j  dS )N")rF   r   r   r   r   r%   @      zLiteralNode.__str__Nr&   r'   r(   r*   r+   r%   r   r   r   r   rE   <      
 rE   c                   @   rD   )	RegexNoderxr   c                 C   rG   )N/)rM   r   r   r   r   r%   H   rI   zRegexNode.__str__NrJ   r   r   r   r   rL   D   rK   rL   c                   @   sB   e Zd ZU eed< dZed ed< defddZdefdd	Z	dS )
RuleRefNodenameNRuleNodetargetr   c                 C   s   | j d u rdS | j jS NF)rR   rule_is_terminalr   r   r   r   r8   Q   s   
zRuleRefNode.is_terminalc                 C   s   | j d u r| jS | j jS r   )rR   rP   r   r   r   r   r%   V   s   
zRuleRefNode.__str__)
r&   r'   r(   r*   r+   rR   r   rB   r8   r%   r   r   r   r   rO   L   s
   
 rO   c                   @   sX   e Zd ZU eed< eed< ee ed< dee fddZdefddZ	de
fd	d
ZdS )RepetitionNodenode	min_times	max_timesr   c                 C      | j gS r   )rV   r   r   r   r   r>   b   r@   zRepetitionNode.childrenc                 C   s   | j  | _ | S r   )rV   rA   r   r   r   r   rA   e   s   zRepetitionNode.simplifyc                 C   s   t | j}| j sd| d}| jdkr| jd u r| dS | jdkr.| jd u r.| dS | jdkr=| jdkr=| dS | jd urGt | jnd}| d	| j d
| dS )N()r   *r   +?r   {,})r*   rV   r6   rW   rX   )r   innermax_strr   r   r   r%   i   s   




zRepetitionNode.__str__N)r&   r'   r(   r4   r+   r,   r   r   r>   rA   r*   r%   r   r   r   r   rU   \   s   
 rU   c                   @   sV   e Zd ZU ee ed< defddZdefddZ	defddZ
dee fd	d
ZdS )SequenceNodenodesr   c                 C   s    | j sdS ddd | j D S )Nz"" c                 s       | ]}t |V  qd S r   r*   )r9   rV   r   r   r   r;   ~   r<   z'SequenceNode.__str__.<locals>.<genexpr>)re   joinr   r   r   r   r%   {   s   zSequenceNode.__str__c                 C   r5   rS   r   r   r   r   r   r6      r7   zSequenceNode.is_atomicc                 C   D   t t| jD ]}| j|  | j|< qt| jdkr | jd S | S Nr   r   )ranger   re   rA   r   ir   r   r   rA      
   
zSequenceNode.simplifyc                 C      | j S r   )re   r   r   r   r   r>         zSequenceNode.childrenN)r&   r'   r(   r   r4   r+   r*   r%   rB   r6   rA   rC   r>   r   r   r   r   rd   w   s   
 rd   c                   @   sd   e Zd ZU ee ed< defddZdefddZde	fddZ
defd	d
Zdee fddZdS )AlternativeNodealternativesr   c                 C   s   d dd | jD S )Nz
     | c                 s   rg   r   rh   r9   altr   r   r   r;      r<   z*AlternativeNode.top_str.<locals>.<genexpr>ri   rs   r   r   r   r   r?      r   zAlternativeNode.top_strc                 C   s   dd dd | jD  d S )NrZ   z | c                 s   rg   r   rh   rt   r   r   r   r;      r<   z*AlternativeNode.__str__.<locals>.<genexpr>r[   rv   r   r   r   r   r%      s   zAlternativeNode.__str__c                 C   r5   rS   r   r   r   r   r   r6      r7   zAlternativeNode.is_atomicc                 C   rj   rk   )rl   r   rs   rA   rm   r   r   r   rA      ro   zAlternativeNode.simplifyc                 C   rp   r   rs   r   r   r   r   r>      rq   zAlternativeNode.childrenN)r&   r'   r(   r   r4   r+   r*   r?   r%   rB   r6   rA   rC   r>   r   r   r   r   rr      s   
 rr   c                   @   sV   e Zd ZU eed< eed< eed< dZeed< dZde	e fdd	Z
defd
dZdS )rQ   rP   rs   commentFrT   r   r   c                 C   rY   r   rw   r   r   r   r   r>      r@   zRuleNode.childrenc                 C   s   | j  | j d| j  S )Nz: )rx   rP   rs   r?   r   r   r   r   r%      s   zRuleNode.__str__N)r&   r'   r(   r*   r+   r4   rT   rB   orderr   r>   r%   r   r   r   r   rQ      s   
 rQ   c                   @   s  e Zd Zd'ddZdedeeef fddZdede	eef fd	d
Z
dede	eef fddZdede	eef fddZedede	eef fddZedede	eef fddZdededefddZededefddZededefddZdede	eef fddZdedede	eef fddZdedede	eef fd d!Zdedede	eef fd"d#Zded$ee defd%d&ZdS )(GrammarParserr   Nc                 C   s
   d| _ d S r   )curr_commentr   r   r   r   r0      s   zGrammarParser.__init__r
   c                 C   s`   t |d}| j|dd}g }| r)| |\}}|| | j|dd}| sdd |D S )Nr   Tallow_newlinesc                 S   s   i | ]}|j |qS r   )rP   )r9   ruler   r   r   
<dictcomp>   s    z'GrammarParser.parse.<locals>.<dictcomp>)r	   _skip_spacer   _parse_ruleappend)r   r
   r   rulesr~   r   r   r   parse   s   

zGrammarParser.parser   c                 C   s  dt dtfdd}| dkr|dd st|d| }| }|d	v r/d| | fS |d
krZ|ddd }t|dksF||sNt|d| |d}d| |fS |dkr|ddd }t|dksq||syt|d| |d}d|d |fS |dkr|ddd }t|dks||st|d| |d}d|d |fS t|d| | dkrt|d| | fS )Nsr   c                 S   s   t dd | D S )Nc                 s   s    | ]}|d v V  qdS )0123456789abcdefABCDEFNr   )r9   chr   r   r   r;      r<   z@GrammarParser._parse_char.<locals>.is_all_hex.<locals>.<genexpr>)r=   )r   r   r   r   
is_all_hex   r   z-GrammarParser._parse_char.<locals>.is_all_hex\   r   zIncomplete escape sequencez"\[]nrtx   zInvalid \x escape sequence: \xz\xu      zInvalid \u escape sequence: \uz\u0U	      zInvalid \U escape sequence: \Uz\UzInvalid escape sequence \r   zUnexpected end of input)r*   rB   r   r   r-   r   r   lstrip)r   r   r   r:   	hex_valuer   r   r   _parse_char   sH   







zGrammarParser._parse_charc                 C   sh   |  dkrt|dd}| }	 | |\}}|dv r$|d| 7 }n||7 }|dkr-nqt||fS )N[zExpected '['Tz/[r   ])r   r-   r   r   rL   r   r   rr:   r   r   r   _parse_char_class   s   
	zGrammarParser._parse_char_classc                 C   sR   |  dkrt|d| }d}	 | |\}}|dkrn||7 }qt||fS )NrH   zExpected '"'r   )r   r-   r   r   rE   r   r   r   r   _parse_literal  s   
zGrammarParser._parse_literalc                 C   sR   | j }t|  r|  } t|  s
| j |krt| d| j|| j  | fS )NzExpected name)r   rz   _is_word_charr   r   r-   r
   r   startr   r   r   _parse_name  s   

zGrammarParser._parse_namec                 C   sR   | j }|   r|  } |   s	| j |krt| dt| j|| j  | fS )NzExpected integer)r   r   isdigitr   r-   r,   r
   r   r   r   r   
_parse_int  s   

zGrammarParser._parse_intr}   c                 C   s   |  rX|  dv r| }nE|r|  dv rt|}n7|  dkrQ| }d}|  rG|  dvrG||  7 }| }|  rG|  dvs3|  j|d 7  _n	 |S |  s|S )Nz 	
#z//r   )r   r   rz   _skip_newliner{   )r   r   r}   cmtr   r   r   r   !  s"   
zGrammarParser._skip_spacec                 C   sD   |   dkr|  } |   dkr|  } | S |   dkr |  } | S )Nr   )r   r   )r   r   r   r   r   2  s   zGrammarParser._skip_newliner:   c                 C   s   |   p| dkp| dkS )N-_)isalnum)r:   r   r   r   r   <  s   zGrammarParser._is_word_charc                 C   s   |  |\}}| j|dd}|ddkrt|d|d}| j|dd}| j|dd\}}| |}| j}d| _t||||fS )	NFr|   r   z::=zExpected ::=T	is_nestedr   )	r   r   r   r-   r   _parse_alternativesr   r{   rQ   )r   r   rP   rs   r   r   r   r   r   @  s   


zGrammarParser._parse_ruler   c                 C   s`   g }	 |  ||\}}|| | j||d}| dkrn| }| j|dd}qt||fS )NTr|   |)_parse_sequencer   r   r   r   rr   )r   r   r   rs   sequencer   r   r   r   P  s   
z!GrammarParser._parse_alternativesc                 C   sH  g }|  r|  dvr|s|  dvr|  dkr'| |\}}|| nQ|  dkr:| |\}}|| n>|  dkrO| j||d\}}|| n)|  dkra|td | }n| |  rw| |\}}|t	| nn&| j
||d}| ||}| j
||d}|  r|  dvr|s|  dvst||fS )	Nz|)r   rH   r   rZ   r   .r|   )r   r   r   r   _parse_grouprL   r   r   r   rO   r   _parse_repetitionrd   )r   r   r   re   rV   rP   r   r   r   r   b  sB   
zGrammarParser._parse_sequencec                 C   sj   |  dkrt|d| }| |d}| j|dd\}}|  dkr)t|d| }|| ||fS )NrZ   zExpected '('Tr   r[   zExpected ')')r   r-   r   r   r   )r   r   r   rs   r   r   r   r     s   

zGrammarParser._parse_groupre   c                 C   sh  |s|S |  dkrt|d dd |d< | S |  dkr,t|d dd |d< | S |  dkr@t|d dd|d< | S |  dkr| }| |d}| |\}}| |d}|  d	krqt|d |||d< | S |  d
kr| | d}d }|   r| |\}}| |d}|  d	krt|dt|d |||d< | S t|d|S )Nr\   r   r]   r   r^   r_   Tra   r`   zExpected '}'zExpected ',' or '}')r   rU   r   r   r   r   r-   )r   r   re   rW   rX   r   r   r   r     s>   

zGrammarParser._parse_repetitionr   N)r&   r'   r(   r0   r*   dictrQ   r   r	   r   r   r4   r   r   staticmethodr   r,   r   rB   r   r   r   r   rr   r   rd   r   r   r   r   r   r   r   r   rz      s@    
*	



"rz   r   r   c                    sx  dt dtdd ffdd}t D ]\}}||_|j |_qdtdtt f fdd  D ]"} |D ]}t	|t
rS|jvrMtd	|j d
|j |_q8q2dvr]td|d d d}|dkrd} D ]}|jdkr|js|j rd|_|d7 }qp|dksjt D ](}|jdd}tdd| }|jr| }n| }|j|kr||| qd S )Nr   rP   r   c                    s2   | v rt d| d | j= || _|  |< d S )NRule 'z' already exists)	ExceptionrP   )r   rP   )r   r   r   rename  s
   zresolve.<locals>.renamerV   c                 3   s(    |   D ]}|V   |E d H  qd S r   )r>   )rV   r:   )all_childrenr   r   r     s
   zresolve.<locals>.all_childrenr   z' not foundrootzNo 'root' rule foundr   r   r   Tr   r   z([a-z])([A-Z])z\1_\2)rQ   r*   	enumeratevaluesry   rs   rA   r4   r   
isinstancerO   rP   r   rR   rT   r8   rC   replaceresublowerupper)r   r   rn   r   rV   num_fixnew_namer   )r   r   r   resolve  sN   





r   r
   c                 C   s   t  }|| }t| t| }|jdd d d}d}|D ] }t|}|s0d|v r0|d7 }||d 7 }d|v }|r@|d7 }q |S )z<
    Convert a GBNF (llama.cpp) grammar to Lark syntax.
    c                 S   rp   r   )ry   )r   r   r   r   <lambda>  s    zgbnf_to_lark.<locals>.<lambda>)keyz%llguidance {}

Tr   )rz   r   r   rC   r   sortr*   )r
   parserr   rlistresprev_nlr   r   r   r   r   gbnf_to_lark  s"   
r   c                 C   s   t d| duS )z6
    Check if the text is already in Lark syntax.
    z!(?m)^\s*(%llguidance\b|start\s*:)N)r   searchr
   r   r   r   is_lark_syntax  s   r   c                 C   s   t | r| S t| S )z5
    Convert a grammar to Lark syntax if needed.
    )r   r   r   r   r   r   any_to_lark	  s   r   c                     sf   dd l } dd l dtdd f fdd}t| jdk r#td | d | jdd  D ]}|| q*d S )	Nr   fnr   c                    s   t |  dddd t| }| }W d    n1 sw   Y  t|} j| d d }t|d}|| W d    n1 sEw   Y  t d d S )	Nz... r   T)endflushr   z.larkwOK)printopenreadr   pathsplitextwrite)r   fr
   larkfn_larkosr   r   process_file  s   

zmain.<locals>.process_filer   z*Usage: gbnf_to_lark.py <file1> <file2> ...r   )sysr   r*   r   argvr   exit)r   r   r   r   r   r   main  s   


r   __main__r   ) dataclassesr   typingr   r   r   r   abcr   r   r   r	   r   r-   r4   rE   rL   rO   rU   rd   rr   rQ   rz   r   r*   r   r   rB   r   r   r   r&   r   r   r   r   <module>   sB     4
	
