o
    پi\                     @   sb  d dl m Z mZ d dlmZ d dlmZ d dlmZmZm	Z	m
Z
mZmZmZmZmZmZmZ d dlZd dlmZmZmZ d dlmZ d dlmZmZ d d	lmZmZ d d
lm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z, d dl-m.Z.m/Z/m0Z0 d dl1m2Z2m3Z3m4Z4m5Z5m6Z6 d dl7m8Z8 d dl9m:Z:m;Z;m<Z<m=Z= d dl>m?Z?m@Z@mAZA eeBeCf ZDeeCe	f ZEeG dd dZFeG dd dZGG dd deZHG dd deZIG dd de(ZJG dd de/ZKG dd de.ZLG d d! d!e:ZMG d"d# d#e<ZNG d$d% d%e=ZOG d&d' d'e)ZPG d(d) d)e&ZQG d*d+ d+e%ZRG d,d- d-e#ZSG d.d/ d/eSZTd0eeJeLf fd1d2ZUd3eId4eeBef fd5d6ZVd7ee d4eeeeCeeeeCeCf  eeC eeCeeC f f f f fd8d9ZWd:eeC d;eeCeeeeCeCf  eeC eeCeeC f f f d4e
eeCeXeXf ddf fd<d=ZY	>dEd?e?d@eeC dAeCdBeXd4eeC f
dCdDZZdS )F    )copydeepcopy)	dataclass)	lru_cache)AnyDict	FrozenSet	GeneratorIteratorListOptionalSequenceSetTupleUnionN)FSMAlphabetOblivionError)Unsupported)LarkToken)	LexerConf
ParserConf)LexErrorUnexpectedInput)Indenter)
BasicLexerContextualLexer
LexerStateLexerThreadScannerUnexpectedCharactersUnexpectedToken_create_unless)ParsingFrontendPostLexConnector_validate_frontend_args)ActionIntParseTableLALR_Analyzer
ParseTableShift)InteractiveParser)LALR_Parser	ParseConfParserState_Parser)	BetterFSMget_token_transition_keysmake_deterministic_fsmc                   @   s.   e Zd ZU eed< eed< eed< eed< dS )PartialTerminalInfopriorityterminal_namecan_transitionis_finalN)__name__
__module____qualname__int__annotations__strbool r@   r@   H/home/ubuntu/.local/lib/python3.10/site-packages/outlines/fsm/parsing.pyr4   ;   s
   
 r4   c                   @   sF   e Zd ZU eedf ed< eed< eedf ed< eedf ed< dS )PartialTokensInfo.fsm_state_seqis_not_finishedterminals_and_infofinal_terminals_and_infoN)r9   r:   r;   r   r<   r=   r?   r4   r@   r@   r@   rA   rB   C   s
   
 rB   c                       s    e Zd ZdZ fddZ  ZS )PartialParserConf)rulesstartparser_typedeterministicuse_value_stackc                    s    t  ||| || _|| _d S N)super__init__rK   rL   )selfrH   	callbacksrI   rK   rL   	__class__r@   rA   rO   T   s   
zPartialParserConf.__init__)r9   r:   r;   __serialize_fields__rO   __classcell__r@   r@   rR   rA   rG   K   s    rG   c                       sR   e Zd ZdZ fddZddeddfdd	ZdddZdd ZddddZ	  Z
S )PartialLark)parserrH   optionsrK   rL   c                    sL   | dd| _| dd| _d|d< t j|fi | | jjdks$J d S )NrK   FrL   Tregexlalr)poprK   rL   rN   rO   rX   rW   )rP   grammarrX   rR   r@   rA   rO   c   s
   zPartialLark.__init__Fdont_ignorereturnPartialBasicLexerc                 C   s,   | j }|rddlm} ||}d|_t|S )Nr   )r   r@   )
lexer_confr   ignorer_   )rP   r]   r`   r   r@   r@   rA   _build_lexerm   s   zPartialLark._build_lexerPartialParsingFrontendc                 C   s   |    t| jj| jj t| j| j| jj| j	| j
}| jj}| jj}| j}t|ts-J t|ts4J ||_|| j_t||| jS rM   )_prepare_callbacksr&   rX   rW   lexerrG   rH   
_callbacksrI   rK   rL   r`   
isinstancer   r   rJ   
lexer_typerc   )rP   parser_confrJ   rh   r`   r@   r@   rA   _build_parserw   s"   	zPartialLark._build_parserc                 C   s    d t| j| j| jj| jjS )Nz,{}(open({!r}), parser={!r}, lexer={!r}, ...))formattyper9   source_pathrX   rW   re   rP   r@   r@   rA   __repr__   s   zPartialLark.__repr__parse_statePartialParseStatec                 C   s   | j j j j||dS )Nis_end)rW   parse_from_state)rP   rp   rs   r@   r@   rA   rt      s   zPartialLark.parse_from_stateF)r^   rc   )rp   rq   )r9   r:   r;   rT   rO   r?   rb   rj   ro   rt   rU   r@   r@   rR   rA   rV   Z   s    


rV   c                   @      e Zd Zdd Zdd ZdS )PartialLexerThreadc                 C   s   t | t| jt| jS rM   )rl   r   re   statern   r@   r@   rA   __copy__   s   zPartialLexerThread.__copy__c                 C       t | j d| jd| jdS )N(lexer=z, state=))rl   r9   re   rx   rn   r@   r@   rA   ro          zPartialLexerThread.__repr__Nr9   r:   r;   ry   ro   r@   r@   r@   rA   rw          rw   c                   @   rv   )PartialPostLexConnectorc                 C   s   t | | jt| jS rM   )rl   re   r   	postlexerrn   r@   r@   rA   ry      s   z PartialPostLexConnector.__copy__c                 C   rz   )Nr{   z, postlexer=r|   )rl   r9   re   r   rn   r@   r@   rA   ro      s   z PartialPostLexConnector.__repr__Nr~   r@   r@   r@   rA   r      r   r   c                       sR   e Zd Zd fdd	Zdd Zdd Zedd	 Zed
d Zedd Z	  Z
S )rc   Nc                    sx   |j dksJ t|jd< t|jd< t|jd< t|jd< t j||||d |jr1t	| j
j
|j| _
d | _d | _d | _d S )NrZ   r-   r   r   r   )rW   )rJ   PartialLALRParser_pluginsr_   PartialContextualLexerrw   rN   rO   postlexr   re   _termset_fsm_info_symbols_to_states_reverse_shifts)rP   r`   ri   rX   rW   rR   r@   rA   rO      s   



zPartialParsingFrontend.__init__c                 C   s   i | _ i | _| jjj}|j D ]3\}}| D ]*\}}|d tkr5| j |d i }||t 	| | j|t 	||f qqdS )z4Compute state transition and symbols-to-states maps.r      N)
r   r   rW   parse_tablestatesitemsr+   
setdefaultsetadd)rP   r   
from_statesymbols_to_opssymbolopsymbols_to_from_statesr@   r@   rA   _compute_maps   s   
z$PartialParsingFrontend._compute_mapsc                    s~   t | }i }i  |j D ]#\}}|j}tdd |jD }|j|jf||<  |t	 
| q fdd| D | _dS )ag  Collect and return information about terminal symbol sets and their FSMs.

        Terminal symbol sets (or "termsets") are ordered sequences of terminal
        symbols that are used by each parser state.  Associated with each is a
        collection of FSMs for each terminal and a single parse state FSM that is
        the union of each terminal's FSM.

        This constructs a list of tuples containing the termset, the set of
        parse states that use the termsets, parse state FSMs, and information
        mapping the components of the parse state FSMs to their terminal symbol
        FSMs.

        c                 s       | ]}|j V  qd S rM   name).0termr@   r@   rA   	<genexpr>       zCPartialParsingFrontend._compute_termset_fsm_info.<locals>.<genexpr>c                    s(   g | ]\}\}}|t  | ||fqS r@   )	frozenset)r   termsetfsmfsms_to_trans_finalstermsets_to_parse_statesr@   rA   
<listcomp>  s    

zDPartialParsingFrontend._compute_termset_fsm_info.<locals>.<listcomp>N)get_contextual_lexerlexersr   scannertuple	terminalsr   r   r   r   r   r   )rP   context_lexertermsets_to_fsmsrp   re   r   keyr@   r   rA   _compute_termset_fsm_info   s   
z0PartialParsingFrontend._compute_termset_fsm_infoc                 C      | j d u r	|   | j S rM   )r   r   rn   r@   r@   rA   termset_fsm_info     
z'PartialParsingFrontend.termset_fsm_infoc                 C   r   rM   )r   r   rn   r@   r@   rA   symbols_to_states  r   z(PartialParsingFrontend.symbols_to_statesc                 C   r   rM   )r   r   rn   r@   r@   rA   reverse_shifts  r   z%PartialParsingFrontend.reverse_shiftsrM   )r9   r:   r;   rO   r   r   propertyr   r   r   rU   r@   r@   rR   rA   rc      s    (!

rc   c                   @   s$   e Zd ZdddZedddZdS )	r   Fc           
         s   t ||js|ndd}|  |j}|| _|j| _|jri   fddt| jj	 dd d}i }|D ]}fdd	| jj| 
 D }	|	||< q4t| j|fd
d	| jj
 D fdd	| jj
 D | _|st| j| _tt| jj	 |	 | _t| j|||jd| _d S )NTdebugc                    s2     | }|d u rtt| dd d}| | < |S )Nc                 S      t | S rM   r>   yr@   r@   rA   <lambda>7      z>PartialLALRParser.__init__.<locals>.to_tuple.<locals>.<lambda>r   )getr   sorted)vnew)
old_to_newr@   rA   to_tuple4  s
   
z,PartialLALRParser.__init__.<locals>.to_tuplec                 S   s   t t| dd dS )Nc                 S   r   rM   r   r   r@   r@   rA   r   =  r   z>PartialLALRParser.__init__.<locals>.<lambda>.<locals>.<lambda>r   )r>   r   xr@   r@   rA   r   =      z,PartialLALRParser.__init__.<locals>.<lambda>r   c                    s6   i | ]\}}||d  t ur|n	|d   |d fqS )r   r   )r+   )r   r   r   r   r@   rA   
<dictcomp>B  s    $z.PartialLALRParser.__init__.<locals>.<dictcomp>c                       i | ]	\}}| |qS r@   r@   r   kr   r   r@   rA   r   J      c                    r   r@   r@   r   r   r@   rA   r   K  r   rL   )r)   rK   compute_lalrrQ   ri   r   _parse_tabler   r   keysr   rl   start_states
end_statesr(   from_ParseTabledictzipstates_to_rulesetsPartialParserrL   rW   )
rP   ri   r   strictanalysisrQ   enum
new_statesstransitionsr@   )r   r   rA   rO   '  sH   

zPartialLALRParser.__init__c                 C   s,   |  | }t|||_t|j|||_|S rM   )__new__r*   deserializer   r   rW   )clsdatamemorQ   r   instr@   r@   rA   r   [  s   
zPartialLALRParser.deserializeNFFru   )r9   r:   r;   rO   classmethodr   r@   r@   r@   rA   r   &  s    
4r   c                       sh   e Zd ZdZ			d fdd	Zd fdd	Zddd	Zd
d Zdd Zdd Z	dd Z
dd Z  ZS )PartialParserStaterL   NFc                    s   t  j||||d || _d S )N)state_stackvalue_stackrN   rO   rL   )rP   
parse_confre   r   r   rL   rR   r@   rA   rO   f  s   
zPartialParserState.__init__c           
   	      s  |j dkrl| jd }t| jj| }d}|jjD ]:}|j|jvrPt	
|jd|}t| j}zz| j||d d}W W || _ n tyK   Y W || _qw || _w d}q|sjdd | jj|  D }	t||	| d d	d S | jryt j||d d S | j||d d S )
NpartialF rr   Tc                 S      h | ]}|  r|qS r@   isupperr   r   r@   r@   rA   	<setcomp>  s    z0PartialParserState.feed_token.<locals>.<setcomp>rx   interactive_parser)rl   r   r   re   r   valuerE   r6   ignore_typesr   new_borrow_posr   feed_token_no_stackr"   r   r   r   rL   rN   
feed_token)
rP   tokenrs   current_statecurrent_lexerr7   terminal_info
test_tokenstackexpectedrR   r@   rA   r   s  s@   




zPartialParserState.feed_tokenc              	   C   s   | j }| jj}| jj}	 |d }z|| |j \}}W n ty5   dd ||  D }	t||	| ddw ||ks<J |tu rK|rDJ |	| dS |}
t
|
j}|rZ|| d= ||d  |
jj \}}|tu slJ |	| |r{|d |kr{dS q)a  
        This is a copy of `ParserState.feed_token` with all the value stack
        steps removed.  Since we're not exactly parsing in order to obtain a
        CST or anything similar, we can avoid the growing expense of tracking
        the parse tree.
        Tr   c                 S   r   r@   r   r   r@   r@   rA   r         z9PartialParserState.feed_token_no_stack.<locals>.<setcomp>Nr   )r   r   r   	end_staterl   KeyErrorr   r"   r+   appendlen	expansionoriginr   )rP   r   rs   r   r   r   rx   actionargr   rulesize_action	new_stater@   r@   rA   r     s8   


z&PartialParserState.feed_token_no_stackc                 C   s   | j jj}|d u r| j ddddd}ntdd|}|d u p.|jdkp.tdd |jj	D }|r:| j
|dd	 d S t|g | d d
)Nz$ENDr   r   r   r   c                 s   r   rM   )r8   )r   tir@   r@   rA   r     r   z.PartialParserState.feed_eof.<locals>.<genexpr>Trr   r   )re   rx   
last_token_Tokenr   r   rl   anyr   rE   r   r"   )rP   r  	eof_tokennew_token_is_legalr@   r@   rA   feed_eof  s   
zPartialParserState.feed_eofc                 C   s   | j jj| j S rM   )r   r   r   positionrn   r@   r@   rA   choices     zPartialParserState.choicesc              	   C   st   t  }t| j}i |_|  D ](}| r7t| }||_z||j|d W n	 t	y1   Y qw |
| q|S )z
        Adapted from https://github.com/lark-parser/lark/blob/be542c2ff6d968817df019b8bf03f37b3111c08c/lark/parsers/lalr_interactive_parser.py#L95
        Returns the set of possible tokens that will advance the parser into a new valid state.
        r   )r   r   r   rQ   r  r   r   re   r  r"   r   )rP   acceptsconf_no_callbackstr  r@   r@   rA   r    s   

zPartialParserState.acceptsc                 C   s,   t | | jt| jt| jt| j| jdS )Nr   )rl   r   r   re   r   r   r   rL   rn   r@   r@   rA   ry     s   zPartialParserState.__copy__c                 C   rz   )Nr{   z, state_stack=r|   )rl   r9   re   r   rn   r@   r@   rA   ro     r}   zPartialParserState.__repr__NNFru   )r9   r:   r;   	__slots__rO   r   r   r  r  r  ry   ro   rU   r@   r@   rR   rA   r   c  s    
.*	r   c                       s4   e Zd Zd	 fdd	Z	d
ddZdddZ  ZS )r   Fc                    s   t  j|||d || _d S )Nr   r   )rP   r   rQ   r   rL   rR   r@   rA   rO     s   
zPartialParser.__init__Nc                 C   sB   t | j| j|}t|t|||| j}|rt| ||jS | |S rM   )	r.   r   rQ   r   r   rL   r,   re   rt   )rP   re   rI   r   r   start_interactiver   parser_stater@   r@   rA   parse  s   
zPartialParser.parsec                 C   s   z |}|j |D ]}|| q	|r|r|jdkr|  |W S  tyB } zzt| ||j |_W | ty=   Y |w d }~w t	ym   | j
rltd td td t|jD ]\}}td| | q\td  w )Nr   r   zSTATE STACK DUMPz----------------z%d))re   lexr   rl   r  r   r,   r   	NameError	Exceptionr   print	enumerater   )rP   rx   r  rs   r   eir   r@   r@   rA   rt     s4   zPartialParser.parse_from_stater   r  )NF)r9   r:   r;   rO   r  rt   rU   r@   r@   rR   rA   r     s
    
r   c                   @   sh   e Zd Zeedd ZdddZdeeedf eedf f fdd	Z	dde
eedf  fddZd
S )PartialScannerc                 C   s2   |j  }t|}t|  \}}||jfS rM   )pattern	to_regexpinteregularparse_patternr3   to_fsmreduceprefix_postfix)r   terminal	regex_strr&  r   _r@   r@   rA   construct_terminal_fsm0  s   


z%PartialScanner.construct_terminal_fsmFc           
      C   sx   || _ || _|| _|| _dd | j D | _d | _g }| j D ]}| |\}}	|	dks,J || qt|\| _	| _
d S )Nc                 S   s   h | ]}|j qS r@   r   )r   r  r@   r@   rA   r   ?      z*PartialScanner.__init__.<locals>.<setcomp>)r   r   )r   g_regex_flags	use_bytesmatch_wholeallowed_types_mresr0  r  	fsm_unionr   r   )
rP   r   r2  re_r3  r4  fsmsr  r   r,  r@   r@   rA   rO   :  s   
zPartialScanner.__init__r^   .c           
      C   sb   d}d}t t|| jD ] \}\}}}| j| j}t||||}	||	f7 }|r,||	f7 }q||fS )z<Get the possible terminal symbols for an FSM state sequence.r@   )r"  get_sub_fsms_from_seqr   r   r   r4   )
rP   rC   rE   rF   r$  fsm_idfsm_reads_morein_finalr6   infor@   r@   rA   get_terminals_infoM  s   


z!PartialScanner.get_terminals_infoNlast_fsm_state_seqc           
      C   s   |}|rt |dksJ |t |d 7 }|d }n| jj}||d }t| jjj| jjj|}t| j||| jd}|s=dS |rG|t	| }	|	S |ft	| }	|	S )zYDetermine an FSM match over `text` starting at `pos` and continuing `last_fsm_state_seq`.r   r   N)
full_match)
r  r   initialr2   fsm_infoalphabet_symbol_mappingalphabet_anything_valuewalk_fsmr4  r   )
rP   textposr@  	start_posstart_state	text_parttext_transitions	state_seqresr@   r@   rA   match^  s2   
zPartialScanner.matchru   rM   )r9   r:   r;   r   r   r0  rO   r   r4   r?  r   r<   rO  r@   r@   r@   rA   r%  /  s    

 r%  c                   @   s2   e Zd ZddddZdededee fd	d
ZdS )r   r@   confr   c              	      s   t |j}|j t|}||_i }i | _| D ]E\}}t|}	z||	 }
W n1 tyW   t|t|j	B t|B }t|} fdd|D |_|jsMY qt
|}
|
||	< Y nw |
| j|< q|j|u seJ t
|| _d S )Nc                    s   g | ]
}| v r | qS r@   r@   )r   nterminals_by_namer@   rA   r     s    z3PartialContextualLexer.__init__.<locals>.<listcomp>)listr   rS  r   r   r   r   r  r   ra   r_   
root_lexer)rP   rP  r   always_acceptr   	trad_conflexer_by_symbolsrx   r  r   re   r`   r@   rR  rA   rO     s0   

zPartialContextualLexer.__init__lexer_stater  r^   c                 c   s    z	 | j |j }|||}|V  q ty   Y d S  tyH   t|j|jjkrEt	|j|jj|jj
|jjd|jo>|jg|| jjdY d S w )NTFallowedtoken_historyrx   rS  )r   r  
next_tokenEOFErrorr  r  rG  line_ctrchar_posr!   linecolumnr  rU  r   )rP   rY  r  re   next_tokr@   r@   rA   r    s.   zPartialContextualLexer.lexN)r@   rP  r   )	r9   r:   r;   rO   r   r   r
   r   r  r@   r@   r@   rA   r     s    r   c                       sH   e Zd Zd fddZdd Zddd	Zdd
ededefddZ	  Z
S )r_   rP  r   c                    s   t  | |   d S rM   )rN   rO   _build_scanner)rP   rP  rR   r@   rA   rO     s   zPartialBasicLexer.__init__c                 C   s   t | j| j| j| j\}| _| jrJ | j D ]#\}}| j| }|j	jD ]}| j
| | j|}| j|| q%qt| j| j| j| j| _d S rM   )r#   r   r2  rer3  callbackuser_callbacksr   rS  r   removeindexinsertr%  _scanner)rP   r   r6   rg  r-  sub_terminalidxr@   r@   rA   re    s   



z PartialBasicLexer._build_scannerNc                 C   s   | j |||S rM   )r   rO  )rP   rG  rH  r@  r@   r@   rA   rO    r  zPartialBasicLexer.match	lex_stater  r^   c              
   C   sl  |j }d }|r|jdkr|jj}|j}|j|rt|d nd }|t|jk r2| |j|j|}|sm|r>|d | j	j
jvr`| j	j| j }|sJdh}t|j|j|j|j||j oZ|j g|| jd|jj}	|jj}
|jj}n
|}	| j	|	\}
}|r}|d n|
d }|j p|jpt|
dk}|j}|t|	 d }|t|jkr|rd}t|	||
|}d}n|j}|j||  }}t| jtsJ || jvrt|||j|j|j}|||| jv  |j|_|j|_ |j|_!|j| jv r| j|j |}t|tst"d| ||_ |S || jv r t|||j|j|j}| j| | |||| jv  d }|t|jk s(t#| )	Nr   r   r   r   z<END-OF-FILE>rZ  r   z+Callbacks must return a token (returned %r))$r  rl   r   rC   r_  r`  r  rG  rO  r   r   finalsr5  r   r!   ra  rb  rS  rE   rF   r?  r8   r7   rB   r6   rg   rg  r   r   feednewline_typesend_line
end_columnend_posr   r^  )rP   ro  r  r  r@  r_  ru  rN  r[  rC   rE   rF   priority_terminal_inforD   rI  	type_nametoken_valuer   r  t2r@   r@   rA   r]    s   



czPartialBasicLexer.next_tokenrd  rM   )r9   r:   r;   rO   re  rO  r   r   r   r]  rU   r@   r@   rR   rA   r_     s
    
# r_   c                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )PartialIndenterzJAn `Indenter` that doesn't reset its state every time `process` is called.c                 C   s
   |  |S rM   )_process)rP   streamr@   r@   rA   processY  s   
zPartialIndenter.processc                 c   s~    |D ]9}|j | jv r|  jd7  _n|j | jv r*|  jd8  _| jdk r*t|g |j | jkr9| |E d H  q|V  qd S )Nr   r   )rl   OPEN_PAREN_typesparen_levelCLOSE_PAREN_typesr"   NL_type	handle_NL)rP   r|  r   r@   r@   rA   r{  \  s   

zPartialIndenter._processc                 C   s    || j v r| jd dk rdS dS )Nr   r   FT)r  r  )rP   
token_typer@   r@   rA   accepts_token_typeq  s   z"PartialIndenter.accepts_token_typec                 C   s"   t |  }| j|_t| j|_|S rM   )rl   r  r   indent_level)rP   rN  r@   r@   rA   ry   |  s   
zPartialIndenter.__copy__c                 C   rz   )Nz(paren_level=z, indent_level=r|   )rl   r9   r  r  rn   r@   r@   rA   ro     r}   zPartialIndenter.__repr__N)	r9   r:   r;   __doc__r}  r{  r  ry   ro   r@   r@   r@   rA   rz  V  s    rz  c                   @   s,   e Zd ZdZg dZg dZdZdZdZdS )PartialPythonIndenter_NEWLINE)LPARLSQBLBRACE)RPARRSQBRBRACE_INDENT_DEDENT   N)	r9   r:   r;   r  r~  r  INDENT_typeDEDENT_typetab_lenr@   r@   r@   rA   r    s    r  r   c                 C   s   t | jtr	| jS | jjS rM   )rg   re   r   r   r@   r@   rA   r     s   r   lpr^   c              	   C   s^   i }| j D ]'}t|j }zt|  \}}W n ty&   d}Y nw |||j	< q|S )zRConstruct a ``dict`` mapping terminal symbol names to their finite state machines.N)
r   r(  r)  r&  r'  r3   r*  r+  r   r   )r  symbol_names_and_fsmsr-  r&  r   r/  r@   r@   rA   terminals_to_fsms  s   
r  r9  c              	      s  t jdd | D  \}tt|  dd  D }dtf fdd}|g}t }i }i }d}|t|k r|| tfd	d
 D rI|| i ||< |j	D ]w}	z||	}
W n	 t
yb   Y qPw z||
}W n ty|   t|}||
 Y nw ||| |	< |
 D ]?\}}||t t i f\}}}| }|}||t | ||t | |||f || | jv r|| qqP|d7 }|t|k s5t|tt|d||dd}t|\}fddt| dd dD }||fS )a  Construct an FSM representing the union of the FSMs in `fsms`.

    This is an updated version of `interegular.fsm.FSM.union` made to return an
    extra map of component FSMs to the sets of state transitions that
    correspond to them in the new FSM.

    c                 S   s   g | ]}|j qS r@   )alphabet)r   r   r@   r@   rA   r     r1  zfsm_union.<locals>.<listcomp>c                 S   s   i | ]\}}||j qS r@   )rB  )r   r$  r   r@   r@   rA   r     r   zfsm_union.<locals>.<dictcomp>new_transitionc                    sh   i } D ])\}}| | }|| v r-| | |j v r-||j | |  v r-|j | |  | ||< q|s2t|S rM   )mapr   )r   r  nextr$  fold_transition)indexed_fsms
new_to_oldr@   rA   follow  s   zfsm_union.<locals>.followr   c                 3   s&    | ]\}}  |d |jv V  qdS )r   N)r   rp  )r   jr   )rx   r@   rA   r     s   $ zfsm_union.<locals>.<genexpr>r   T)r  r   rB  rp  r  __no_validation__c                    sP   i | ]$\}\}}}| fd d|D  fdd|D  fdd|  D fqS )c                    s    h | ]\}} |  | fqS r@   r@   )r   s1s2old_to_new_statesr@   rA   r   	  s     z'fsm_union.<locals>.<dictcomp>.<setcomp>c                       h | ]} | qS r@   r@   r   r  r@   rA   r   
  r   c                    s$   i | ]\}}| fd d|D qS )c                    r  r@   r@   )r   r  r  r@   rA   r     r   z2fsm_union.<locals>.<dictcomp>.<dictcomp>.<setcomp>r@   )r   	old_stater   r  r@   rA   r     s    z(fsm_union.<locals>.<dictcomp>.<dictcomp>)r   )r   r;  r   rp  r   r  r@   rA   r     s    	
c                 S   s   | d S )Nr   r@   r   r@   r@   rA   r     r   zfsm_union.<locals>.<lambda>r   )r   unionr   r"  r<   r   r  r  r   by_transitionr   rj  
ValueErrorr  r   r   rp  r   ranger3   r   )r9  r  rB  r  r   rp  r  r   r$  
transitionr  r  r;  	fsm_statefsm_transitions
fsm_finalsfsm_old_to_newold_fromold_tor   _fsms_to_trans_finalsr@   )r  r  r  rx   rA   r7    s|   


*
	
	r7  rM  r   c                 #   sL    t tdd dd d   fdd| D E dH  dS )a  Get the indices of the sub-FSMs in `fsm` that could have matched the state sequence `state_seq`.

    Parameters
    ----------
    state_seq
        A state sequence.
    fsms_to_trans_finals
        A map from FSM indices to tuples containing sets of their state transitions
        and sets of the final/accept states.

    Returns
    -------
    A generator returning tuples containing each sub-FSM index (in the order
    they were union-ed to construct `fsm`) and booleans indicating whether or
    not there is another valid transition from the last state in the sequence
    for the associated sub-FSM (i.e. if the FSM can continue
    accepting/matching) and whether or not the sequence ends in a final state
    of the sub-FSM.
    Nr   r   c                 3   sH    | ]\}\}}} |r|t fd d|D d |v fV  qdS )c                 3   s    | ]	\}} |kV  qd S rM   r@   )r   from_sto_s)last_fsm_stater@   rA   r   ;  s    z2get_sub_fsms_from_seq.<locals>.<genexpr>.<genexpr>r   N)issubsetr  )r   fsm_idxr   rp  r/  r  rM  state_seq_transitionsr@   rA   r   6  s    	

z(get_sub_fsms_from_seq.<locals>.<genexpr>)r   r   r   )rM  r   r@   r  rA   r:    s   	r:  Tr   token_transition_keysrJ  rA  c                 C   s   | j }|}g }d}| j}t|D ]0\}	}
|||
f}|d u r1|s-|dkr-|d |   S g   S |}||v r;|	d }|| q|rK|d |	krKg S |S )Nr   r   )rp  flat_transition_mapr"  r   r  )r   r  rJ  rA  r  rx   accepted_stateslast_final_idxr  r$  	trans_keyr  r@   r@   rA   rF  D  s$   rF  )T)[r   r   dataclassesr   	functoolsr   typingr   r   r   r	   r
   r   r   r   r   r   r   r(  interegular.fsmr   r   r   interegular.patternsr   larkr   r   lark.commonr   r   lark.exceptionsr   r   lark.indenterr   
lark.lexerr   r   r   r   r    r!   r"   r#   lark.parser_frontendsr$   r%   r&   lark.parsers.lalr_analysisr'   r(   r)   r*   r+   $lark.parsers.lalr_interactive_parserr,   lark.parsers.lalr_parserr-   r.   r/   r0   outlines_core.fsm.regexr1   r2   r3   r>   r<   rq   ParseStateTyper4   rB   rG   rV   rw   r   rc   r   r   r   r%  r   r_   rz  r  r   r  r7  r?   r:  rF  r@   r@   r@   rA   <module>   s    4(
?
{= !,U4 0	8
t,
-