o
    qi.e                     @   s  d Z ddlmZmZ ddlmZ ddlmZmZ ddl	m
Z
 ddlmZmZmZmZmZ ddlmZmZmZmZ ddlmZmZmZ g d	ZG d
d deZG dd deZG dd deZejej ej!dZ"de#defddZ$dededefddZ%eddG dd deZ&G dd de&eZ'eddG d d! d!e'Z(d"e(fd#d$Z)eddG d%d& d&e'Z*eddG d'd( d(e&Z+e* Z,e+ Z-e(e.d)d*Z/e(e.d)dZ0e(e.d+d*e(e.d+de(e.d,d*e(e.d,de(e.d-d*e(e.d-de(e.d.d*e(e.d/d*e(e.d0d*e(e.d1d*e(e.d2d*e(e.d3d*e(e.d4d*d5Z1eddG d6d7 d7e&Z2e2e0dd8Z3eddG d9d: d:Z4eddG d;d< d<e&Z5eddG d=d> d>e'Z6G d?d@ d@ee6 Z7dAe#de6fdBdCZ8d8S )Dz
Allows the parsing of python-style regexes to FSMs.
Main access point is `parse_pattern(str) -> Pattern`.
Most other classes are internal and should not be used.
    )abstractmethodABC)	dataclass)Flagauto)indent)Iterable	FrozenSetOptionalTupleUnion)FSManything_elseepsilonAlphabet)SimpleParsernomatchNoMatch)parse_patternPatternUnsupportedInvalidSyntaxREFlagsc                   @      e Zd ZdS )r   N__name__
__module____qualname__ r   r   H/home/ubuntu/.local/lib/python3.10/site-packages/interegular/patterns.pyr          r   c                   @   r   )r   Nr   r   r   r   r   r      r    r   c                   @   s*   e Zd Ze  ZZe  ZZe  ZZ	dS )r   N)
r   r   r   r   CASE_INSENSITIVEI	MULTILINEMSINGLE_LINESr   r   r   r   r      s    

r   )imsplusreturnc              	   C   sD   t d}| D ]}z|t| O }W q ty   td| dw |S )Nr   zFlag  is not implemented)r   _flagsKeyErrorr   )r*   rescr   r   r   
_get_flags(   s   r1   baseaddedremovedc                 C   s   | |O } | | M } | S Nr   )r2   r3   r4   r   r   r   _combine_flags2   s   
r6   T)frozenc                       s   e Zd ZdZeddefddZededefddZ	dedef fd	d
Z
edeeee f fddZedeeee f f fddZedeeee f fddZedeeee f f fddZedddZ  ZS )_BasePattern)_alphabet_cache_prefix_cache_lengths_cacheNr+   c                 C      t r5   NotImplementedErrorselfalphabetprefix_postfixflagsr   r   r   to_fsm=      z_BasePattern.to_fsmrC   c                 C   r<   r5   r=   r@   rC   r   r   r   _get_alphabetA   rE   z_BasePattern._get_alphabetc                    s@   t | dstt| di  || jvr| || j|< | j| S )Nr9   )hasattrsuperr8   __setattr__r9   rG   rF   	__class__r   r   get_alphabetE   s
   


z_BasePattern.get_alphabetc                 C   r<   r5   r=   r@   r   r   r   _get_prefix_postfixL   rE   z _BasePattern._get_prefix_postfixc                    &   t | dstt| d|   | jS )zWReturns the number of dots that have to be pre-/postfixed to support look(aheads|backs)r:   )rH   rI   r8   rJ   rO   r:   rN   rK   r   r   rB   P   s   
z_BasePattern.prefix_postfixc                 C   r<   r5   r=   rN   r   r   r   _get_lengthsW   rE   z_BasePattern._get_lengthsc                    rP   )zuReturns the minimum and maximum length that this pattern can match
         (maximum can be None bei infinite length)r;   )rH   rI   r8   rJ   rQ   r;   rN   rK   r   r   lengths[   s   
z_BasePattern.lengthsc                 C   r<   r5   r=   rN   r   r   r   simplifyc   rE   z_BasePattern.simplify)NNN)r+   r8   )r   r   r   	__slots__r   r   rD   r   r   rG   rM   r   intr
   rO   propertyrB   rQ   rR   rS   __classcell__r   r   rK   r   r8   9   s"      r8   c                   @   r   )_RepeatableNr   r   r   r   r   rX   h   r    rX   c                   @   s   e Zd ZU dZee ed< eed< dZde	de
fddZdeeee f fd	d
Zdeeee f fddZdde	dfdefddZdddZdS )
_CharGroupzqRepresents the smallest possible pattern that can be matched: A single char.
    Direct port from the lego modulecharsnegated)rZ   r[   rC   r+   c                 C   s@   |t j@ rh ttj| jttj| j}n| j}t|t	hS r5   )
r   r!   mapstrlowerrZ   upperr   from_groupsr   )r@   rC   relevantr   r   r   rG   t   s   
"z_CharGroup._get_alphabetc                 C      dS Nr   r   r   rN   r   r   r   rO   {      z_CharGroup._get_prefix_postfixc                 C   rb   N)   rg   r   rN   r   r   r   rQ   ~   re   z_CharGroup._get_lengthsNr   c                    s    d u r	|  | |d u r| j}|dkrtd|tj@ }|tj M }|tj M }|r/t||rFth dd | jD dd | jD }n| j}| j	r\d fddt
 | D i}nd fd	d|D i}t dd
hdd
h|dS )Nrd   .Can not have prefix/postfix on CharGroup-levelc                 s       | ]}|  V  qd S r5   )r^   .0r0   r   r   r   	<genexpr>       z$_CharGroup.to_fsm.<locals>.<genexpr>c                 s   ri   r5   )r_   rj   r   r   r   rl      rm   r   c                       i | ]} | d qS rg   r   rk   symbolrA   r   r   
<dictcomp>       z%_CharGroup.to_fsm.<locals>.<dictcomp>c                    rn   ro   r   rp   rr   r   r   rs      rt   rg   rA   statesinitialfinalsr\   )rM   rB   
ValueErrorr   r!   r%   r   	frozensetrZ   r[   setr   )r@   rA   rB   rC   insensitiverZ   mappingr   rr   r   rD      s4   

*z_CharGroup.to_fsmc                 C      | S r5   r   rN   r   r   r   rS      re   z_CharGroup.simplify)r+   rY   )r   r   r   __doc__r	   r]   __annotations__boolrT   r   r   rG   r   rU   r
   rO   rQ   r   rD   rS   r   r   r   r   rY   l   s   
 'rY   groupsc                 G   sV   t  jdd |D  }t  jdd |D  }|r"tt|| |  S tt|| | S )Nc                 s   s    | ]	}|j s|jV  qd S r5   r[   rZ   rk   gr   r   r   rl          z'_combine_char_groups.<locals>.<genexpr>c                 s   s    | ]	}|j r|jV  qd S r5   r   r   r   r   r   rl      r   )r{   unionrY   rz   )negater   posnegr   r   r   _combine_char_groups   s
   r   c                   @   v   e Zd ZddedfdefddZdedefddZdee	e
e	 f fd	d
Zdee	e
e	 f fddZdddZdS )__DotClsNr   r+   c              	      sb    d u r	|  | |d u s|tj@ st dh }n }t ddhddhd fdd|D idS )N
r   rg   c                    rn   ro   r   )rk   symrr   r   r   rs      rt   z#__DotCls.to_fsm.<locals>.<dictcomp>ru   )rM   r   r%   r{   r   )r@   rA   rB   rC   symbolsr   rr   r   rD      s   
z__DotCls.to_fsmrC   c                 C   s&   |t j@ rtthS tthdhS )Nr   )r   r%   r   r`   r   rF   r   r   r   rG      s   
z__DotCls._get_alphabetc                 C   rb   rc   r   rN   r   r   r   rO      re   z__DotCls._get_prefix_postfixc                 C   rb   rf   r   rN   r   r   r   rQ      re   z__DotCls._get_lengthsc                 C   r~   r5   r   rN   r   r   r   rS      re   z__DotCls.simplify)r+   r   r   r   r   r   r   rD   r   rG   r   rU   r
   rO   rQ   rS   r   r   r   r   r      s    r   c                   @   r   )
__EmptyClsNr   r+   c                 C   s   |d u r	|  |}t|S r5   )rM   r   r?   r   r   r   rD      s   
z__EmptyCls.to_fsmrC   c                 C   s   t thS r5   )r   r`   r   rF   r   r   r   rG         z__EmptyCls._get_alphabetc                 C   rb   rc   r   rN   r   r   r   rO      re   z__EmptyCls._get_prefix_postfixc                 C   rb   rc   r   rN   r   r   r   rQ      re   z__EmptyCls._get_lengthsc                 C   r~   r5   r   rN   r   r   r   rS      re   z__EmptyCls.simplify)r+   r   r   r   r   r   r   r      s    r    F?abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_
0123456789z 	
r   	)wWdDr)   r&   abfnrtvc                   @   s   e Zd ZU dZeed< eed< ee ed< dd Zde	de
fd	d
Zdeeee f fddZdeeee f fddZdde	dfdefddZdddZdS )	_RepeatedzxRepresents a repeated pattern. `base` can be matched from `min` to `max` times.
    `max` may be None to signal infiniter2   minmaxc                 C   s4   d| j  d| jd ur| jnd dtt| jd S )Nz	Repeated[:r   z]:
z    )r   r   r   r]   r2   rN   r   r   r   __str__
  s    z_Repeated.__str__rC   r+   c                 C      | j |S r5   )r2   rM   rF   r   r   r   rG     r   z_Repeated._get_alphabetc                 C   s   | j jS r5   )r2   rB   rN   r   r   r   rO     s   z_Repeated._get_prefix_postfixc                 C   s4   | j j\}}|| j d || jfvr|| j fS d fS r5   )r2   rR   r   r   )r@   lhr   r   r   rQ     s   (z_Repeated._get_lengthsNr   c                 C   s   |d u r	|  |}|d u r| j}|dkrtd| jj|d|d}|| j }| jd u r3| }|| S | }|j	d  |j
hO  < || j| j 9 }|| S )Nrd   rh   rC   rx   )rM   rB   ry   r2   rD   r   r   starcopy__dict__rw   )r@   rA   rB   rC   unit	mandatoryoptionalr   r   r   rD     s   


z_Repeated.to_fsmc                 C      |  | j | j| jS r5   )rL   r2   rS   r   r   rN   r   r   r   rS   *     z_Repeated.simplify)r+   r   )r   r   r   r   rX   r   rU   r
   r   r   r   rG   r   rO   rQ   r   rD   rS   r   r   r   r   r     s   
 r   Nc                   @   sJ   e Zd ZU dZeed< eed< eed< dZdede	fdd	Z
dd
dZdS )_NonCapturingzRepresents a lookahead/lookback. Matches `inner` without 'consuming' anything. Can be negated.
    Only valid inside a `_Concatenation`inner	backwardsr   )r   r   r   rC   r+   c                 C   r   r5   )r   rM   rF   r   r   r   rM   :  r   z_NonCapturing.get_alphabetc                 C   r   r5   )rL   r   rS   r   r   rN   r   r   r   rS   =  r   z_NonCapturing.simplifyN)r+   r   )r   r   r   r   r8   r   r   rT   r   r   rM   rS   r   r   r   r   r   1  s   
 r   c                   @   s   e Zd ZU dZeeeef df ed< dZ	dd Z
dedefd	d
Zdeeee f fddZdeeee f fddZddedfdefddZdddZdS )_ConcatenationzXRepresents multiple Patterns that have to be match in a row. Can contain `_NonCapturing`.parts)r   c                 C      dd dd | jD  S )NzConcatenation:
r   c                 s       | ]
}t t|d V  qdS z  Nr   r]   rk   pr   r   r   rl   H      z)_Concatenation.__str__.<locals>.<genexpr>)joinr   rN   r   r   r   r   G     z_Concatenation.__str__rC   r+   c                    s   t j fdd| jD  d S )Nc                 3       | ]}|  V  qd S r5   rM   r   r   r   r   rl   K      z/_Concatenation._get_alphabet.<locals>.<genexpr>r   )r   r   r   rF   r   r   r   rG   J  s   z_Concatenation._get_alphabetc                 C   s   d}d}| j D ]/}t|ts||jd 7 }q|jr6|jj\}}||kr,td||f || }||kr6|}qd}d}t| j D ]+}t|tsO||jd 7 }q@|jsk|jj\}}|d u ra|| }n|| }||krk|}q@||fS )Nr   z$lookbacks have to have fixed length )r   
isinstancer   rR   r   r   r   reversed)r@   preoffr   r   r   reqpostr   r   r   rO   M  s6   



z"_Concatenation._get_prefix_postfixc                 C   sP   d\}}| j D ]}t|ts#|j\}}||7 }d ||fvr!|| nd }q||fS rc   )r   r   r   rR   )r@   lowhighr   plphr   r   r   rQ   i  s   


z_Concatenation._get_lengthsNr   c                 C   sf  |d u r	|  |}|d u r| j}|d | jd k s"|d | jd k r&tdt|}| }g }||d g}| jD ]1}t|t	rc|j
|d|}	|jrRtd|d |f |||	f g }q<|||d| q<|||d  tj| }
t|D ]/\}}|d u rtjg ||
R  }
qt|t	r|jrJ |jr|
|| }
q|
|| }
q|
S )Nr   rg   zHGroup can not have lookbacks/lookaheads that go beyond the group bounds.rd   zlookbacks are not implemented)rM   rB   r   _ALLrD   r   timesr   r   r   r   r   appendr   concatenater   r   
differenceintersection)r@   rA   rB   rC   all_all_star	fsm_partscurrentpartr   resultr(   r   r   r   r   rD   r  s:   
$



z_Concatenation.to_fsmc                 C   s   |  tdd | jD S )Nc                 s   ri   r5   rS   r   r   r   r   rl     rm   z*_Concatenation.simplify.<locals>.<genexpr>)rL   tupler   rN   r   r   r   rS     r   z_Concatenation.simplify)r+   r   )r   r   r   r   r   r   r8   r   r   rT   r   r   r   rG   rU   r
   rO   rQ   r   rD   rS   r   r   r   r   r   A  s   
 	(r   c                   @   s   e Zd ZU eedf ed< edZeed< edZeed< dd Z	ded	e
fd
dZd	eeee f fddZd	eeee f fddZddedfd	efddZedfdeded	d fddZdddZdS )r   .optionsr   added_flagsremoved_flagsc                 C   r   )Nz	Pattern:
r   c                 s   r   r   r   rk   or   r   r   rl     r   z"Pattern.__str__.<locals>.<genexpr>)r   r   rN   r   r   r   r     r   zPattern.__str__rC   r+   c                    s.   t  | j| j tj fdd| jD  d S )Nc                 3   r   r5   r   r   r   r   r   rl     r   z(Pattern._get_alphabet.<locals>.<genexpr>r   )r6   r   r   r   r   r   rF   r   r   r   rG     s   zPattern._get_alphabetc                 C   sV   d\}}| j D ]}|j\}}|d u s||k r|}|d u s$|d ur&||kr&|}q||fS )N)Nr   )r   rR   )r@   r   r   r   olohr   r   r   rQ     s   

zPattern._get_lengthsc                 C   sN   d\}}| j D ]}|j\}}||kr|}|d u s |d ur"||kr"|}q||fS rc   )r   rB   )r@   r   r   r   opreopostr   r   r   rO     s   

zPattern._get_prefix_postfixNc                    sN   t | j| j d u r|  d u r| jtj fdd| jD  S )Nc                 3   s    | ]
}|  V  qd S r5   )rD   r   rA   rC   rB   r   r   rl     r   z!Pattern.to_fsm.<locals>.<genexpr>)r6   r   r   rM   rB   r   r   r   r?   r   r   r   rD     s   
zPattern.to_fsmr3   r4   c                 C   s   |  | j||S r5   )rL   r   )r@   r3   r4   r   r   r   
with_flags  s   zPattern.with_flagsc                 C   s   t | jdkr<| jd }t|tr<t |jdkr<t|jd tr<|jd  }tttd| j	| j
|j	|j
}||S | tdd | jD | j	| j
S )Nrg   r   c                 s   ri   r5   r   r   r   r   r   rl     rm   z#Pattern.simplify.<locals>.<genexpr>)lenr   r   r   r   r   rS   r6   r   r   r   r   rL   r   )r@   r   r   r   r   r   r   rS     s   
(
"zPattern.simplify)r+   r   )r   r   r   r   r8   r   r   r   r   r   r   rG   rU   r
   rQ   rO   r   rD   r   rS   r   r   r   r   r     s   
 

r   c                       s   e Zd ZU eh dZee ed< eddhZee ed< eh dZ	ee ed< def fd	d
Z
 fddZdd Zdd Zdd Zdd Zdd Zdd Zdd ZdefddZdefdd Zd(d"d#Zd$d% Zdefd&d'Z  ZS ))_ParsePattern>   *.$()+?[\^|SPECIAL_CHARS_STANDARDr   ]SPECIAL_CHARS_INNER>   ABUZr   uRESERVED_ESCAPESdatac                    s   t t| | d | _d S r5   )rI   r   __init__rC   )r@   r  rK   r   r   r    s   
z_ParsePattern.__init__c                    s$   zt t|  W S  ty   tw r5   )rI   r   parser   r   rN   rK   r   r   r    s
   z_ParsePattern.parsec                 C   s(   d | _ |  }| j d ur|| j }|S r5   )rC   patternr   r@   r   r   r   r   start  s
   
z_ParsePattern.startc                 C   s8   |   g}| dr||    | ds
tt|S )Nr   )concstatic_br   r   r   )r@   r   r   r   r   r    s
   


z_ParsePattern.patternc                 C   s:   g }	 z	| |   W n	 ty   Y nw qtt|S r5   )r   objr   r   r   )r@   r   r   r   r   r	    s   z_ParsePattern.concc                 C   s    |  dr	|  S | |  S )Nr   )r
  group
repetitionatomrN   r   r   r   r     s   
z_ParsePattern.objc                 C   s.   |  dr	|  S |  }| d | |S )Nr   r   )r
  extension_groupr  staticr  r  r   r   r   r    s
   


z_ParsePattern.groupc                 C   s>  |   }|dv rT|  jd8  _| ddd }| dr#| ddd }nd}| drB|  }|t|t|}| d | |S |dkrHt	| d t|| _
tS |dkrf|  }| d | |S |d	kr| d
r| ddd  | d |  }| d | |S | drtdd S |dkr| ds|    | drd S d S |dkr|  }| d t|ddS |dkr|  }| d t|ddS |d
kr|   }|dkr|  }| d t|ddS |dkr|  }| d t|ddS d S |dkr
tdtd|d| j| jd | jd  )NzaiLmsux-rg   aiLmsuxr   -r   r   r   P<r   >=$Group references are not implemented#F!Tr   z'Conditional matching is not implementedzUnknown group-extension: z (Context:       )anyindexmultipler
  r  r   r1   r  r  r   rC   _EMPTYr   r   r   r  )r@   r0   r   r   r   r   r   r   r    sv   



















$z_ParsePattern.extension_groupc                 C   s   |  dr| |  S |  dr| |  S |  dr"| tS |  dr+td|  dr4td| j| j }| tt	|hdS )	Nr   r   r   r   z'$'r   z'^'F)
r
  r  	chargroupescaped_DOTr   any_butr   rY   rz   )r@   r0   r   r   r   r  I  s   





z_ParsePattern.atomr2   c                 C   s   |  dr|  dr	 t|dd S |  dr"|  dr	 t|dd S |  dr3|  dr-	 t|ddS |  drtz|  }W n tyI   d}Y nw |  draz|  }W n ty`   d }Y nw |}| d |  drn	 t|||S |S )	Nr   r   r   r   rg   {,})r
  r   numberr   r  )r@   r2   r   r(   r   r   r   r  X  s<   









z_ParsePattern.repetitionr+   c                 C   s   t | ddd S )Nr   rg   )rU   r  rN   r   r   r   r'  x  s   z_ParsePattern.numberFc                 C   s  |  dr| ddd}tt|d}tt|hdS |  dr6| ddd}tt|d	}tt|hdS | d
ddddrCtd|s}z	| ddd}W n	 tyW   Y nw tt|d	}tt|hdS z| ddd W td ty|   Y n#w z	| ddd}W n	 ty   Y nw tt|d	}tt|hdS |sz| j	| j
 }W n	 ty   Y n	w td| dz
| j	t }W t| S  ty   Y nw | d}| rttt|dS )Nx0123456789abcdefABCDEF      F001234567rg      Nr   r  r  r   z2regex module unicode properties are not supported.r  r   r  zEscape \r,   4abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)r
  r  chrrU   rY   rz   anyof_br   r   anyofr  _CHAR_GROUPSr#  isalpha)r@   r   r   r0   r   r   r   r!  {  s`   


z_ParsePattern.escapedc                 C   s   |  drd}nd}g }	 z	||   W n	 ty   Y nw q| d t|dkr;t|d }t|j||j	A S t|dkrHtt
i |S t|d|iS )Nr   TFr   rg   r   r   )r
  r   chargroup_innerr   r  r   r   rY   rZ   r[   rz   r   )r@   r   r   r   r   r   r   r     s$   

z_ParsePattern.chargroupc                 C   s  | j }| dr| d}ntt| j| j d}| dr| dr)| d}n| dr:t|ttddddS tt| j| j d}t	|j
dksSt	|j
dkr`td| j|| j   t|j
 t|j
 }}||kr|td| j|| j   ttd	d
 t||d D dS |S )Nr   TFr  r   )r   rg   zInvalid Character-range: c                 s   s    | ]}t |V  qd S r5   )r1  )rk   r'   r   r   r   rl     rm   z0_ParsePattern.chargroup_inner.<locals>.<genexpr>)r  r
  r!  rY   rz   r#  r   peek_staticr   r   rZ   r   r  ordrange)r@   r  r2   endr   r   r   r   r   r6    s"   



"z_ParsePattern.chargroup_inner)F)r   r   r   rz   r   r	   r]   r   r   r  r  r  r  r  r	  r  r  r  r  rX   r  rU   r'  r!  r   rY   r6  rW   r   r   rK   r   r     s(   
 	< 
3r   r  c                 C   s   t | }| }| }|S r5   )r   r  rS   )r  r   outr   r   r   r     s   r   )9r   abcr   r   dataclassesr   enumr   r   textwrapr   typingr   r	   r
   r   r   interegular.fsmr   r   r   r   interegular.utils.simple_parserr   r   r   __all__	Exceptionr   r   r   r"   r$   r&   r-   r]   r1   r6   r8   rX   rY   r   r   r   r"  r  rz   _NONEr   r4  r   	_ALL_STARr   r   r   r   r   r   r   r   r   <module>   sv    
.?	!+\6  