o
    i0y                     @   sL   d Z ddlZddlZed ddlmZ ddlmZ G dd dejZ	dS )z
Unit tests for nltk.tgrep.
    N	pyparsing)tgrep)ParentedTreec                   @   s   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd$d% Zd&d' Zd(d) Zd*d+ Zd,d- Zd.d/ Zd0d1 Zd2d3 Zd4d5 Zd6d7 Zd8d9 Zd:d; Z d<S )=TestSequenceFunctionsz5
    Class containing unit tests for nltk.tgrep.
    c                 C   s   t d}| |g d dS )z.
        Simple test of tokenization.
        %A .. (B !< C . D) | ![<< (E , F) $ G])A..(B!<C.D)|r   [<<r	   E,Fr   $G]N)r   tgrep_tokenizeassertEqual)selftokens r   M/home/ubuntu/.local/lib/python3.10/site-packages/nltk/test/unit/test_tgrep.pytest_tokenize_simple   s
   
z*TestSequenceFunctions.test_tokenize_simplec                 C   s   |  tdtd dS )zM
        Test that tokenization handles bytes and strs the same way.
        s%   A .. (B !< C . D) | ![<< (E , F) $ G]r   Nr   r   r   r   r   r   r   test_tokenize_encoding@   s   z,TestSequenceFunctions.test_tokenize_encodingc                 C   sX  |  tdg d |  tdg d |  tdg d |  tdg d |  td	g d
 |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d  |  td!g d" |  td#g d$ |  td%g d& |  td'g d( |  td)g d* |  td+g d, |  td-g d. |  td/g d0 |  td1g d2 |  td3g d4 |  td5g d6 |  td7g d8 |  td9g d: |  td;g d< |  td=g d> |  td?g d@ |  tdAg dB |  tdCg dD |  tdEg dF |  tdGg dH |  tdIg dJ |  tdKg dL |  tdMg dN |  tdOg dP |  tdQg dR |  tdSg dT |  tdUg dV |  tdWg dX |  tdYg dZ |  td[g d\ |  td]g d^ |  td_g d` |  tdag db |  tdcg dd |  tdeg df |  tdgg dh |  tdig dj |  tdkg dl |  tdmg dn |  tdog dp |  tdqg dr |  tdsg dt |  tdug dv |  tdwg dx |  tdyg dz |  td{g d| d}S )~z8
        Test tokenization of basic link types.
        zA<B)r   r   r
   zA>B)r   >r
   zA<3B)r   <3r
   zA>3B)r   >3r
   zA<,B)r   <,r
   zA>,B)r   >,r
   zA<-3B)r   <-3r
   zA>-3B)r   >-3r
   zA<-B)r   <-r
   zA>-B)r   >-r
   zA<'B)r   <'r
   zA>'B)r   >'r
   zA<:B)r   <:r
   zA>:B)r   >:r
   zA<<B)r   r   r
   zA>>B)r   >>r
   zA<<,B)r   <<,r
   zA>>,B)r   >>,r
   zA<<'B)r   <<'r
   zA>>'B)r   >>'r
   zA<<:B)r   <<:r
   zA>>:B)r   >>:r
   zA.B)r   r   r
   zA,B)r   r   r
   zA..B)r   r   r
   zA,,B)r   ,,r
   zA$B)r   r   r
   zA$.B)r   $.r
   zA$,B)r   $,r
   zA$..B)r   $..r
   zA$,,B)r   $,,r
   zA!<B)r   r   r   r
   zA!>B)r   r   r$   r
   zA!<3B)r   r   r%   r
   zA!>3B)r   r   r&   r
   zA!<,B)r   r   r'   r
   zA!>,B)r   r   r(   r
   zA!<-3B)r   r   r)   r
   zA!>-3B)r   r   r*   r
   zA!<-B)r   r   r+   r
   zA!>-B)r   r   r,   r
   zA!<'B)r   r   r-   r
   zA!>'B)r   r   r.   r
   zA!<:B)r   r   r/   r
   zA!>:B)r   r   r0   r
   zA!<<B)r   r   r   r
   zA!>>B)r   r   r1   r
   zA!<<,B)r   r   r2   r
   zA!>>,B)r   r   r3   r
   zA!<<'B)r   r   r4   r
   zA!>>'B)r   r   r5   r
   zA!<<:B)r   r   r6   r
   zA!>>:B)r   r   r7   r
   zA!.B)r   r   r   r
   zA!,B)r   r   r   r
   zA!..B)r   r   r   r
   zA!,,B)r   r   r8   r
   zA!$B)r   r   r   r
   zA!$.B)r   r   r9   r
   zA!$,B)r   r   r:   r
   zA!$..B)r   r   r;   r
   zA!$,,B)r   r   r<   r
   Nr!   r"   r   r   r   test_tokenize_link_typesI   s|   z.TestSequenceFunctions.test_tokenize_link_typesc                 C   s   |  tdg d |  tddg |  tdg d |  tdg d |  tdg d	 |  td
g d |  tdg d |  tdg d |  tdg d |  tdg d |  tdg d dS )zJ
        Test tokenization of the TGrep2 manual example patterns.
        NP < PP)NPr   PP/^NP/NP << PP . VP)r?   r   r@   r   VPNP << PP | . VP)r?   r   r@   r   r   rC   NP !<< PP [> NP | >> VP])r?   r   r   r@   r   r$   r?   r   r1   rC   r   NP << (PP . VP))r?   r   r	   r@   r   rC   r   NP <' (PP <, (IN < on)))r?   r-   r	   r@   r'   r	   INr   onr   r   S < (A < B) < C)	Sr   r	   r   r   r
   r   r   r   S < ((A < B) < C))rK   r   r	   r	   r   r   r
   r   r   r   r   S < (A < B < C))	rK   r   r	   r   r   r
   r   r   r   zA<B&.C)r   r   r
   &r   r   Nr!   r"   r   r   r   test_tokenize_examples   sB   z,TestSequenceFunctions.test_tokenize_examplesc                 C      |  tdg d dS )z/
        Test tokenization of quoting.
        z"A<<:B"<<:"A $.. B"<"A>3B"<C)z"A<<:B"r6   z	"A $.. B"r   z"A>3B"r   r   Nr!   r"   r   r   r   test_tokenize_quoting   s   z+TestSequenceFunctions.test_tokenize_quotingc                 C   s   |  tddg |  tddg |  tddg |  tddg |  tdddg |  tdg d	 |  td
g d |  tdg d dS )z2
        Test tokenization of node names.
        Robertz	/^[Bb]ob/*__zN()N(r   zN(0,))rU   0r   r   zN(0,0))rU   rV   r   rV   r   zN(0,0,))rU   rV   r   rV   r   r   Nr!   r"   r   r   r   test_tokenize_nodenames   s   z-TestSequenceFunctions.test_tokenize_nodenamesc                 C   rP   )z9
        Test tokenization of macro definitions.
        z4@ NP /^NP/;
@ NN /^NN/;
@NP [!< NP | < @NN] !$.. @NN)@r?   rA   ;rX   NNz/^NN/rY   z@NPr   r   r   r?   r   r   @NNr   r   r;   r[   Nr!   r"   r   r   r   test_tokenize_macros   s   z*TestSequenceFunctions.test_tokenize_macrosc                 C   sv   t d}| ttd|gddgg | ttd|g|d |d gg | ttd|gg dg dS )z`
        Test a simple use of tgrep for finding nodes matching a given
        pattern.
        A(S (NP (DT the) (JJ big) (NN dog)) (VP bit) (NP (DT a) (NN cat)))rZ   r      r_      NN|JJ)r   ra   r^   r`   Nr   
fromstringr   listr   tgrep_positionstgrep_nodesr   treer   r   r   test_node_simple   s     z&TestSequenceFunctions.test_node_simplec                 C   s^   t d}| ttd|gttd|g | ttd|gttd|g dS )z9Test that the tgrep print operator ' is properly ignored.(S (n x) (N x))Nz'Nz/[Nn]/z'/[Nn]/Nr   re   r   rf   r   rg   ri   r   r   r   test_node_printing   s   
z(TestSequenceFunctions.test_node_printingc                 C   s   t d}| ttd|gttd|g | ttd|gttd|g | ttd|gttd|g dS )z]
        Test that tgrep search strings handles bytes and strs the same
        way.
        r]   s   NNrZ   s   NN|JJrb   Nrd   ri   r   r   r   test_node_encoding  s   z(TestSequenceFunctions.test_node_encodingc                 C   L   t d}| ttd|gdgg | ttd|gddgg dS )zI
        Test selecting nodes using case insensitive node names.
        rl   "N"ra   zi@"N"r   Nrn   ri   r   r   r   test_node_nocase  s   
$z&TestSequenceFunctions.test_node_nocasec                 C   s   t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gd	gg d
S )z?
        Test selecting nodes using quoted node names.
        z(N ("N" x) (N" x) ("\" x))rr   r   z"\"N\""rt   z"N\""rs   z"\"\\\""r_   Nrn   ri   r   r   r   test_node_quoted$  s
   
"z&TestSequenceFunctions.test_node_quotedc                 C   s.   t d}| ttd|gddgg dS )/
        Test regex matching on nodes.
        $(S (NP-SBJ x) (NP x) (NNP x) (VP x))rA   rt   rs   Nrn   ri   r   r   r   test_node_regex.  s   
$z%TestSequenceFunctions.test_node_regexc                 C   sN   t d}| ttd|gddgg | ttd|gg dg dS )rx   z(S (SBJ x) (SBJ1 x) (NP-SBJ x))z/^SBJ/rt   rs   z/SBJ/)rt   rs   rv   Nrn   ri   r   r   r   test_node_regex_27  s
   
 z'TestSequenceFunctions.test_node_regex_2c                    s   t dfddtt D   fdd D }|D ]$}d| }tt|g}| 	t|d d | 	|d d | q!d	S )
zE
        Test matching on nodes based on NLTK tree position.
        ry   c                    s   h | ]}  |qS r   )leaf_treeposition.0x)rj   r   r   	<setcomp>I  s    z@TestSequenceFunctions.test_node_tree_position.<locals>.<setcomp>c                    s   g | ]}| vr|qS r   r   r}   )leaf_positionsr   r   
<listcomp>J  s    zATestSequenceFunctions.test_node_tree_position.<locals>.<listcomp>rm   r   ra   N)
r   re   rangelenleavestreepositionsrf   r   rg   r   )r   tree_positionspositionnode_idrg   r   )r   rj   r   test_node_tree_positionC  s   

z-TestSequenceFunctions.test_node_tree_positionc                 C   sL   t d}| ttd|gddgg | ttd|gdg g dS )zS
        Test node name matching with the search_leaves flag set to False.
        (S (A (T x)) (B (N x)))r   r   r   r   ra   r   r   FNrn   ri   r   r   r   test_node_noleavesQ  s
   
"z(TestSequenceFunctions.test_node_noleavesc                 C   s  t d}| ttd|gdgg | ttd|gdgg | ttd|gg dg | ttd|gdgg | ttd	|gd
gg | ttd|gdgg | ttd|gg dg | ttd|gg dg | ttd|gg dg | ttd|gdd
gg | ttd|gddgg | ttd|gddgg | ttd|gdgg | ttd|gdgg | ttd|gg dg t d}| ttd|gdgg | ttd|gddgg | ttd|gg dg | ttd|gdgg t d }| ttd!|gdgg | ttd"|gg d#g t d$}| ttd%|gg d&g | ttd'|gg d(g d)S )*zC
        Test matching nodes based on dominance relations.
        r   z* < Trt   z	* < T > Sz* !< T)r   r   r   r   rs   ra   r   r   z
* !< T > Srs   z* > Ar   z* > Br   z* !> B)r   rt   r   r   rs   r   z* !> B >> S)rt   r   rs   z* >> S)rt   r   rs   r   z* >>, Sz* >>' Sz* << Tr   z* <<' Tz* <<1 Nz* !<< T)r   r   rs   r   r   z(S (A (T x)) (B (T x) (N x )))z* <: Tz* !<: T)r   r   r   rs   r   r   )ra   ra   )ra   ra   r   z* !<: T > Sz(S (T (A x) (B x)) (T (C x)))z* >: Tz* !>: T)r   rt   r   r   rc   r   ra   r   rs   r   z=(S (A (B (C (D (E (T x)))))) (A (B (C (D (E (T x))) (N x)))))z* <<: T)rt   r   r   r   r   r   r   r   r   r   r   r   ra   r   r   r   )ra   r   r   r   r   z* >>: A)r   r   r   r   )r   r   r   r   r   r   r   r   Nrn   ri   r   r   r   tests_rel_dominance[  sz   
 
 
z)TestSequenceFunctions.tests_rel_dominancec                 C   s(   t d}| tjttd|g dS )zC
        Test error handling of undefined tgrep operators.
        r   z* >>> SN)r   re   assertRaisesr   TgrepExceptionrf   rg   ri   r   r   r   test_bad_operator  s   
z'TestSequenceFunctions.test_bad_operatorc                 C   sV   t d}d}| tt||gddgg d}| tt||gddgg dS )z`
        Test that comments are correctly filtered out of tgrep search
        strings.
        z(S (NN x) (NP x) (NN x))z=
        @ NP /^NP/;
        @ NN /^NN/;
        @NN
        rt   rv   zg
        # macros
        @ NP /^NP/;
        @ NN /^NN/;

        # search string
        @NN
        Nrn   )r   rj   search1search2r   r   r   test_comments  s
   
 $z#TestSequenceFunctions.test_commentsc                 C   s   t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gddgg d	S )
z7
        Test matching sister nodes in a tree.
        (S (A x) (B x) (C x))z* $. Brt   z* $.. Bz* $, Brv   z* $,, Bz* $ BNrn   ri   r   r   r   test_rel_sister_nodes  s   
$z+TestSequenceFunctions.test_rel_sister_nodesc                 C   s  t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd	|gdgg | ttd
|gdgg | ttd|gdgg | ttd|gdgg t d}| ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg | ttd|gdgg dS )zP
        Test matching nodes based on their index in their parent node.
        r   z* >, Srt   z* >1 Sz* >2 Srs   z* >3 Srv   z* >' Sz* >-1 Sz* >-2 Sz* >-3 SzE(S (D (A x) (B x) (C x)) (E (B x) (C x) (A x)) (F (C x) (A x) (B x)))z* <, Az* <1 Az* <2 Az* <3 Az* <' Az* <-1 Az* <-2 Az* <-3 ANrn   ri   r   r   r   tests_rel_indexed_children  s(   
"z0TestSequenceFunctions.tests_rel_indexed_childrenc                 C   s  t d}| ttd|gg dg | ttd|gddgg | ttd|gg dg | ttd	|gg d
g | ttd|gddgg | ttd|gg dg | ttd|gg dg | ttd|gg dg dS )zD
        Test matching nodes based on precedence relations.
        zV(S (NP (NP (PP x)) (NP (AP x))) (VP (AP (X (PP x)) (Y (AP x)))) (NP (RC (NP (AP x)))))z* . X)rt   rc   r   z* . Yr   r   z* .. X)rt   r   r   rc   r   z* .. Y)rt   r   r   rc   r   r   r   z* , Xra   r   ra   ra   r   ra   r   z* , Y)rv   r_   r   r_   r   r   r_   r   r   r   z* ,, X)r   r   rv   r   r   r   z* ,, YNrn   ri   r   r   r   test_rel_precedence  s@   z)TestSequenceFunctions.test_rel_precedencec                 C   sf  t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gddgg t d	}| ttd
|gddgg t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gdgg t d}| ttd|gdgg | ttd|gdgg dS )zA
        Test the Basic Examples from the TGrep2 manual.
        z(S (NP (AP x)) (NP (PP x)))r>   rs   z$(S (NP x) (VP x) (NP (PP x)) (VP x))rB   rv   z6(S (NP (AP x)) (NP (PP x)) (NP (DET x) (NN x)) (VP x))rD   zX(S (NP (NP (PP x)) (NP (AP x))) (VP (AP (NP (PP x)) (NP (AP x)))) (NP (RC (NP (AP x)))))rE   rc   r   z:(S (NP (AP (PP x) (VP x))) (NP (AP (PP x) (NP x))) (NP x))rF   rt   ze(S (NP (DET a) (NN cat) (PP (IN on) (NP x))) (NP (DET a) (NN cat) (PP (IN on) (NP x)) (PP x)) (NP x))rG   z;(S (S (C x) (A (B x))) (S (C x) (A x)) (S (D x) (A (B x))))rJ   z/(S (S (A (B x) (C x))) (S (S (C x) (A (B x)))))rL   rM   Nrn   ri   r   r   r   test_examples  sX   

z#TestSequenceFunctions.test_examplesc                 C   sH   t d}| ttd|gddgg | tjttd|g dS )z8
        Test defining and using tgrep2 macros.
        zi(VP (VB sold) (NP (DET the) (NN heiress)) (NP (NN deed) (PREP to) (NP (DET the) (NN school) (NN house))))z+@ NP /^NP/;
@ NN /^NN/;
@NP !< @NP !$.. @NNrs   )r_   r_   z,@ NP /^NP/;
@ NN /^NN/;
@CNP !< @NP !$.. @NNN)r   re   r   rf   r   rg   r   r   ri   r   r   r   test_use_macrosk  s$   	z%TestSequenceFunctions.test_use_macrosc                 C   s0   |  tdg d |  tdg d dS )z#Test tokenization of labeled nodes.!S < @SBJ < (@VP < (@VB $.. @OBJ)))rK   r   @SBJr   r	   @VPr   r	   @VBr;   @OBJr   r   z%S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)))rK   r   r   =sr   r	   r   r   vr   r	   r   r;   r   r   r   Nr!   r"   r   r   r   test_tokenize_node_labels  s   z/TestSequenceFunctions.test_tokenize_node_labelsc                 C   rP   )z(Test tokenization of segmented patterns.z0S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =v)rK   r   r   r   r   r   r	   r   r   r   r   r	   r   r;   r   r   r   :z=sr   z=vNr!   r"   r   r   r    test_tokenize_segmented_patterns  s   z6TestSequenceFunctions.test_tokenize_segmented_patternsc                 C   s*  d}t d}t d}|dd d }d}| tt||gd  | tt||gd  | tt||gd  | tt||gtt||g | tt||gd  | tt||gd  | tt||gd  | tt||gtt||g dS )	zN
        Test labeled nodes.

        Test case from Emily M. Bender.
        z
            # macros
            @ SBJ /SBJ/;
            @ VP /VP/;
            @ VB /VB/;
            @ VPoB /V[PB]/;
            @ OBJ /OBJ/;

            # 1 svo
            S < @SBJ=s < (@VP=v < (@VB $.. @OBJ)) : =s .. =vz2(S (NP-SBJ I) (VP (VB eat) (NP-OBJ (NNS apples))))z2(S (VP (VB eat) (NP-OBJ (NNS apples))) (NP-SBJ I))z

r   r   z-S < (/.*SBJ/ $.. (/VP/ < (/VB/ $.. /.*OBJ/)))N)	r   re   split
assertTruerf   r   rg   r   assertFalse)r   searchsent1sent2search_firsthalfsearch_rewriter   r   r   test_labeled_nodes  s.   
z(TestSequenceFunctions.test_labeled_nodesc                 C   rq   )zm
        Test that multiple (3 or more) conjunctions of node relations are
        handled properly.
        z'((A (B b) (C c)) (A (B b) (C c) (D d)))z(A < B < C < D)rs   z(A < B < C)rt   Nrn   )r   sentr   r   r   test_multiple_conjs  s   
z)TestSequenceFunctions.test_multiple_conjsc                 C   sn   t d}| ttd|gddgg | ttd|gddgg | ttd|gddgg dS )zp
        Test that semicolons at the end of a tgrep2 search string won't
        cause a parse failure.
        r]   rZ   r^   r`   zNN;zNN;;Nrn   ri   r   r   r   test_trailing_semicolon  s     z-TestSequenceFunctions.test_trailing_semicolonN)!__name__
__module____qualname____doc__r    r#   r=   rO   rQ   rW   r\   rk   ro   rp   ru   rw   rz   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      s>    !	C&	 
	
W	'O+(r   )
r   unittestpytestimportorskipnltkr   	nltk.treer   TestCaser   r   r   r   r   <module>   s   	
