o
    i7                     @   sh   d dl Z d dlZd dlmZ dd Zdd Zdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd ZdS )    Nc                   C   n   t t td W d   n1 sw   Y  t t td W d   dS 1 s0w   Y  dS )z3No-protocol traversal sequences should be rejected.../../etc/passwdNz../relative/../etc/passwdpytestraises
ValueErrordatanormalize_resource_url r
   r
   U/home/ubuntu/.local/lib/python3.10/site-packages/nltk/test/unit/test_data_security.py,test_normalize_rejects_no_protocol_traversal   s   "r   c                   C   :   t t td W d   dS 1 sw   Y  dS )zQWindows-style backslash traversal should be rejected when no protocol is present.z..\..\etc\passwdNr   r
   r
   r
   r   .test_normalize_rejects_no_protocol_backslashes      "r   c                  C   s    t d} | dsJ ddS )zIValid package-style resource names should still be treated as nltk: URLs.zcorpora/brownznltk:z5Package-style paths should be treated as 'nltk:' URLsN)r   r	   
startswith)outr
   r
   r   #test_normalize_allows_package_paths   s   
r   c                   C   r   )zLDefense-in-depth: direct calls to find() should reject traversal-like names.r   Nr   r   r   r   findr
   r
   r
   r   'test_find_rejects_traversal_direct_call   r   r   c                   C   r   )z
    Defense-in-depth edge case: a path can become unsafe only after normalization.

    Example from review: "foo/../../etc/passwd" normalizes to "../etc/passwd" and
    must still be rejected.
    zfoo/../../etc/passwdNr   r
   r
   r
   r   Ctest_find_rejects_traversal_that_becomes_unsafe_after_normalization%   s   "r   c                   C   r   )z;Absolute POSIX paths without a protocol should be rejected.z/etc/passwdNr   r
   r
   r
   r   6test_normalize_rejects_no_protocol_absolute_posix_path0   r   r   c                   C   r   )z
    Windows drive letter paths should be rejected even on non-Windows platforms.

    Review note: don't gate 'C:/etc/passwd' on Windows only; ensure robust rejection
    regardless of runtime platform.
    zC:\etc\passwdNzC:/etc/passwdr   r
   r
   r
   r   =test_normalize_rejects_no_protocol_windows_drive_letter_paths6   s   "r   c                   C   r   )z8A resource name that is exactly '..' should be rejected.z..Nr   r
   r
   r
   r   .test_normalize_rejects_no_protocol_dotdot_onlyE   r   r   c                 C   s   | d }t |d}|dd W d    n1 sw   Y  tjdt| gd}| }| }t|t	r=|
d}|dksCJ W d    d S 1 sNw   Y  d S )Nza.zipwzb.zip/c.txtokza.zip/b.zip/c.txt)pathszutf-8)zipfileZipFilewritestrr   r   stropenread
isinstancebytesdecode)tmp_pathzpathzfptrfgotr
   r
   r   !test_find_zip_split_is_non_greedyK   s   


"r,   )r   r   	nltk.datar   r   r   r   r   r   r   r   r   r,   r
   r
   r
   r   <module>   s    	