o
    R
i06                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlZ	zd dl
mZ d dlmZ W n ey;   dZY nw zd dlZd dlmZ d dlmZ W n ey[   d ZZY nw ejjZejjdd Zejjdd Zejjd	d
 Zejjdd Zejjdd Zejjdd Zdd Zdd Zejjej dg ddd Z!ejjej ddgdd Z"ejjej dddgd d! Z#d"d# Z$ejj%ej d$d%d&d' Z&d(d) Z'd*d+ Z(d,d- Z)d.d/ Z*d0d1 Z+d2d3 Z,d4d5 Z-dS )6    N)mock)_write_table)alltypes_samplec                  C   sr   t dd} tj| }t }t||ddd |d t	|}|d tj
||d}t| |   d S )N'  sizesnappy2.6)compressionversionr   )metadata)r   paTablefrom_pandasioBytesIOr   seekpqread_metadataParquetFiletmassert_frame_equalread	to_pandas)dfa_tablebufr   fileh r   [/home/ubuntu/.local/lib/python3.10/site-packages/pyarrow/tests/parquet/test_parquet_file.pytest_pass_separate_metadata1   s   



r    c                     s   d\} }t | d}tj|}t }t||| | ddd |d t	|  j
|ks/J  fddt|D }t|}t||  d S )	Nr      r   r   r	   row_group_sizer
   r   r   c                    s   g | ]}  |qS r   read_row_group.0ipfr   r   
<listcomp>W   s    z.test_read_single_row_group.<locals>.<listcomp>)r   r   r   r   r   r   r   r   r   r   num_row_groupsrangeconcat_tablesr   r   r   NKr   r   r   
row_groupsresultr   r*   r   test_read_single_row_groupE   s   



r5   c                     s   d\} }t | d}tj|}t }t||| | ddd |d t	|t
|jd d   fdd	t|D }t|}t|  |   fd
d	t|D }t|}t|  |  d S )Nr!   r   r   r	   r#   r      c                    s   g | ]	}j | d qS columnsr%   r'   colsr+   r   r   r,   j   s    zAtest_read_single_row_group_with_column_subset.<locals>.<listcomp>c                    s   g | ]}j |   d qS r7   r%   r'   r:   r   r   r,   p   s    )r   r   r   r   r   r   r   r   r   r   listr9   r.   r/   r   r   r   r0   r   r:   r   -test_read_single_row_group_with_column_subset\   s    




r=   c                  C   s   d\} }t | d}tj|}t }t||| | ddd |d t	|}|j
|ks/J |t|}t||  d S )Nr!   r   r   r	   r#   r   )r   r   r   r   r   r   r   r   r   r   r-   read_row_groupsr.   r   r   r   )r1   r2   r   r   r   r+   r4   r   r   r   test_read_multiple_row_groupsu   s   


r?   c                  C   s   d\} }t | d}tj|}t }t||| | ddd |d t	|}t
|jd d }|jt||d}t|| |  |jt||| d}t|| |  d S )	Nr!   r   r   r	   r#   r   r6   r8   )r   r   r   r   r   r   r   r   r   r   r<   r9   r>   r.   r   r   r   )r1   r2   r   r   r   r+   r;   r4   r   r   r   0test_read_multiple_row_groups_with_column_subset   s   


r@   c                  C   s   d\} }t | d}tj|}t }t||| | ddd |d t	|}|
 dks0J |
|jd d dks>J d S )	Nr!   r   r   r	   r#   r   r   r"   )r   r   r   r   r   r   r   r   r   r   scan_contentsr9   )r1   r2   r   r   r   r+   r   r   r   test_scan_contents   s   


 rB   c                 C   s   | d }t t| dt| d}tt}t| W d    n1 s(w   Y  |t	r9t
jdkr9d S || d S )N	directoryzCannot open for reading: path 'z' is a directorywin32)osmkdirstrpytestraisesIOErrorr   r   errisinstancePermissionErrorsysplatformmatch)tempdirpathmsgexcr   r   r   0test_parquet_file_pass_directory_instead_of_file   s   rT   c               	   C   s   t jt ddgt ddggddgd} t  }t| | t| }|j	d
 ddgks4J |j	d	
 ddgksBJ d
D ]}tttf |j	| W d    n1 s^w   Y  qDd S )Nr"      foobarintsstrs)namesr      )r6   )r   tablearrayBufferOutputStreamr   write_tabler   getvaluereaderread_column	to_pylistrH   rI   
ValueError
IndexError)r]   biofindexr   r   r   test_read_column_invalid_index   s   rj   
batch_size)i,    i  c              	   C   s   d}d}t |d}| d }tj|}t||d|d t|}|jd d |jdd  fD ]>}|j||d}	t	d	|| |}
t
|	|
D ]'\}}t||| }t| |j||d d f jd d |f jd
d qDq.d S )Ni  rl   r   pandas_roundtrip.parquetr	   r   
chunk_size
   )rk   r9   r   Tdrop)r   r   r   r   r   r   r   r9   iter_batchesr.   zipminr   r   r   iloclocreset_index)rP   rk   
total_sizero   r   filenamearrow_tablefile_r9   batchesbatch_startsbatchstartendr   r   r    test_iter_batches_columns_reader   s(   

 *r   ro   rl   c           
      C   s   t ddd}| d }tj|}|jjd usJ t||d|d t|}dd }t	||}d	}t
|jD ]:}	t||  ||	g d
 |d7 }t||  jdd||	g jd
d  jdd |d7 }q6d S )Nr   T)r   categoricalrm   r	   rn   c                 s   s6    t | jD ]}| jd|gd}|D ]}|V  qqd S )N  )rk   r3   )r.   r-   rs   )rh   	row_groupr}   r   r   r   r   get_all_batches   s   z1test_iter_batches_reader.<locals>.get_all_batchesr   r   r[   rq   )r   r   r   r   schemapandas_metadatar   r   r   r<   r.   r-   r   r   r   r>   headrx   rv   )
rP   ro   r   rz   r{   r|   r   r}   batch_nor)   r   r   r   test_iter_batches_reader   s2   



r   
pre_bufferFTc                 C   sj   d\}}t |d}tj|}t }t|||| ddd |d tj	|| d}|
 j|ks3J d S )Nr!   r   r   r	   r#   r   )r   )r   r   r   r   r   r   r   r   r   r   r   num_rows)r   r1   r2   r   r   r   r+   r   r   r   test_pre_buffer  s   

r   c              	   C   s  |  d}tddgddgd}t|| t|d5}t|}|  |jr,J |jr1J W d   n1 s;w   Y  |jrEJ |jrJJ W d   n1 sTw   Y  |js^J |jscJ t|}|  |jrrJ W d   n1 s|w   Y  |jsJ dS )z
    Unopened files should be closed explicitly after use,
    and previously opened files should be left open.
    Applies to read_table, ParquetDataset, and ParquetFile
    zfile.parquetr   r[   )col1col2rbN)	joinpathr   r]   r   r`   openr   r   closed)rP   fnr]   rh   pr   r   r   #test_parquet_file_explicitly_closed"  s&   




r   use_uri)TFc           
      C   s   | \}}}|r	|n|f}|ri nt |d}tdtdi}tj|||d tj|i |}| |ks6J |jr;J |	  |jsDJ tj|i |}	|	 |ksUJ |	jrZJ W d    n1 sdw   Y  |	jsnJ d S )N
filesystemarp   )
dictr   r]   r.   r   r`   r   r   r   close)
s3_example_fsr   s3_fss3_uris3_pathargskwargsr]   parquet_filerh   r   r   r   !test_parquet_file_with_filesystem?  s   


r   c                  C   s   t dt g di} t }t| | |d t|	 j
d jd j}|jdu s/J |jdks6J |jd u s=J |jdu sDJ |jdksKJ |jsPJ |jdksWJ |js\J t|d	ksdJ d S )
Nvalue)r\   N   r   Tr[   Fr\   r   zmarrow.ArrayStatistics<null_count=1, distinct_count=None, min=-1, is_min_exact=True, max=3, is_max_exact=True>)r   r]   r^   r   r   r   r   r   r   r   r9   chunks
statisticsis_null_count_exact
null_countdistinct_countis_distinct_count_exactru   is_min_exactmaxis_max_exactrepr)r]   r   r   r   r   r   test_read_statisticsV  s   



r   c                 C   sF   |  d}t | }|jddgksJ |d  g dks!J d S )Nz/unknown-logical-type.parquetzcolumn with known typezcolumn with unknown type)s   unknown string 1s   unknown string 2s   unknown string 3)r   r   r   column_namesrd   )parquet_test_datadir	test_filer]   r   r   r    test_read_undefined_logical_typem  s   
r   c                  C   s   t d tdtdi} t| d td}| |s J d}t j	tj
|d td W d    d S 1 s;w   Y  d S )Nfsspecr   rp   fsspec+memory://example.parquetz#Unrecognized filesystem type in URIrO   znon-existing://example.parquet)rH   importorskipr   r]   r.   r   r`   
read_tableequalsrI   ArrowInvalid)r]   table2rR   r   r   r    test_parquet_file_fsspec_supporty  s   

"r   c                  C   s   zddl m}  W n ty   td Y nw tdtdi}|  }|jddd |	ds1J d	}t
j|d
|d t
d}||sGJ d S )Nr   MemoryFileSystemz&fsspec is not installed, skipping testbrp   z/path/to/prefixT)create_parentszfsspec+memory://path/to/prefixz	b.parquetr   z(fsspec+memory://path/to/prefix/b.parquet)fsspec.implementations.memoryr   ImportErrorrH   skipr   r]   r.   rF   existsr   r`   r   r   )r   r]   fsfs_strr   r   r   r   <test_parquet_file_fsspec_support_through_filesystem_argument  s   
r   c                  C   s   zddl m}  W n ty   td Y nw td}| |_tj	
dd|i& d}tdtdi}t|| t|}||sFJ W d    d S 1 sQw   Y  d S )	Nr   r   z3fsspec is not installed, skipping Hugging Face testhuggingface_hubzsys.modulesz'hf://datasets/apache/arrow/test.parquetr   rp   )r   r   r   rH   r   types
ModuleTypeHfFileSystemr   patchr   r   r]   r.   r   r`   r   r   )r   fake_hf_moduleurir]   r   r   r   r   $test_parquet_file_hugginface_support  s   

"r   c                  C   sr   zdd l } W n	 ty   Y nw td td}tjt|d td W d    d S 1 s2w   Y  d S )Nr   z"fsspec is available, skipping testzI`fsspec` is required to handle `fsspec+<filesystem>://` and `hf://` URIs.r   r   )	r   r   rH   r   reescaperI   r   r   )r   rR   r   r   r   1test_fsspec_uri_raises_if_fsspec_is_not_available  s   
"r   c                 C   st   t g }t jjg |d}| d }t|| t|}tt	 |j
dd W d    d S 1 s3w   Y  d S )N)r   zempty_file.parquetr   )rk   )r   r   r   from_batchesr   r`   r   rH   rI   re   rs   )rP   r   empty_tableparquet_file_pathr   r   r   r   (test_iter_batches_raises_batch_size_zero  s   

"r   ).r   rE   r   rM   r   rH   unittestr   pyarrowr   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   r   pandaspdpandas.testingtestingr   r   mark
pytestmarkr    r5   r=   r?   r@   rB   rT   rj   parametrizer   r   r   r   s3r   r   r   r   r   r   r   r   r   r   r   r   <module>   sp   





+