o
    R
i$                     @   s  d dl Z d dlZd dlZd dlZd dlZzd dlZW n ey%   dZY nw d dlZd dl	m
Z
 d dlZd dlmZ d dlmZmZmZmZmZmZ d dlmZ d dlmZ zd dlmZ d dlmZm Z m!Z!m"Z" W n eyu   dZY nw zd dl#Z$d dl%m&Z' W n ey   d Z$Z'Y nw ej(jej(j)gZ*dd Z+ej(j#dd	 Z,ej(j#d
d Z-ej(j#dd Z.ej(j#dd Z/ej(j0e1e2fddej(j#dd Z3ej(j#dd Z4ej(j#dd Z5ej(j#dd Z6ej(j#dd Z7ej(j#dd Z8ej(j#ej(9ddgdgge:d d!k e:d"d#d!k e:d"d$;e< d!k fej(9d%d&d'd( Z=ej(j#d)d* Z>ej(j?d+d, Z@ej(j?d-d. ZAej(j#d/d0 ZBej(j#ej(j?d1d2 ZCd3d4 ZDd5d6 ZEd7d8 ZFej(j#d9d: ZGej(j#d;d< ZHej(j#d=d> ZIej(jd?d@ ZJej(jdAdB ZKej(jdCdD ZLddGdHZMdIdJ ZNej(jej(9dKdLdMgdNdO ZOej(jdPdQ ZPej(jdRdS ZQej(jej(9dKdLdMgdTdU ZRdVdW ZSdXdY ZT			ddZd[ZU	dd\d]ZVej(j#d^d_ ZWej(j#d`da ZXej(j#dbdc ZYej(j#ddde ZZej(j#dfdg Z[ej(j#ej(j?dhdi Z\ej(j#ej(j?ej(j]ej^djkdkdldmdn Z_ej(j#ej(j?dodp Z`ej(j#dqdr ZaddtduZbej(j#dvdw Zcej(j#dxdy Zddzd{ Zed|d} Zfd~d Zgdd Zhdd Zidd Zjdd Zkej(9dddd Zlej(j#dd ZmdS )    N)FileSelector
FileSystemLocalFileSystemPyFileSystemSubTreeFileSystemFSSpecHandler)util)guid)_read_table_test_dataframe_test_table_write_tablec                 C   s~   t dg di}| d }|  |d }t|t| tj|t d}||s,J tjdt	
| d}||s=J d S )Na         data_dirdata.parquet
filesystemzdata_dir/data.parquet)patablemkdirpqwrite_tablestr
read_tabler   equalsr   _filesystem_uri)tempdirr   	directorypathresult r$   V/home/ubuntu/.local/lib/python3.10/site-packages/pyarrow/tests/parquet/test_dataset.pytest_filesystem_uri;   s   
r&   c                 C   s   t  }t||  d S N)r   _partition_test_for_filesystem)r    localr$   r$   r%   test_read_partitioned_directoryN   s   r*   c                 C   s@   t  }| }t|| t|}|jdgd}|jdgksJ d S )Nvaluescolumns)r   r(   r   ParquetDatasetreadcolumn_names)r    r)   	base_pathdatasetr#   r$   r$   r%   'test_read_partitioned_columns_selectionT   s   

r3   c                 C   s  t  }| }ddg}g d}ddg}d|gd|gd|gg}ttj|d	d
dtttj|td
ddtttj|dd
ddtdd}t	|||| t
j||g dd}| }	|	 jdd}
d|
d jvsqJ d|
d jvszJ d|
d jvsJ g dddgg}t
j|||d}| }	|	 jdd}
|
d dk|
d dk@ |
d dk@ }t|
d dk|
d dk@ }| dksJ | dksJ |
jd | |  ksJ dggdggfD ]}t
j|||d}| jdksJ qd S )Nr   r   r   bcTFintegerstringbooleani4dtype      r   boolr      r7   r8   r9   r+   ))r7   =r   )r8   !=r5   )r9   ==Truer   filtersdropr5   )r7   rB   r   )r9   rD   FalserE   rJ   )r8   rD   s   1 a)r8   rD   z1 a)r   pd	DataFramenparrayrepeattileobjectarange_generate_partition_directoriesr   r.   r/   	to_pandasreset_indexr+   sumshapenum_rows)r    r)   r1   integer_keysstring_keysboolean_keyspartition_specdfr2   r   	result_dfrG   df_filter_1df_filter_2r$   r$   r%   test_filters_equivalencya   sh   



ra   c                 C      t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| tj||d	d
gd}|	 }|
 jddjdd}	dd tt|	d jD }
|
ddgksZJ d S )Nr   r   r   r      integersr>   r:   r;   indexre   rg   r,   )re   <rd   )re   >r   rF   byTrH   c                 S   s   g | ]}|qS r$   r$   .0xr$   r$   r%   
<listcomp>   s    z9test_filters_cutoff_exclusive_integer.<locals>.<listcomp>r   r   r   rK   rL   rM   rR   rN   rS   r   r.   r/   rT   sort_valuesrU   mapintr+   r    r)   r1   rY   r\   Nr]   r2   r   r^   result_listr$   r$   r%   %test_filters_cutoff_exclusive_integer   6   rw   z5Loss of type information in creation of categoricals.)raisesreasonc              	   C   s  t  }| }tdddtdddtdddtdddtdddg}d|gg}d	}tjt|tj|d
ddddgd}t|||| t	j
||ddgd}| }| jddjdd}	tjtjtdddgd
dtj|d
dd}
|	d j|
ksJ d S )Ni  rd   	   
            datesr>   
datetime64r;   )rg   r   rg   r,   )r   rh   z
2018-04-12)r   ri   z
2018-04-10rF   rj   TrH   
categories)r   datetimedaterK   rL   rM   rR   rN   rS   r   r.   r/   rT   rq   rU   Categoricalr+   )r    r)   r1   	date_keysr\   ru   r]   r2   r   r^   expectedr$   r$   r%   &test_filters_cutoff_exclusive_datetime   sF   r   c              	   C   sp   | d }t t jddddtddj|dd tj|d	d
tdddfgd}|d	 g dks6J d S )Nztimestamps.parquetz
2020-01-01r|   D)periodsfreq)r   idT)use_deprecated_int96_timestampsr   <=i  r   r>   rG   r   rc   )
rK   rL   
date_rangerange
to_parquetr   r   r   column	to_pylist)r    r"   r   r$   r$   r%   test_filters_inclusive_datetime   s   r   c                 C   rb   )Nrc   re   r>   r:   r;   rf   rg   r,   )re   r   r   )re   z>=r   rF   rj   TrH   c                 S   s   g | ]}t |qS r$   )rs   rl   r$   r$   r%   ro   *      z2test_filters_inclusive_integer.<locals>.<listcomp>r   r   rp   rt   r$   r$   r%   test_filters_inclusive_integer  rx   r   c                 C   s|  t  }| }ddg}g d}ddg}d|gd|gd|gg}ttj|d	d
dtttj|td
ddtttj|dd
ddtdd}t	|||| t
j||dgd}| }	|	 jdd}
d|
d jv spJ d|
d jv syJ d|
d jvsJ t
j||dddgfddddhfgd}| }	|	 jdd}
d|
d jvsJ d|
d jvsJ d|
d jvsJ d S )Nr   r   r4   TFr7   r8   r9   r:   r;   r=   r>   r   r?   r   r@   rA   )r8   inabrF   rH   r   r5   r6   r   )r8   r   r   r5   znot inrJ   )r   rK   rL   rM   rN   rO   rP   rQ   rR   rS   r   r.   r/   rT   rU   r+   )r    r)   r1   rY   rZ   r[   r\   r]   r2   r   r^   r$   r$   r%   test_filters_inclusive_set.  sH   
r   c                 C   sV  t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| tt	 t
j||d	gd
 W d    n1 sDw   Y  tt t
j||dgd
 W d    n1 sbw   Y  t
j||ddt fgd
}| jdks}J t
j||dddhfgd
}tt | jdksJ W d    d S 1 sw   Y  d S )Nrc   re   r>   r:   r;   rf   rg   r,   )re   r   r   rF   )re   z=<r   r   r   rC   r   )r   rK   rL   rM   rR   rN   rS   pytestry   	TypeErrorr   r.   
ValueErrorsetr/   rX   NotImplementedError)r    r)   r1   rY   r\   ru   r]   r2   r$   r$   r%   test_filters_invalid_pred_op]  sJ   "r   c                 C   s   t  }| }g d}d|gg}d}tjt|tj|dddddgd}t|||| d	}tjt	|d
 t
j||dgd  W d    d S 1 sKw   Y  d S )Nrc   re   r>   r:   r;   rf   rg   r,   z1No match for FieldRef.Name\(non_existent_column\)match)non_existent_columnrh   r   rF   )r   rK   rL   rM   rR   rN   rS   r   ry   r   r   r.   r/   )r    r)   r1   rY   r\   ru   r]   msgr$   r$   r%   test_filters_invalid_column  s&   
"r   rG   )re   rh   r   re   r   nestedr   r5   read_method)r   read_pandasc              	   C   s   t t|}t }| }g d}d|gg}t|}tt|tj|ddtdd t	|D d}	t
||||	 t||d}
||fi |
}|jd	ksNJ d S )
Nrc   re   r:   r;   c                 S   s   g | ]	}|t |d qS )r   )r   rm   ir$   r$   r%   ro         z+test_filters_read_table.<locals>.<listcomp>)rg   re   r   rF   r   )getattrr   r   lenrK   rL   rM   rR   rN   r   rS   dictrX   )r    rG   r   r/   r)   r1   rY   r\   ru   r]   kwargsr   r$   r$   r%   test_filters_read_table  s    
	r   c           	      C   s   t  }| }ddg}d|gg}d}tjt|tj|dddddgd	}t|||| t|}|	 }|
d |ks?J d S )
N2019_22019_3	year_weekr   rQ   r;   )rg   r   rg   r,   )r   rK   rL   rM   rR   rN   rS   r   r.   r/   r   r   )	r    r)   r1   rZ   r\   ru   r]   r2   r#   r$   r$   r%   $test_partition_keys_with_underscores  s    
r   c                 C   sN   | \}}|d }t dg di}t|||d t||d}||s%J d S Nz/test.parquetr   r   r   r   r   r   r
   r   )s3_example_s3fsfsr"   r   r#   r$   r$   r%   test_read_s3fs     r   c                 C   sN   | \}}|d }t dg di}t|||d t||d}||s%J d S r   r   )r   r   r!   r"   r   r#   r$   r$   r%   test_read_directory_s3fs  r   r   c                 C   sJ   t | d }tdg di}t|| t|g }||s#J d S )Nr   r   r   )r   r   r   r   r   r.   r/   r   )r    	data_pathr   r#   r$   r$   r%   test_read_single_file_list  s
   
r   c                 C   s   | \}}t || d S r'   )r(   r   r   r"   r$   r$   r%   $test_read_partitioned_directory_s3fs  s   r   c                 C   s  ddg}g d}d|gd|gg}d}t jt|tj|ddd	tttj|tdd
dtj	|dg dd}t
| ||| tj|| d}| }| jddjdd}	|jddjddj|	jd}
|
d d|
d< |
d d|
d< |	jg dk sJ t|	|
 d S )Nr   r   r4   foobarr@   r:   r;   r=   r>   r   )rg   r   r   r+   r,   r   rg   rj   TrH   category)rg   r+   r   r   )rK   rL   rM   rR   rN   rO   rP   rQ   randomrandnrS   r   r.   r/   rT   rq   rU   reindexr-   astypealltmassert_frame_equal)r   r1   foo_keysbar_keysr\   ru   r]   r2   r   r^   expected_dfr$   r$   r%   r(      s>   

r(   c                    sX   t tsttt tdtdd fdd|dg  d S )Npathsepsep/c              	      sn  | \}}|D ]}|||fg } t| | d| g}| | d krddlm}  |t g}	t|}
tj	|
}
|	}t|| W d    n1 sXw   Y  |	j|jkshJ |	j|jkssJ  |dg}
|}W d    n1 sw   Y  q||d |  |dg}
|}W d    n1 sw   Y  qd S )NrB   r   r   )FileType_SUCCESS)joinr   
create_dir
pyarrow.fsr   r	   _filter_partitionr   Tablefrom_pandasopen_output_streamr   get_file_infotypeNotFoundFile)base_dirlevel	part_keysnamer+   valuethis_part_keys	level_dirr   	file_pathfiltered_df
part_tableffile_successDEPTH_visit_levelr]   r   r\   r   r$   r%   r   2  s<   

z5_generate_partition_directories.<locals>._visit_levelr   )
isinstancer   r   r   r   r   )r   r   r\   r]   r$   r   r%   rS   '  s   
 rS   c                 C   sl   t jt| td}g }|D ]\}}|| t|tjtjfr$t	|}|| | |kM }q| | j
|ddS )Nr;   r   )axis)rM   onesr   r?   appendr   r   r   rK   	TimestamprI   )r]   r   	predicateto_dropr   r   r$   r$   r%   r   U  s   

r   c                 C   s   | d }|   tjtdg di}t||d  | d }|   tjtdg di}t||d  tj| dggd}|	d
tg dgsSJ d S )	NzA=0Br   r   zA=1r4   )ArD   r   r   )r   r   r   r   rK   rL   r   r   r   r   r   chunked_array)r    dir1table1dir2table2r   r$   r$   r%   "test_filter_before_validate_schemae  s   $r   c                    sx  d}d}| t   }|  g }g }t|D ].}t||d}|d tj|d< || d }tj	|}	t
|	| ||	 || q|d   ddd	}
|
| t|} |s_J d
dd jd g} fdd|D }tj||d}tjj fdd|D | jjd}||sJ tj|dd t||djd d d df }| t   d }tj	|}t
|| d S )Nr|   r>   seeduint32.parquetz_SUCCESS.crcTc                 [   s    t j| fi |}|j||dS )N)r-   use_threads)r   r.   r/   )pathsr-   r  r   r2   r$   r$   r%   read_multiple_files  s   z5test_read_multiple_files.<locals>.read_multiple_filesr   r      r   c                    s   g | ]}  |jqS r$   )fieldr   r   r#   r$   r%   ro         z,test_read_multiple_files.<locals>.<listcomp>r,   c                    s   g | ]}  |qS r$   )r   r   r  r$   r%   ro         )namesmetadata)r  rd   )NT)r	   r   r   r   r   rM   int64r   r   r   r   r   touchconcat_tablesr   num_columnsr   r   from_arraysschemar
  iloc)r    nfilessizedirpath	test_datar  r   r]   r"   r   r  r   to_read	col_namesout	bad_applebad_apple_pathtr$   r  r%   test_read_multiple_files{  s@   




r  c                    s(  d}d}| t   }|  g }g }g }t|D ]:}t||d}t|| |d | |_d|j_|| d }	tj	
|}
t|
|	 ||
 || ||	 qt|}ddg |j d }t fd	d
|D }t|| |jt d }|j|jksJ t|j|jd| d S )Nr>   r   r   rg   r   uint8stringsr,   c                    s   g | ]}|  qS r$   r$   rl   r,   r$   r%   ro     r   z,test_dataset_read_pandas.<locals>.<listcomp>)r	   r   r   r   rM   rR   rg   r   r   r   r   r   r   r   r.   r   rT   rK   concatr   r   r   rW   r   r-   )r    r  r  r  r  framesr  r   r]   r"   r   r2   r#   r   r$   r,   r%   test_dataset_read_pandas  s2   




r!  c                 C   sX   | t   }|  tddd}|d }t||dd tj|dd}| |s*J d S )	Nr|   r   r   	0.parquet2.6versionT)
memory_map)r	   r   r   r   r   r.   r/   r   )r    r  r   r"   r2   r$   r$   r%   test_dataset_memory_map  s   
r'  c                 C   s   | t   }|  tddd}|d }t||dd tt tj|dd W d    n1 s1w   Y  d	D ]}tj||d}|	 
|sJJ q8d S )
Nr|   r   r   r"  r#  r$  i)buffer_size)   i   )r	   r   r   r   r   ry   r   r   r.   r/   r   )r    r  r   r"   r(  r2   r$   r$   r%   #test_dataset_enable_buffered_stream  s    
r*  c                 C   s~   | t   }|  tddd}|d }t||dd dD ] }tj||d}| |s.J tj||d}||s<J qd S )	Nr|   r   r   r"  r#  r$  )TF)
pre_buffer)	r	   r   r   r   r   r.   r/   r   r   )r    r  r   r"   r+  r2   actualr$   r$   r%   test_dataset_enable_pre_buffer  s   
r-  r|   r>   c                 C   sN   g }g }t |D ]}t||d}| | d }|t|| || q|S )Nr   r   )r   r   r   r   )r1   r  
file_nrowsr  r  r   r   r"   r$   r$   r%   _make_example_multifile_dataset  s   r/  c                 C   s(   dd |D }t |t | jksJ d S )Nc                 S   s   g | ]}t | qS r$   )r   as_posix)rm   r"   r$   r$   r%   ro   )  r  z)_assert_dataset_paths.<locals>.<listcomp>)r   files)r2   r  r$   r$   r%   _assert_dataset_paths(  s   r2  
dir_prefix_.c                 C   sJ   | t   }|  t|ddd}|| d   t|}t|| d S )Nr|   r>   r  r.  stagingr	   r   r/  r   r.   r2  r    r3  r  r  r2   r$   r$   r%   test_ignore_private_directories-  s   

r:  c                 C      | t   }|  t|ddd}|d d}|d W d    n1 s'w   Y  |d d}|d W d    n1 sCw   Y  t|}t|| d S )Nr|   r>   r6  z	.DS_Storewbs	   gibberishz.privater	   r   r/  openwriter   r.   r2  r    r  r  r   r2   r$   r$   r%   test_ignore_hidden_files_dot>     

rA  c                 C   r;  )Nr|   r>   r6  _committed_123r<  s   abcd_started_321r=  r@  r$   r$   r%   #test_ignore_hidden_files_underscoreQ  rB  rE  c                 C   sZ   | | d t   }|jdd t|ddd}t|}t|| t|}t|| d S )NdataTparentsr|   r>   r6  r8  r9  r$   r$   r%   /test_ignore_no_private_directories_in_base_pathd  s   


rI  c                 C   s   dgd dgd  }t jt tt|t | gddgd}tj|t| dgd | d }|	  tj|t|dgd tj
| d	gd
}||sNJ d S )Nxxxr   yyyrg   _partr	  partition_cols_private_duplicate_private)ignore_prefixes)r   r   rN   r   r   dictionary_encoder   write_to_datasetr   r   r   r   )r    partr   private_duplicater/   r$   r$   r%   test_ignore_custom_prefixesw  s"   rW  c                 C   sB   | d }|   t|}| }|jdksJ |jdksJ d S )Nr2   r   )r   r   r.   r/   rX   r  )r    	empty_dirr2   r#   r$   r$   r%   test_empty_directory  s   
rY  c                 C   s  dd l }dd lm} dd lm} |tdtdttdtj	gd tj
ddddd	d
}|j }ddg}	tjj||ddd}
|j|
| |	|d tjt| d}|d urw||d}||
j| W d    n1 sqw   Y  n||
j| |j| |d}t|jj}|t|
jjksJ | }| }|j }|	|dt|	 d  ksJ || }|	D ]}|| d||< q|r|dj ! }|d ||d< |"|| d S )Nr   
aaabbbbccc
eefeffgeeer|   
2017-01-01
2017-01-11datetime64[D]r;   datetime64[ns])group1group2numnanr   r`  ra  F)r  safepreserve_indexr   _common_metadatar<  r   r   )#pandaspandas.testingtestingpyarrow.parquetparquetrL   listr   rM   rc  rR   r   r-   tolistr   r   r   rT  osr"   r   r   r>  write_metadatar  r.   r   r	  r/   rT   r   r  r   to_pandas_dtyper   )r1   r   r  
index_namerK   r   r   	output_dfcolspartition_byoutput_tablemetadata_pathr   r2   dataset_colsinput_tableinput_dfinput_df_colscolexpected_date_typer$   r$   r%   &_test_write_to_dataset_with_partitions  sV   




r~  c              
   C   s  dd l }dd lm} |tdtdttdtjddddd	d
}|j	
 }tj|}|d u r8t }nt|tsCtt|}d}t|D ]
}|j|| |d qItt| ddd}	||	}
dd |
D }t||ksqJ |j| |d }| }| }|| }t|| d S )Nr   rZ  r[  r|   r\  r]  r^  r;   r_  )r`  ra  rb  r   r>   r   FT)allow_not_found	recursivec                 S   s   g | ]
}|j d r|qS )r   )r"   endswith)rm   infor$   r$   r%   ro     s    z8_test_write_to_dataset_no_partitions.<locals>.<listcomp>)rh  rk  rl  rL   rm  r   rM   rR   r   r-   rn  r   r   r   r   r   r   r   r   rT  r   r   r   r   r.   r/   rT   drop_duplicatesr   r   )r1   r   rK   r   rs  rt  rv  nr   selectorinfosoutput_filesry  rz  r$   r$   r%   $_test_write_to_dataset_no_partitions  sH   




r  c                 C      t t|  d S r'   r~  r   r    r$   r$   r%   %test_write_to_dataset_with_partitions     r  c                 C   sr   t t jdt  dt jdt  dt jdt  dt jdt  dt jdt jdddg}tt| |d	 d S )
Nr`  )r   ra  rb  rc  r   us)unitr  )	r   r  r  r8   r  int32	timestampr~  r   )r    r  r$   r$   r%   0test_write_to_dataset_with_partitions_and_schema  s   
r  c                 C   s   t t| dd d S )Nrr  )rr  r  r  r$   r$   r%   4test_write_to_dataset_with_partitions_and_index_name  s   
r  c                 C   r  r'   )r  r   r  r$   r$   r%   #test_write_to_dataset_no_partitions  r  r  c                 C   s   t | d  t| d  d S )Ntest1test2)r~  r  r  r$   r$   r%   test_write_to_dataset_pathlib  s   r  c                 C   s   |\}}t jtdd t| d |d W d    n1 sw   Y  t jtdd t| d |d W d    d S 1 s>w   Y  d S )Nz"path-like objects are only allowedr   r  r   r  )r   ry   r   r~  r  )r    r   r   r4  r$   r$   r%   &test_write_to_dataset_pathlib_nonlocal#  s   "r  win32z,test fails because of unsupported characters)rz   c                 C      | \}}t ||d d S Nr   )r~  r   r$   r$   r%   *test_write_to_dataset_with_partitions_s3fs2  s   
r  c                 C   r  r  )r  r   r$   r$   r%   (test_write_to_dataset_no_partitions_s3fs>  s   
r  c                 C   sT   t dg di}tj|}t| }tj||t d t	|}|
|s(J d S )Nr   r   r   )rK   rL   r   r   r   r   r   rT  r   r   r   )r    r]   r   r"   r#   r$   r$   r%    test_write_to_dataset_filesystemG  s   
r  d   c                 C   s   | d }t  }tjt|tj|dddgd}tj	|}d}t
||j}t|D ]}|| q.W d    n1 s@w   Y  t
|}	|	jj|ksRJ | d }
|t|
}t
|j| W d    n1 sow   Y  t
j| |d}|S )	Nr   )rg   r+   rg   r+   r,   r   	_metadatar   )r   rK   rL   rM   rR   r   r   r   r   r   r   ParquetWriterr  r   r   ParquetFiler
  num_row_groupsr   r   rp  r.   )r    ru   r"   r)   r]   r   
num_groupswriterr   readerrw  r   r2   r$   r$   r%   _make_dataset_for_picklingR  s2   

r  c                    s$    fdd}t | }||sJ d S )Nc                    s   |    | kS r'   )loadsdumps)objpickle_moduler$   r%   is_pickleablep  s   z*test_pickle_dataset.<locals>.is_pickleable)r  )r    r  r  r2   r$   r  r%   test_pickle_datasetn  s   r  c                 C   sl   | d }t g dg dg dd}tj|}tj|t|ddgd t|	 }t
||d	  d S )
Nz
ARROW-3208)rg  r|   g      @r    r   g333333=@)rg  r|   r   r  r  r   r}   )r   r   r   r   r   r   r   )onetwothreer  r  )	root_pathrO  zoutput.parquet)rK   rL   r   r   r   r   rT  r   r.   r/   r   )r    r"   r]   r   r$   r$   r%   test_partitioned_datasetw  s   r  c                 C   s(  | d }t jdd tdD d gdgd}t jdd tdD d gdgd}tj|t|d	 tj|t|d	 tj|dgd
 }|d d	 |d d	 g}|d j
dks_J |d d|d d}}||d r||d s~J d S ||d sJ ||d sJ d S )NzARROW-3325-datasetc                 S      g | ]}t d qS r|   r   randsr   r$   r$   r%   ro     r  z0test_dataset_read_dictionary.<locals>.<listcomp>r>   r|   f0rM  c                 S   r  r  r  r   r$   r$   r%   ro     r  )r  )read_dictionaryr   r   r   )r   r   r   r   rT  r   r.   r/   chunkrS  
num_chunksr   )r    r"   t1t2r#   	ex_chunksc0c1r$   r$   r%   test_dataset_read_dictionary  s&   $$r  c                 C   s   t dt g dt  i}t|| d  t|| d  t dg}tj| d |d}t jdg di|d}||s@J tj| |d}t jdg di|d}||sYJ tj	| |d}t jdg di|d}|
 |stJ d S )Nr   r   zdata1.parquetzdata2.parquet)r   r  r  )r   r   r   r   r   r   )r   r   rN   r  r   r   r  r   r   r.   r/   )r    r   r  r#   r   r$   r$   r%   test_read_table_schema  s   r  c                 C   s   t t g dt  t g dt  d}t|| d  tj| d ddgd}t ddg}|j	ddgks;J |j|ksBJ d S )Nr   r   r   r   r,   )r   r  )
r   r   rN   r  r  r   r   r   r  r0   )r    r   r#   expected_schemar$   r$   r%   *test_read_table_duplicate_column_selection  s   r  c                 C   s   dd l m} | d }|d d d jdd tdg d	i}t|t|d d d d
  |jg dd}tj	t||d}|j
g dksIJ tjt||d }|j
g dks]J d S )Nr   test_partitioning20121001TrG  r   r   r   )yearmonthday)field_names)partitioning)r   r  r  r  )pyarrow.datasetr2   r   r   r   r   r   r   r  r   r0   r.   r/   )r    dsr  r   rU  r#   r$   r$   r%   test_dataset_partitioning  s$   r  c                 C   sZ   t dg di}t|| d  tt| t }tjd|d}| }|	|s+J d S )Nr   r   r   r5  r   )
r   r   r   r   r   r   r   r.   r/   r   )r    r   r   r2   r#   r$   r$   r%   #test_parquet_dataset_new_filesystem  s   r  c                 C   st   t d}|d}tdg di}t|| d  t| dd}tj	||d}|d	 }|j
d
 j|ks8J d S )Nfsspecfiler   r   r   \r   r   z/data.parquetr   )r   importorskipr   r   r   r   r   r   replacer.   	fragmentsr"   )r    r  r   r   r"   r2   r   r$   r$   r%   6test_parquet_dataset_partitions_piece_path_with_fsspec  s   

r  c                    s   t dg di}| d }g   fdd}d}tj||dg||d |d d	 |d
 d	 |d d	 h}tttj }||ksAJ d S )Nr   r   r  c                    s     | j d S r'   )r   r"   )written_filepaths_writtenr$   r%   file_visitor  s   zDtest_parquet_write_to_dataset_exposed_keywords.<locals>.file_visitorzpart-{i}.parquet)r  r  basename_template1zpart-0.parquet23)r   r   r   rT  r   rr   pathlibPath)r    r   r"   r  r  expected_pathspaths_written_setr$   r  r%   .test_parquet_write_to_dataset_exposed_keywords  s   


r  write_dataset_kwarg))r   T)r   Fc                 C   s   ddl m} tdg di}| d }t|j}|\}}|ttjj	vs(J ||j	v s/J t
jj|ddd%}tj||fi ||i |jd \}	}
}|| |ksUJ W d   dS 1 s`w   Y  dS )	zEVerify kwargs in pq.write_to_dataset are passed onto ds.write_datasetr   Nr   r   zout.parquetwrite_datasetT)autospec)r  r2   r   r   inspect	signaturer  r   rT  
parametersmockpatchrQ   
mock_calls)r    r  r  r   r"   r  keyargmock_write_dataset_name_argsr   r$   r$   r%   #test_write_to_dataset_kwargs_passed  s   "r  c                 C   s   t t jg dg ddg dd}t|}| d }tj|| d dgd d	d
 | D }t|dks8J d|vs>J d S )N)r   r5   r   r4   r   r   )catr|  r2   r  rN  c                 S   s   g | ]	}|  r|jqS r$   )is_dirr   )rm   r   r$   r$   r%   ro   5  r   z;test_write_to_dataset_category_observed.<locals>.<listcomp>r   zcat=c)	rK   rL   r   r   r   r   rT  iterdirr   )r    r]   r   r"   subdirsr$   r$   r%   'test_write_to_dataset_category_observed'  s   
r  )r|   r>   )NNNr'   )r  )nr   r  ro  r  sysnumpyrM   ImportErrorr   unittest.mockr  pyarrowr   pyarrow.computecomputepcr   r   r   r   r   r   r   pyarrow.testsr   pyarrow.utilr	   rk  rl  r   pyarrow.tests.parquet.commonr
   r   r   r   rh  rK   ri  rj  r   markr2   
pytestmarkr&   r*   r3   ra   rw   xfailr   AssertionErrorr   r   r   r   r   r   parametrizer  castr  r   r   s3r   r   r   r   r(   rS   r   r   r  r!  r'  r*  r-  r/  r2  r:  rA  rE  rI  rW  rY  r~  r  r  r  r  r  r  r  skipifplatformr  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r$   r$   r$   r%   <module>   s0   


F
!*

!
.
(







'.

G
%






>
.












