o
    R
iT                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlmZ d dlmZ zd dlZW n eyI   dZY nw d dlZd dlZd dlmZ d dlZd dlZd dlmZ d dlZd dlmZ d dlm Z m!Z!m"Z"m#Z#m$Z$ zd dl%Z&W n ey   dZ&Y nw zd dl'm(Z) W n ey   dZ)Y nw zd dl*m+Z, W n ey   dZ,Y nw ej-j(Z.G dd dZ/dd	 Z0d
d Z1dd Z2ej3dd Z4ej3dd Z5ej3dd Z6ej3dddd Z7ej3dd Z(ej3ddgddgddd  Z8ej-j+d!d" Z9d#d$ Z:ej-j+d%d& Z;d'd( Z<d)d* Z=ej-j+d+d, Z>ej-j+d-d. Z?ej-j+d/d0 Z@ej-j+d1d2 ZAej-j+d3d4 ZBej-j+d5d6 ZCd7d8 ZDd9d: ZEd;d< ZFej-Gd=g d>d?eHd@eIdAeIfdBdCZJdDdE ZKdFdG ZLej-j+dHdI ZMej-j+dJdK ZNej-j+dLdM ZOdNdO ZPdPdQ ZQej-GdRejRdSddTdUdVggej-GdWddgej-j+dXdY ZSej-j+dZd[ ZTej-j+ej-jUd\d] ZVd^d_ ZWd`da ZXej-j+dbdc ZYej-j+ddddeZZej-j+dfdg Z[ej-j%ej-j+dhdi Z\ej-j+djdk Z]ej-j+dldm Z^ej-j+dndo Z_ej-j%ej-j+dpdq Z`ej-j+drds Zaej-j+dtdu ZbddvdwZcej-j%ej-j+dxdy Zdej-j+dzd{ Zeej-j%ej-j+d|d} Zfej-j+d~d Zgej-j+dd Zhej-j+dd Ziej-j+dd Zjej-j+dd Zkej-j+dd Zlej-j%ej-j+dd Zmej-j+ej-Gddd dd gdd Znej-j+ej-Gdddgej-Gddd dd gdd Zoej-Gddd dd gdd Zpej-Gddd dd gdd Zqdd Zrdd Zsej-j+ej-j%dd Ztdd Zudd Zvdd Zwdd Zxdd Zydd Zzdd Z{ej-j+dd Z|dd Z}dddZ~dd Zdd Zdd Zej-j+dd Zej-j+dd Zej-j+ddĄ Zej-j+ddƄ Zej-j+ddȄ Zej-j+ddʄ Zej-j+dd̄ Zej-j+dd΄ Zej-j+ddЄ Zej-j+dd҄ ZddԄ Zddք Zdd؄ Zddڄ Zej-Gdddgdd݄ Zdd߄ Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+dd Zdd Zdd Zej-j+ej-Gdddgej-Gdddgej-Gdddgej-Gdg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfgdd Zej-j%dd Zej3dd Zej-j+ej-jUdd  Zej-j+ej-jUdd Zej-j+ej-jUdd Zej-j+ej-jUdd Zej-j+dd Zej-j+d	d
 Zej-j%dd Zej-j+dd Zej-j+dd Zej-j+dd Zdd Zdd Zdd Zdd Zej-j+dd Zej-j+dd Zdd  Zej-jd!d" Zej-jd#d$ Zd%d& Zej-jd'd( Zej-j%d)d* Zej-j%ej-Gd+g d,d-d. Zd/d0 Zd1d2 Zd3d4 Zej-j%d5d6 Zej-j%d7d8 Zej-j%d9d: Zd;d< Zd=d> Zd?d@ Zej-j%ej-Gd+g dAdBdC ZdDdE Zej-j+ej-j%dFdG Zej-j+ej-j%ej-jejdHkdIdJdKdL Zej-j+ej-j%dMdN Zej-j+dOdP Zej-j+ej-j%dQdR ZĐdSdT ZŐdUdV Zej-j+ej-j%dWdX Zej-j+ej-j%dYdZ Zej-j+ej-j%d[d\ Zej-j+ej-j%d]d^ Zej-j+d_d` Zej-j+ej-j%dadb Zej-j+ej-j%dcdd Z͐dedf Zej-j%ej-j+dgdh Zej-j+ej-j%didj ZАdkdl Z	ddmdnZej-j+dodp Zej-j+ej-j%dqdr ZԐdsdt ZՐdudv Z֐dwdx Zej-j+ej-j	dydz Zؐd{d| Zej-j%d}d~ ZڐdddZېdd Zܐdd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+dd Zej-j+ej-j%dd Zej-j+ej-j%dd Zej-j+ej-j%dd Zdd Zdd Zdd Zdd Zdd Zej-jej-j+dd Zdd Zej-j+dd Zej-j+dd Zej-j+ej-j%dd Zdd Zej-j+dd Zej-j+ej-jUdd ZdZej-j+ej-jUdd Zej-j+dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-j(dd Zej-GdÐdĐdgdƐdǄ Zej-GdÐdĐdgdȐdɄ Zdʐd˄ Zd̐d̈́ Zej-GdΐdϡdАdф Z dҐdӄ Zej-j+dԐdՄ Zd֐dׄ Zdؐdل Zej-GdÐdĐdgdڐdۄ Zdܐd݄ Zdސd߄ Zdd ZdS (      N)copytreequote)is_threading_enabled)FSProtocolClassProxyHandler_configure_s3_limited_user_filesystem_uri
change_cwdc                   @   s   e Zd Zdd ZdddZdS )TableStreamWrapperc                 C   s
   || _ d S Ntable)selfr    r   N/home/ubuntu/.local/lib/python3.10/site-packages/pyarrow/tests/test_dataset.py__init__F      
zTableStreamWrapper.__init__Nc                 C   s   | j |S r   )r   __arrow_c_stream__)r   requested_schemar   r   r   r   I      z%TableStreamWrapper.__arrow_c_stream__r   )__name__
__module____qualname__r   r   r   r   r   r   r   E   s    r   c                 C   s~   dd l }dd l}| ddd}|jdd}|g d}g }t| D ]}|||t|t|f ||7 }q"tj	|g ddS )	Nr   i        )days)greenblueyellowredorange)dateindexvaluecolorcolumns)
datetime	itertools	timedeltacyclerangeappendfloatnextpd	DataFrame)nr(   r)   dayintervalcolorsdatair   r   r   _generate_dataM   s   
r8   c              
   C   s\   t t dt  t dt  t dt  t dt  g}t jj| |dd}|	 S )Nr"   r#   r$   r%   F)schemapreserve_index)
par9   fielddate32int64float64stringTablefrom_pandasreplace_schema_metadata)dfr9   r   r   r   r   _table_from_pandas]   s   rE   c              	   C   sx   |   D ]5}| '}t|tjsJ |jrJ | sJ | s$J | r*J W d    n1 s4w   Y  qd S r   )	get_fragmentsopen
isinstancer;   
NativeFileclosedseekablereadablewritable)datasetfragmentnfr   r   r   +assert_dataset_fragment_convenience_methodsh   s   

rQ   c            
      C   s$  t  } ddg}t|D ]\}}| d| d}| | | |e}ttdttttdttt	td|gd dd tdD g}t
dt
 fd	t
 fd
t
 fdt
 fdt
t
 t
 dfg}t
j||d}t
j|g}	t|	| W d    n1 sw   Y  q| S )Nzsubdir/1/xxxzsubdir/2/yyyz/file.parquetr   c                 S   "   g | ]}|d  t |d  dqS    abstr).0jr   r   r   
<listcomp>      " zmockfs.<locals>.<listcomp>i64f64rZ   conststructrV   r9   )fs_MockFileSystem	enumerate
create_diropen_output_streamlistr,   mapr.   rZ   r;   r9   r>   r?   r@   rb   record_batchrA   from_batchespqwrite_table)
mockfsdirectoriesr7   	directorypathoutr6   r9   batchr   r   r   r   ro   s   s6   





ro   c               	   C   s  t  } ttjddgt dtjtjddgt dddgd}ttjdd	gt dtjtjddgt	 dd
dgd}| 
d d}| |}t|| W d   n1 scw   Y  d}| |}t|| W d   n1 sw   Y  | ||fS )z
    Creates a _MockFileSystem with two parquet files that have promotable schemas.
    - file1: value: int8, dictionary: dictionary<int8, string>
    - file2: value: uint16, dictionary: dictionary<int16, string>
    r      typer   rW   rX   r$   
dictionaryrU      dcz
subdir/zzzzsubdir/zzz/file1.parquetNzsubdir/zzz/file2.parquet)rd   re   r;   r   arrayint8DictionaryArrayfrom_arraysuint16int16rg   rh   rm   rn   )ro   table1table2path1rs   path2r   r   r   promotable_mockfs   s2   

r   c                    sx   ddl m}m} ddlm} |   fddt  fdd}| |d	| || }tjfd
d}||fS )Nr   )LocalFileSystemPyFileSystemr   )r   c                    s    fdd| D S )Nc                    s   h | ]	}  t|qS r   )normalize_pathrZ   r[   plocalfsr   r   	<setcomp>       z6open_logging_fs.<locals>.normalized.<locals>.<setcomp>r   )pathsr   r   r   
normalized   s   z#open_logging_fs.<locals>.normalizedc                    s$     t|}| | j|S r   )r   rZ   add_fsopen_input_file)r   rr   )r   openedr   r   r      s   
z(open_logging_fs.<locals>.open_input_filer   c              	   3   sB       zd V  W   | ksJ d S   | ks J w r   )clear)expected_opened)r   r   r   r   assert_opens   s
   .z%open_logging_fs.<locals>.assert_opens)	
pyarrow.fsr   r   test_fsr   setsetattr
contextlibcontextmanager)monkeypatchr   r   r   r   rd   r   r   )r   r   r   r   open_logging_fs   s   r   module)scopec              	      s  | j jd | j jd td t }t  fddtdd D \}}}|d tt	fd	dtdd
 D D ]'\}}d| d}|
|}tt|| W d    n1 skw   Y  qI|d ||jjj|jgD ]7\}	}d|	d  d|	d  }
|
 d}||
 |
|}tt|| W d    n1 sw   Y  q|d ||jjj|jjjgD ]7\}	}d|	d  d|	d  }
|
 d}||
 |
|}tt|| W d    n1 sw   Y  q|d |dD ]2\}	}d|	 }
|
 d}||
 |
|}tt|| W d    n	1 s9w   Y  q|S )Npandasparquet  c                    "   g | ]} j ||d    qS )rz   ilocr[   r7   )rD   r2   r   r   r]      r^   z!multisourcefs.<locals>.<listcomp>r   rz   plainc                    r   
   r   r   )df_ar2   r   r   r]      r^   r   zplain/chunk-rR   r9   zschema//r   z/chunk.parquethivez
hive/year=z/month=
hive_colorr%   zhive_color/color=)configpyarrowrequiresr8   rd   re   lenr,   rg   rf   rh   rm   rn   rE   groupbyr"   dt	dayofweekr%   yearmonth)requestro   df_bdf_cdf_dr7   chunkrr   rs   partfolderr   )rD   r   r2   r   multisourcefs   sT   (
,



"





r   c              
   C   sf   t  }tjddd}t d}t ttdt	 tdt
 g|_t | |||}| S )NsubdirT	recursivegroupkey)dsParquetFileFormatrd   FileSelectorFileSystemFactoryOptionsDirectoryPartitioningr;   r9   r<   int32r@   partitioningFileSystemDatasetFactoryfinish)ro   formatselectoroptionsfactoryr   r   r   rN     s   
rN   TFthreadedserial)paramsidsc                    s   | j  G  fddd}| S )z]
    Fixture which allows dataset scanning operations to be
    run with/without threads
    c                       sT   e Zd Z fddZ fddZdd Zdd Zd	d
 Zdd Zdd Z	dd Z
dS )zdataset_reader.<locals>.readerc                    s
    | _ d S r   use_threads)r   r   r   r   r   +  r   z'dataset_reader.<locals>.reader.__init__c                    s   d|v rt d |d< d S )Nr   z9Invalid use of dataset_reader, do not specify use_threads)	Exception)r   kwargsr   r   r   _patch_kwargs.  s
   z,dataset_reader.<locals>.reader._patch_kwargsc                 [      |  | |jdi |S Nr   )r   to_tabler   rN   r   r   r   r   r   5     
z'dataset_reader.<locals>.reader.to_tablec                 [   r   r   )r   
to_batchesr   r   r   r   r   9  r   z)dataset_reader.<locals>.reader.to_batchesc                 [   r   r   )r   scannerr   r   r   r   r   =  r   z&dataset_reader.<locals>.reader.scannerc                 [      |  | |j|fi |S r   )r   head)r   rN   num_rowsr   r   r   r   r   A     
z#dataset_reader.<locals>.reader.headc                 [   r   r   )r   take)r   rN   indicesr   r   r   r   r   E  r   z#dataset_reader.<locals>.reader.takec                 [   r   r   )r   
count_rowsr   r   r   r   r   I  r   z)dataset_reader.<locals>.reader.count_rowsN)r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   reader)  s    r   )param)r   r   r   r   r   dataset_reader  s   	$r   c                    sX  t t dt  g}t  ddg}dd tddD } fddt||D }td	td
k}tj	|| |d}tj	j
|| ||d}||fD ]}t|tj	sYJ t|jtjsbJ |j|sjJ t|jt|ksuJ t| }t|||D ]q\}	}
}|	j|
sJ |	j|ksJ t|	jtjsJ t|	tjsJ |	jdgksJ |	jdksJ t|	 }|	jt|  krdksJ  J t|d tjsJ |d j|ksJ |d jdgksJ |d jdksJ qt|jtddkd}t|dks	J qOtj	|| d}|jtdsJ tj	j
|| d}|jtds5J | D ]}	|	jtdsGJ q9tjtdd t	| | W d    n	1 scw   Y  tjtdd tj	|| dd W d    n	1 sw   Y  tjtdd tj	j
| d W d    d S 1 sw   Y  d S )Nra   subdir/1/xxx/file0.parquetsubdir/2/yyy/file1.parquetc                 S   s   g | ]	}t d |kqS )r   r   r<   r[   xr   r   r   r]   W  r   z+test_filesystem_dataset.<locals>.<listcomp>r   rU   c                    s   g | ]\}}  ||qS r   make_fragment)r[   rr   r   file_formatro   r   r   r]   X  s    leveli9  )r9   r   
filesystemroot_partition)r9   r   r   
partitionsr   r   filterru   r9   r   r   Tzincorrect typematch)r9   r   r   r   )r;   r9   r<   r>   r   r   r,   zipscalarFileSystemDataset
from_pathsrH   r   partition_expressionequalsr   filesri   rF   rr   ParquetFileFragment
row_groupsnum_row_groupssplit_by_row_groupr   pytestraises	TypeError)ro   r9   r   r   	fragmentsr   dataset_from_fragmentsdataset_from_pathsrN   rO   	partitionrr   row_group_fragmentsr   r   r   test_filesystem_datasetP  sx   "$r  c                 C   s   t t dt  g}t }dg}tjj|||t	 d}|
  tt | | W d    d S 1 s9w   Y  d S )Nf1znonexistingfile.arrowr   )r;   r9   r<   r>   r   IpcFileFormatr  r  rd   r   rF   r  r  FileNotFoundErrorr   )r   r9   r   r   rN   r   r   r   1test_filesystem_dataset_no_filesystem_interaction  s   "r  c                 C   s*  t | tjsJ t | jtjsJ td}tjt	dd | j
|d W d    n1 s.w   Y  tdd }tjt	dd | j
|d W d    n1 sRw   Y  tjg dt d}tjg dt d}|| D ]}t |tjs|J |d|sJ |d|sJ qr||  D ]}t |tjsJ t |jtjsJ q|
| }t |tjsJ t|d	ksJ tddk}| j
d
|d}	|	d }	|	d ddgksJ |	d ddgksJ t|	d ddgksJ t|	d ddgksJ tddk}| j
d
|d}	|	d }	|	d g dks#J |	d g dks.J |	d g dks9J |	d g dksDJ tdtdtddkd}
| j
d
|
d}	|	d }	t|	g dksnJ |	d g dksyJ |	d g dksJ |	d g dksJ t|  d S )Nr_   zmust evaluate to boolr   r   r   r   r   ru   rU   rz   rv   r   r   T)r   r   r   r`         ?ru   r   xxxyyy)rb   rX   1)r   rz   r   rz   )r        @r  r  )r   r   ru   ru   )r  r  r  r  )r_   r`   new)r   r'   )
r   r   r   r   ru   ru   rU   rU   rz   rz   )
        r!  r  r         @r"        @r#  r  r  r   )
FFTTFFFFTT)rH   r   Datasetr9   r;   Schemar<   r  r  r  r   r}   r>   r?   r   RecordBatchcolumnr  r   scan_batchesTaggedRecordBatchrO   FragmentrA   r   sort_by	to_pydictsortedri   rQ   )rN   r   non_boolean_exprnon_boolean_expr2expected_i64expected_f64rt   r   	conditionresult
projectionr   r   r   test_dataset  s^   

r5  c                 C   sn  | \}}}t |||gt  }tjtjdd |  W d    n1 s'w   Y  |jdd}tt	dt
 t	dtt t g}||sRJ ||}| }|jdd ttjd	d
gddggt
 dttjtjdd	gt dddgtjtjd	dgt dddggd}	||	sJ |jdd	d}
|
	djt
 ksJ d S )NzCUnable to merge: Field value has incompatible types: int8 vs uint16r   
permissivepromote_optionsr$   ry   Tfullr   ru   rU   rz   rv   r   rW   rX   r{   r|   rx   )r8  r  )r   r   r   r  r  r;   ArrowTypeErrorinspectr9   r<   r   ry   r   r@   r  r   r   validater   chunked_arrayr   r   r}   rw   )r   ro   r   r   r   r9   expected_schemarN   r   expected_tableinspected_schema_one_fragr   r   r   -test_dataset_factory_inspect_schema_promotion  sJ   


rB  c                 C   s   | \}}}t |||gt  }tjtdd |jdd W d    n1 s(w   Y  tjtdd |jdd W d    n1 sEw   Y  tjtdd |jd	d W d    d S 1 scw   Y  d S )
Nz#Invalid promote_options: bad_optionr   
bad_optionr7  z<Fragment count must be a non-negative int or None; got 'one'one)r  z9Fragment count must be a non-negative int or None; got -1)r   r   r   r  r  
ValueErrorr<  )r   ro   r   r   r   r   r   r   'test_dataset_factory_inspect_bad_params  s"   
"rG  c                 C   s(   | j ddd}t|}|jdksJ d S )N      )fragment_readaheadbatch_readahead   )r   r/   num_columns)rN   r   rt   r   r   r   test_scanner_options1  s   rN  c           	      C   sV  |j | t d}t|tjsJ ttj |j | dgd W d    n1 s*w   Y  |j | dgt d}|j	| j
ksBJ |jt
dt fgksQJ t|tjsYJ | }| D ]}|j
|jkskJ |jdksrJ qa||  ks}J |j
|jksJ t|jD ]}t|g}||||ksJ qttj |t|jg W d    n1 sw   Y  |j| ksJ |j | g dt d}| }g d}|j|ksJ |d}|d	  d
gd dgd  ksJ |d  dgd dgd  ksJ |d  dgd ksJ |d  dgd ks)J d S )N)memory_poolunknownr&   r_   )r'   rO  r   )
__filename__fragment_index__batch_index__last_in_fragmentrR  rQ  r   r   r   r   rS  r   rT  T)r   r;   default_memory_poolrH   r   Scannerr  r  ArrowInvaliddataset_schemar9   projected_schemar>   r   r   rM  	to_readerread_allr,   r   r}   r   ArrowIndexErrorr   column_namesr+  	to_pylist)	rN   r   r   r   rt   r7   r   expected_namessorted_tabler   r   r   test_scanner8  sR   


& ra  c              	   C   sd   t  }t  }t | z| }tj| }| }| |ks$J W t | d S t | w r   )	r;   rU  system_memory_poolset_memory_poolbytes_allocatedr   rV  from_datasetr   )rN   old_poolpoolallocated_beforer   _r   r   r   test_scanner_memory_pooli  s   
rj  c                 C   s  | | d}|tjjg | jdksJ |j | ddgd }|ddgiks'J |j | ddgtddkd }|dddgiksBJ |j | d	dgd }|dtt	d
d iks[J t
|  }|j ddgd }|ddgikstJ |j d	dgd }|dtt	d
iksJ d S )Nr   rc   r   r_   r&   ru   r'   r   rU      r   )r   r;   rA   rl   r9   r,  r   r<   ri   r,   r/   rF   )rN   r   r3  rO   r   r   r   	test_head|  s"   rm  c                 C   s
  t |  }ddgtddgfD ]}|||}||||ks%J qtt ||tdg W d    n1 s@w   Y  ddgtddgfD ]}|| ||| |ksbJ qPtt || tdg W d    d S 1 s~w   Y  d S )Nr   rU   r   rL  r   )	r/   rF   r;   r}   r   r   r  r  
IndexError)rN   r   rO   r   expectedr   r   r   	test_take  s    
"rp  c                 C   s   t |  }||dksJ |j|tddkddksJ || dks(J |j| tddkddks8J |j| tddkddksHJ |j| tdd	k dd	ksXJ d S )
Nr   r_   rz   r   r   r   r   rU   r   )r/   rF   r   r   r<   )rN   r   rO   r   r   r   test_count_rows  s    $rq  c               	   C   sN   t jt jt jg} | D ]}tt |  W d    n1 sw   Y  q
d S r   )r   
FileFormatrV  Partitioningr  r  r  )classesklassr   r   r   test_abstract_classes  s   rv  c                  C   sD  t t dt  t dt  g} tjtjtjfD ]}|| }t	|tj
s)J ||| ks1J |dks7J qt t dt  t dt  g} t| }t|jdksYJ tdd |jD seJ |d	}t	|tjsrJ tdd
ktddk@ }||sJ tt j |d W d    n1 sw   Y  |d}tdd
k}||sJ |tj| ddksJ t t dt  t dt  g} tj| dd}t|jdksJ tdd |jD sJ |d}tdtdktdtd
k@ }||sJ |d}td tdtd
k@ }||s.J dD ]}tt j || W d    n	1 sIw   Y  q0|tj| ddks\J t t dt  t dt  g} t| }t|jdks~J tdd |jD sJ |d}t	|tjsJ tdd
ktddk@ }||sJ tt j |d W d    n	1 sw   Y  |tj| ddksJ t t dt  t dt t  t  g} tj| dt g did}|jd d u sJ |jd  g dksJ |tj| d dks"J tjt t dt  t dt t  t  gdt g did}|jd d u sQJ |jd  g dks_J t jt td t d!d td D t d"gd# d$gd#  gg d%d&}t d't  fg}tjtjtjfD ]9}t )}||}tj||d(|d) tj |d(|d)}	|	! }
|
|sJ W d    n	1 sw   Y  qt B}t|}tj||d(|d) d }	tjt"d*d+ tj |d(t#dd)}	W d    n	1 sw   Y  |	d u sJ W d    d S 1 sw   Y  d S ),Nr_   r`   zother objectr   r   ru   c                 s       | ]}|d u V  qd S r   r   r   r   r   r   	<genexpr>      z$test_partitioning.<locals>.<genexpr>z/3/3.14/rU   gQ	@z/prefix/3/aaaz/3/nonesegment_encodingalphabetaxyz)null_fallbackc                 s   rw  r   r   r   r   r   r   rx    ry  z/alpha=0/beta=3/r   z/alpha=xyz/beta=3/)z/alpha=one/beta=2/z/alpha=one/z
/beta=two/otherc                 s   rw  r   r   r   r   r   r   rx     ry  z3_3.14_prefix_3_aaa_)firstsecondthirddictionariesr      c                 s       | ]}t   V  qd S r   randomr[   ri  r   r   r   rx  &  ry  rW   r   rX   r  f2r   namesr   ipcr   r   z,Expected Partitioning or PartitioningFactoryr   )$r;   r9   r<   r>   r?   r   r   HivePartitioningFilenamePartitioningrH   rs  r   r  allparse
Expressionr  r  r  rW  r  is_nullry   r~   r@   r}   r^  r   r,   tempfileTemporaryDirectorywrite_datasetrN   r   rF  int)r9   ru  r   exprro  
shouldfailr   partitioning_schematempdir	load_backload_back_tabler   r   r   test_partitioning  s   




 

"



$r  c              
   C   s   t t dt  t dt  g}t|t|t|tj|ddtj|ddtj|dddg}|D ]}| 	| 
||ksDJ q6d S )Nr_   r`   rz  r{  r  )r|  r  )r;   r9   r<   r>   r?   r   r   r  r  loadsdumps)pickle_moduler9   partsr   r   r   r   test_partitioning_picklingB  s   	r  z@flavor, expected_defined_partition, expected_undefined_partition))r  )zfoo=A/bar=ant%20bee r  r  )r   )z	A/ant beer  r  )r  )r  z
A_ant bee_)r  ri  flavorexpected_defined_partitionexpected_undefined_partitionc                 C   s  t dt  fdt  fg}tt| |d}|tddktddk@ |ks,J |d	|
tddktddk@ sEJ |tddktddk@ tddktddk@ @ |kshJ |tddktddk@ tddktddk@ B |ksJ | dkrtjt jdd	 |tddk W d    d S 1 sw   Y  d S |tddkd
ksJ d S )Nfoobarrc   zant beeAr   r  zDNo partition key for foo but a key was provided subsequently for barr   )zbar=ant%20beer  )r;   r9   r@   getattrr   r   pcr<   r  joinr  r  r  rW  )r  r  r  r  r   r   r   r    test_dataset_partitioning_formatT  s<    
" 
r  c                  C   s   t tg dg dd} t d}t d}| j|d || |d |d| d	d
}tg dg dg dg dd	}||sHJ d S )Nr   ru   rU   )ru   ru   ru   rV   rW   rX   r   ru   r?   )za+1zb-aza*2za/br&   ru   rU   rz   )r   r   rE  )ru   rz      )      ?r  g      ?)r   rN   r;   r   r<   r   castr  )rN   rW   rX   r3  ro  r   r   r   $test_expression_arithmetic_operators  s   


r  c                  C   s   dd dD \} }}t | ddiksJ t | t | ks!J t | |@ |@ dd dD ks3J t ddk}t |i ksCJ t | |@ ddiksPJ t d }t |dd iksbJ d S )	Nc                 S   s   g | ]	}t ||kqS r   r   r[   fr   r   r   r]     r   z'test_partition_keys.<locals>.<listcomp>abcrW   c                 S   s   i | ]}||qS r   r   r  r   r   r   
<dictcomp>  s    z'test_partition_keys.<locals>.<dictcomp>r{   rU   )r   get_partition_keys_get_partition_keysr<   r  )rW   rX   r|   nopenullr   r   r   test_partition_keys  s   $r  c                  C   s  t  } t jddgd}t jdd}t jt d}t jtjd}| jt ks)J |jddhks2J | jdks9J |jdks@J | jt	 ksIJ |jt ksRJ | j
tju sZJ |j
tju sbJ | | kshJ | |ksnJ | |kstJ | |kszJ | |ksJ d |_|jt	 ksJ | |ksJ t |_|jt ksJ | |ksJ tj|_
|j
tju sJ || ksJ tj|_
|j
tju sJ || ksJ d S )	NrW   rX   dictionary_columnsmscoerce_int96_timestamp_unitbinary_type	list_typens)r   ParquetReadOptionsr;   binary_viewLargeListTyper  r   r  r  binaryr  ListTypelarge_binary)opts1opts2opts3opts4opts5r   r   r   test_parquet_read_options  s<   
r  c                  C   s   t  } t jdhd}t jdd}t jt d}t jtjd}| jt  ks)J |jt jdgdks5J |jt jddks@J |jt jt dksMJ |jt jtjdksYJ d S )NrW   r  sr  r  r  )r   r   r;   r  r  read_optionsr  )pff1pff2pff3pff4pff5r   r   r   %test_parquet_file_format_read_options  s    r  c                  C   s  t  } t jdd}t jddd}t jddd}t jdd	d
}t jdd}tjdddd}t jd|d}| jdu s;J | jdksBJ t rL| jdu sLJ | jdksSJ | j	dksZJ | j
du saJ |jdu shJ |jdksoJ t ry|jdu syJ |jdu sJ |jdksJ t r|jdu sJ |jdu sJ |jdksJ t r|jdu sJ |jdksJ |j	d	ksJ |j
du sJ t r|jdu sJ |j|ksJ |j| jksJ | | ksJ | |ksJ ||ksJ ||ksJ || ksJ || ksJ || ksJ d S )N   buffer_sizei    T)r  use_buffered_streamF)r  
pre_bufferi@ i )thrift_string_size_limitthrift_container_size_limitpage_checksum_verificationrl  )hole_size_limitrange_size_limitlazy)r  cache_optionsi @B )r   ParquetFragmentScanOptionsr;   CacheOptionsr  r  r   r  r  r  r  r  )r  r  r  r  r  opts6
cache_optsopts7r   r   r   test_parquet_scan_options  sd   r  c                 C   s  t  t  t tjjdddt jtjjddgddt jtjjddd	dt  t jtjjdd
ddt jtjjddddg}z	|	t 
  W n	 tyT   Y nw td urt|t  t jdhdt jddt jdddddg |D ]}| | ||ksJ qvd S )N	T)	delimiterignore_empty_linesrU   r  )	skip_rowsr]  r  i   )r  
block_sizeignorenewlines_in_valuesunexpected_field_behavior)parse_optionsF   r   r  rW   r  )r  r  {   i  )r  r  r  r  )r   r  CsvFileFormatr;   csvParseOptionsReadOptionsJsonFileFormatjsonr-   OrcFileFormatImportErrorrm   extendr   r  r  )r  formatsr   r   r   r   test_file_format_pickling,  sR   



r  c              
   C   s   t  t jtjjdddt jtjjdddt  t tjjddd	t jtjjdd
ddg}t	d urD|
t jddt jddg |D ]}| | ||ksTJ qFd S )NT)strings_can_be_nullconvert_options   r  r  Ferrorr  i   r  r  r  )r  )r   CsvFragmentScanOptionsr;   r  ConvertOptionsr  JsonFragmentScanOptionsr	  r  rm   r  r  r  r  )r  r   optionr   r   r   #test_fragment_scan_options_picklingS  s2   

r  paths_or_selectorr   r   r   r   r  c                 C   sj  t jt jdhd|d}t d}t ttdt tdt	 g|_
|jdks/J |jddgks8J |jd	u s?J t | |||}| }| jttd
t tdt tdtt t	 tdt tdtt t	 dtdt tdt	 gd	dsJ t| tsJ t||t jsJ |jt dsJ | }t|t jsJ | }tjg dt d}	tjg dt d}
tjtjg dt dtjd  t	 d}tdd t!dD }|" }t#|ddgddgD ]\\}}}}tj|gd t d}tj|gd t	 d}tj|d gd t d}|j$d usEJ |j%dksMJ |d |	sWJ |d |
saJ |d |skJ |d |suJ |d |sJ |d |sJ |d |sJ q|& }t|tj'sJ t(|d ksJ |j%dksJ d S )!NrZ   r  )r  r  r   r   r   .ri  Fr_   r`   ra   rb   rV   check_metadataTr  rv   z	0 1 2 3 4c                 S   rS   rT   rY   r   r   r   r   r]     s    z+test_filesystem_factory.<locals>.<listcomp>r   r   ru   r  r  rL  r   rU   rz   r  r   ))r   r   r  r   r   r;   r9   r<   r   r@   r   partition_base_dirselector_ignore_prefixesexclude_invalid_filesr   r<  r  r>   r?   ry   rb   rH   inspect_schemasri   r   r  r   r  r   r}   r   r   splitr,   r(  r  r  rM  r   rA   r   )ro   r  r  r   r   r   inspected_schemarN   r   r0  r1  expected_strexpected_structiteratorrt   rO   r   r   expected_groupexpected_keyexpected_constr   r   r   r   test_filesystem_factoryl  s   

	


"r*  c                 C   s   t  }t jd| |d}|jD ]A}||| }|jdgksJ |j|| dgd}||fD ]}t|t js6J |j|ks=J t|j	t
| sGJ q,|jdgksPJ qd S )N/plainr   r   r   r
  )r   r   rN   r  r   r
  rH   r	  rr   r   rw   )r   parquet_formatrN   rr   rO   row_group_fragmentr  r   r   r   test_make_fragment  s    
r0  c                    s  | \}}}}}}}t  |g}fdd|D }	t j|	|jd   }
|
|s0J  fdd jD }fddt||D }t j||jd}  }
|
|s\J dd |D }fddt||D }t j||jd}tj	t
jjdd	 | }W d
   n1 sw   Y  dd |D }fddt||D }t j||jd}tj	tdd	 | }W d
   d
S 1 sw   Y  d
S )z
    Test passing file_size to make_fragment. Not all FS implementations make use
    of the file size (by implementing an OpenInputFile that takes a FileInfo), but
    s3 does, which is why it's used here.
    c                    s   g | ]}  |qS r   r   r[   rr   r   rd   r   r   r]     s    z0test_make_fragment_with_size.<locals>.<listcomp>)r   r9   r   c                    s   g | ]	} j |jqS r   )r   get_file_infosizer   rN   r   r   r]     r   c                        g | ]\}} j ||d qS )	file_sizer   r[   rr   r4  r2  r   r   r]         c                 S      g | ]}d qS )r   r   r1  r   r   r   r]         c                    r6  r7  r   r9  r2  r   r   r]     r:  zParquet file size is 1 bytesr   Nc                 S   r;  )r  r   r1  r   r   r   r]     r<  c                    r6  r7  r   r9  r2  r   r   r]     r:  zHTTP status 416)r   r   r  r9   r   r  r  r  r  r  r   librW  OSError)s3_example_simpler   rr   urihostport
access_key
secret_keyr   r  tbl
sizes_truefragments_with_sizedataset_with_sizesizes_toosmallsizes_toolarger   )rN   r   rd   r   test_make_fragment_with_size  sP   





"rK  c                 C   s   t d}t|d}t }||}t|	 tj
s J tjg dg dg dgg dd}| ||s<J |||}| || sPJ d S )NzT
        alpha,num,animal
        a,12,dog
        b,11,cat
        c,10,rabbit
    utf-8rW   rX   r|         r   dogcatrabbitr}  numanimalr  )textwrapdedentr;   	py_bufferencoder   r  r   rH   rG   BufferReaderr   r   r  r  r  )r   r  contentbuffer
csv_formatrO   ro  pickledr   r   r   "test_make_csv_fragment_from_buffer
  s   


ra  c                 C   s   d}t |d}t }||}t| t jsJ t j	g dg dg dgg dd}| 
||s9J |||}| 
||
 sMJ d S )Nz{"alpha" : "a", "num": 12, "animal" : "dog"}
{"alpha" : "b", "num": 11, "animal" : "cat"}
{"alpha" : "c", "num": 10, "animal" : "rabbit"}
rL  rM  rN  rQ  rU  r  )r;   rZ  r[  r   r  r   rH   rG   r\  r   r   r  r  r  )r   r  r]  r^  json_formatrO   ro  r`  r   r   r   #test_make_json_fragment_from_buffer#  s   

rc  c                 C   sJ  t g dt g dt g dg}|d  |d |d  g}tjtjddgd	d
dd}|t f||fg}|D ]e\}}t j|g dd}t  }t	|| |
 }	||	}
| |
|sgJ |||
}| ||syJ t |	}||}
|
 }t|t jsJ |jsJ t|
  |sJ q=d S )NrM  rN  rQ  r   r   ru   r}  rW  r  Tr  )r  r  r  rU  r  )r;   r}   dictionary_encoder   r   r  r   BufferOutputStreamrm   rn   getvaluer   r   r  r  r  r\  rG   rH   rI   rL   ParquetFileread)r   r  arraysdictionary_arraysdictionary_formatcasesformat_r   rs   r^  rO   r`  	file_likeopened_filer   r   r   &test_make_parquet_fragment_from_buffer9  sD   


	



rp  c                 C   sl   t jtddgd dgd dgd  gg dd}t| d }tj||d	g|d
 tj|dd|d}||fS )NrI  r   rW   rz   rX   r  r  test_parquet_datasetr   )partition_cols
chunk_sizer   r   )r   r   r   )r;   r   r,   rZ   rm   write_to_datasetr   rN   )r  rs  r   r   rr   rN   r   r   r   _create_dataset_for_fragmentsg  s   "ru  c                 C   s2  t | \}}t| }t|dksJ |d }ddg}|jj|ks$J |j|j|j	|jks2J |j
tddks?J ||}|j|ksKJ ||dddsYJ |j||jd}|jg d	ksjJ ||ddsuJ |j|jdksJ |j||jtddk d
}|jg d	ksJ d S )Nru   r   r  r  r   rW   rz   rc   r  )r9   r   )ru  ri   rF   r   physical_schemar  r   r<  rr   r   r  r  r   r<   r   r]  remove_columnslicer9   remove)r  r   r   rN   r  r  physical_namesr3  r   r   r   test_fragmentsy  s&   
r{  c                 C   s   t jtddgd dgd  gddgd}t| d }tj||dgd	 tjt d
gdd}tj	|d|d}|j
tddkd}tt|dksLJ d S )NrI  r   rz   ru   colr   r  rq  rr  )r   r~   r   r  r   r  r   )r;   r   r,   rZ   rm   rt  r   r   r9   rN   rF   r<   r   ri   )r  r   rr   r   rN   r  r   r   r   test_fragments_implicit_cast  s   *r  c           
         s  t | \ }	 d fdd	}t| d }|j}|||}||||ks-J |j|j|j	|j
d}||||sEJ ||d |j|j|j	|j
d}||dtddk d	 |j|j|j	|j
d}||ddgtdd
k d |j|j|j	|j
d}||dtddkd	 d|jddd }	tjt|	d  |j|j|j	|j
d}|j|tddkd	 W d    d S 1 sw   Y  d S )Nc                    sP   | j  j||d}|r|n j}|j|ksJ  j| |}||s&J d S )Nr9   r'   r   )r   r9   r]  rx  selectr  )rO   	row_slicer'   r   actualr]  ro  r   r   r   assert_yields_projected  s   z;test_fragments_reconstruct.<locals>.assert_yields_projectedr   )r  )r   rz   )r   ru   r  ru   r   r"  rk  r   rW   z&No match for FieldRef.Name\(part\) in Fr   NN)ru  ri   rF   r   r  r  r   r   rr   r   r  r  r   r<   rv  	to_stringr  r  rF  )
r  r   r  rN   r  rO   r.  pickled_fragmentnew_fragmentpatternr   r   r   test_fragments_reconstruct  s`   


"r  c                 C   s^  t | dd\}}t| d }t| }t||j  kr$dks'J  J |j|d |jd}|jg dks:J t|dksBJ |	|
ddsMJ |d jd usVJ |d jdks_J |d jd jdddddddkstJ t|jtd	dk d
d }t|td	dk }t|dksJ |j|d td	dk d
}t|dksJ d S )Nru   rs  r   rc   r  r   minmaxr  r  r  r   )ru  ri   rF   r  r   r  r   r9   r]  r  rx  r
  
statisticsr   r<   )r  r   r   rN   rO   r  r3  r   r   r   !test_fragments_parquet_row_groups  s.   "
r  c                 C   s   t dtdi}tj|| d dd tj| d dd}t| d }|j	j
|j|jd	d
gd}|jdks8J |  |jdksCJ t|jdksLJ d S )NrW   rI  test.parquetru   row_group_sizer   r  r   r   rU   r-  )r;   r   r,   rm   rn   r   rN   ri   rF   r   r   rr   r   r  ensure_complete_metadatar   r
  )r  r   rN   original_fragmentrO   r   r   r   %test_fragments_parquet_num_row_groups  s   r  c                 C   s   t tddgddgd}|d d|d< tt|| d  d	d lm	} |	| d }|j
||ddkd
}|jd	 | k  sIJ d S )NrW   rX   r   ru   )col1col2r  categoryztest_filter_dictionary.parquetr   r   )r0   r1   dictastyperm   rn   r;   r   pyarrow.datasetrN   r   r<   r   	to_pandasr  )r  r   rD   r   rN   r3  r   r   r   ,test_fragments_parquet_row_groups_dictionary  s   "r  c                 C   s  |\}}t | d|d\}}t| d }||jg |  W d    n1 s*w   Y  |jddgks8J |g  |  W d    n1 sKw   Y  t|jtj	sYJ |j
j|j|jddgd}|j|jksnJ |  |jd }	|	jdks~J |	jdksJ |	jd usJ |||}
||jg% |
jddgksJ |
jd }	|	jdksJ |	jd usJ W d    d S 1 sw   Y  d S )Nru   rs  r   r   r   r-  )ru  ri   rF   rr   r  r
  rH   metadatarm   FileMetaDatar   r   r   idr   r  r  r  )r  r   r  rd   r   ri  rN   rO   r  	row_groupr  r   r   r   &test_fragments_parquet_ensure_metadata$  s:   





"r  c           
      C   s   |\}}t | |d\}}t| d }|g  |||}W d    n1 s+w   Y  ||jg |j}	W d    n1 sDw   Y  |	dgksPJ d S )Nr   r   r   )ru  ri   rF   r  r  rr   r
  )
r  r   r  rd   r   ri  rN   rO   r  r
  r   r   r   )test_fragments_parquet_pickle_no_metadataM  s   
r  c                 C   s  t jt g dt  t g dt  t g dt  t g dt  t g dt  t g dt  t g dt 	 t g dt 
 t g dt  t g dt  t g dt  t g dt  t g dt  t g dt dt g dt dt g dt dt g dt  t g dt  t g dt dt g dt dgg d	d
}t| d }tj|||d |tj|dddfS )N)TNF)r   r   *   )r  g      $@      E@)rW   Nzr  r  us)r   r   l    jt )booleanr~   uint8r   r   r   uint32r>   uint64r.   doubleutf8r  ts[s]ts[ms]ts[us]r=   date64time32time64r  test_parquet_dataset_all_typesr  r   r   r  )r;   r   r}   bool_r~   r  r   r   r   r  r>   r  float32r?   r  r  	timestampr=   r  r  r  rZ   rm   rt  r   rN   )r  rs  r   rr   r   r   r   _create_dataset_all_types_  s6   /r  c              
      s  t | \}}t| d }dd l  fdd} fdd} fdd} j} j}t| }	|	d jd us9J |	d jd }
|
jdksGJ |
j	d	ksNJ |
j
i d
ddddddddddddddddddddddddddddddddddddddddddddddddd d!dd"|d|ddd#|d|ddd$|d|ddd%|d&dd'|d&d'd(d|d&dd|d&d'd)d|ddd|dddd|dddd|dddddd*ksJ d S )+Nr   c                    s     ddddd| S N  r   r   r(   r   r  r   r   dt_s      z.test_parquet_fragment_statistics.<locals>.dt_sc              
      s     dddddd| d S )Nr  r   r   r   r  r  r  r   r   dt_ms  r   z/test_parquet_fragment_statistics.<locals>.dt_msc              	      s     dddddd| S r  r  r  r  r   r   dt_us      z/test_parquet_fragment_statistics.<locals>.dt_usrU   r   r  FTr  r~   r   r  r  r   r   r   r  r>   r  r.   r  r  r  r  rW   r  r     a   zr  r  r  r=   r  ru   rO     )r  r  r  )r  ri   rF   r(   r"   timer  r
  r   total_byte_sizer  )r  r   rN   rO   r  r  r  r"   r  r  r  r   r  r    test_parquet_fragment_statistics  sh   








	




r  c                 C   sn   t g dg dd}tj|| d dd tj| d dd}t| d	  }|d
 j	d	 j
i ks5J d S )N)r   r   NN)rW   rX   NNrV   r  ru   r  r   r  r   r   )r;   r   rm   rn   r   rN   ri   rF   r  r
  r  )r  r   rN   r  r   r   r   &test_parquet_fragment_statistics_nulls  s
   r  c                 C   st   t g dg ddd d }|j| d dd tj| d dd	}t| d  }|d jd j	i ks8J d S )
N)rW   rX   rX   rz   r   r  rV   r   r  r   enginer   r  )
r0   r1   
to_parquetr   rN   ri   rF   r  r
  r  )r  rD   rN   r  r   r   r   'test_parquet_empty_row_group_statistics  s
    r  c                 C   s   t | dd\}}t| d }|jtddksJ t|jtddk|jd}t	|dks4J t|jtddk|jd}t	|dksKJ d S )Nru   r  r   r   rW   r   r9   rX   )
ru  ri   rF   r  r  r   r<   r  r9   r   )r  r   rN   rO   r  r   r   r   +test_fragments_parquet_row_groups_predicate  s   r  c                 C   sL  t | dd\}}t| d }|j}t| }|||}||||ks-J |j|j	|j
|jdgd}	||	}
|
||d sKJ |j|j	|j
|jdhd}	|j|	|jddgtddk d	}
|
jddgksrJ t|
dkszJ |j|j	|j
|jdhd}	tjtd
d ||	 W d    d S 1 sw   Y  d S )Nru   r  r   )r  r
  r   r  r   rU   r  zreferences row group 2r   )ru  ri   rF   r   r  r  r  r   r   rr   r   r  r  r9   r   r<   r]  r   r  r  rn  )r  r   r  r   rN   rO   r.  r  r  r  r3  r   r   r   -test_fragments_parquet_row_groups_reconstruct  sH   
"r  c           
      C   s  |\}}t | d|d\}}t| d }|jddgd}|g " |jdks)J |jddgks2J |jd jd us<J W d    n1 sFw   Y  ||}	|	 ddgddgdks_J |jg d}|jdkslJ |jg kssJ |j||j	d}	|	j
dksJ |	|d d sJ d S )	Nr   r  r   rU   row_group_idsru   r  rc   )ru  ri   rF   subsetr  r
  r  r   r,  r9   r   r  
r  r   r   rd   r   r   rN   rO   subfragr3  r   r   r   !test_fragments_parquet_subset_ids  s&   


r  c           
      C   sR  |\}}t | d|d\}}t| d }|tddk}|g " |jdks+J t|jdks4J |jd j	d us>J W d    n1 sHw   Y  |
|}	|	 g dg ddksaJ |tdd	k}|jdksrJ |jg ksyJ |j
||jd
}	|	jdksJ |	|d d sJ |jtddk|jd
}|jdksJ d S )Nr   r  r   r  rU   r  )r   r   r   r  r   rc   r   rW   rz   )ru  ri   rF   r  r   r<   r  r   r
  r  r   r,  r9   r   r  r  r   r   r   $test_fragments_parquet_subset_filter7  s*   


r  c                 C   s   t | dd\}}t| d }tt |jtddkddgd W d    n1 s.w   Y  tt |  W d    d S 1 sHw   Y  d S )Nr   r  r   r  ru   r  )	ru  ri   rF   r  r  rF  r  r   r<   )r  ri  rN   rO   r   r   r   %test_fragments_parquet_subset_invalidW  s   
"r  c           
      C   s  t g d}t g d}t g d}t jj||gddgd}t jj||gddgd}t d	|i}tj|| d
 dd tj| d
 dd}t	|
 d }|jdksVJ |td	ddk}	|	jdkshJ |td	ddk}	|	jdkszJ |td	dddk}	|	jdksJ |td	dddk}	|	jdksJ tjt jdd |td	ddk W d    n1 sw   Y  tjtdd |td	ddk W d    d S 1 sw   Y  d S )N)r   r   ru   rU   )皙?皙?333333?皙?r   ru   rU   rz   f21f22r  r  r  r|  zdata_struct.parquetru   r  r   r  r   r   r   zNo match for FieldRef.Nestedr   f3z)Function 'greater' has no kernel matching)r;   r}   StructArrayr   r   rm   rn   r   rN   ri   rF   r  r  r<   r  r  rW  NotImplementedError)
r  r  r  r  r  
struct_colr   rN   rO   r  r   r   r   0test_fragments_parquet_subset_with_nested_fieldsd  s4   "r  c                 C   s   t | d }t|dkst|dksJ t| \}}tj|dd}t | d }t|d|jt| dks=J | d }t	j
|| tj|d	d}t | d }t|d
|jt| dksiJ d S )Nr   zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[key=xxx, group=1]>zb<pyarrow.dataset.ParquetFileFragment path=subdir/1/xxx/file0.parquet partition=[group=1, key=xxx]>r   r  z*<pyarrow.dataset.ParquetFileFragment path=>data.featherfeatherz,<pyarrow.dataset.FileFragment type=ipc path=)ri   rF   repr_create_single_filer   rN   r   r   rZ   r;   r  write_feather)r  rN   rO   r   rr   r   r   r   test_fragments_repr  s0   r  r`  c                 C      | S r   r   r   mr   r   r   <lambda>      r  c                 C      | || S r   r  r  r  r   r   r   r    r<  c                 C   s   t jddd}t }td}tjddg}|||}t|tjs%J ||_	t
| |||}| }tdt fdt fdt fd	t fd
tt t dfdt fdt fg}	||	skJ tj }
t|
tjsxJ d S )Nr   Tr   r   r   r_   r`   rZ   ra   rb   rV   )rd   r   r   r   r   r   discoverrH   PartitioningFactorypartitioning_factoryr   r<  r;   r9   r>   r?   r@   rb   r   r  r  )ro   r`  r  r  r   r   r  r   r#  r?  hive_partitioning_factoryr   r   r   test_partitioning_factory  s.   







	
r  infer_dictionaryc                 C   r  r   r   r  r   r   r   r    r  c                 C   r  r   r  r  r   r   r   r    r<  c                 C   s4  t jddd}t }td}tjjddg|d}||||_t| |||}|	 }	|rt
t
 t
 }
|	dj|
ksBJ |   }|dd}t
dgd	 d
gd	   }||shJ | jtddkd}|dd}|dd	}||sJ d S |	djt
 ksJ d S )Nr   Tr   r   r   r  r   r  r   r  r   )rd   r   r   r   r   r   r  r  r   r<  r;   ry   r   r@   r<   rw   r   r   combine_chunksr'  r   r}   rd  r  rx  )ro   r  r`  r  r  r   r   r  r   inferred_schemaexpected_typer   r  ro  r   r   r   $test_partitioning_factory_dictionary  s.   
r  c                 C   r  r   r   r  r   r   r   r    r  c                 C   r  r   r  r  r   r   r   r    r<  c              
   C   s  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }dD ]>}	||	 ||	d (}
tj|
|}|| |  W d    n1 suw   Y  W d    n1 sw   Y  qKt jd	d
d}td	}tjj|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tjjddgdd}| |||_t||||}t|  }|d j !tddktddk@ s
J tj|dd}| |||_"t||||}t|  }|d j !tddktddk@ s>J tjj|dd}| |||_t||||}t#j$tj%dd | }W d    n	1 slw   Y  t jdd
d}td}tj&j|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tj&jdd}| |||_t||||}t|  }|d j !tddktddk@ sJ tj&|dd|_"t||||}t|  }|d j !tddktddk@ sJ tj&j|dd}| |||_t||||}t#j$tj%dd | }W d    d S 1 sNw   Y  d S )Nr_   r   rc   r"   r  r@   )z%directory/2021-05-04 00%3A00%3A00/%24z,hive/date=2021-05-04 00%3A00%3A00/string=%24
/0.featherrq   Tr   date_intr&   r   逎`rz  r{  2021-05-04 00%3A00%3A00%24r9   r|  +Could not cast segments for partition fieldr   r   )'rd   re   r   r  r;   r9   r>   r   r}   r,   r  r@   ri   rg   rh   r  new_filern   closer   r   r   r  r  r   r<  r   r   r<   r  as_pyrF   r  r  r   r  r  rW  r  )r`  r  ro   r   r9   r   partition_schemastring_partition_schemafull_schemarq   sinkwriterr   r   r  r   r  r  r  r   r   r   r   *test_partitioning_factory_segment_encoding  s   














$r  c                 C   r  r   r   r  r   r   r   r  Y  r  c                 C   r  r   r  r  r   r   r   r  Y  r<  c              	   C   s  t  }t }tdt fg}tjtt	dg|d}tdt
dfdt fg}tdt fdt fg}tt|t| }tdt
dfdt fg}	tdt fdt fg}
d	}|| ||d
 (}tj||}|| |  W d    n1 sw   Y  W d    n1 sw   Y  t jddd}td}tjj|d}| |||_t||||}| }||ksJ | jdtdt id}|d d  dksJ tjjdd}| |||_t||||}t|  }|d j !tddktddk@ s#J tj|dd}| |||_"t||||}t|  }|d j !tddktddk@ sWJ tjjdd}| |||_t||||}t|  }|d j !tddktddk@ sJ tj|
dd}| |||_"t||||}t|  }|d j !tddktddk@ sJ tjj|	dd}| |||_t||||}t#j$tj%dd | }W d    d S 1 sw   Y  d S )Nr_   r   rc   ztest'; dater  ztest';[ string'ztest%27%3B%20dateztest%27%3B%5B%20string%27zLhive/test%27%3B%20date=2021-05-04 00%3A00%3A00/test%27%3B%5B%20string%27=%24r  r   Tr   r  r&   r   r  r@  r{  z2021-05-04 00:00:00$rz  r  r  r  r  r   )&rd   re   r   r  r;   r9   r>   r   r}   r,   r  r@   ri   rg   rh   r  r	  rn   r
  r   r   r  r  r  r   r<  r   r   r<   r  r  rF   r  r  r   r  r  rW  )r`  r  ro   r   r9   r   r  r  r  partition_schema_enstring_partition_schema_enrq   r  r  r   r   r  r   r  r  r  r   r   r   r   ;test_partitioning_factory_hive_segment_encoding_key_encodedX  s   















$r  c              
   C   s   t g dg dd}tt t dt  t dt  g}tt j	 tj
|| d|d W d    d S 1 s=w   Y  d S )Nr   yNr   r  r  rV   rW   rX   r  r  )r;   r   r   r   r9   r<   r@   r  r  rW  r  r  r   r   r   r   r   /test_dictionary_partitioning_outer_nulls_raises  s   $"r  c                 C   sV   t g dg dd}tt t|| d W d    d S 1 s$w   Y  d S )Nr  r  rV   zbasename-{i}.arrow)r;   r   r  r  r  r   r  )r  r   r   r   r   test_positional_keywords_raises  s   "r  c                 C   s   d}t t d|d t|d d}tj|d | | d dgd tj|d |d  | d dgd tj| d dgd	}|d jdksHJ tj| d dd
gd	}|d jdks\J tj| d dgd	}|d jdksoJ d S )Ni   r   r   )r   r$   rD  r   r}  twor&   r$   ru   )	r;   r   repeatnparangerm   rt  
read_table
num_chunks)r  
BATCH_SIZEr   r   r   r   test_read_partition_keys_only  s&   


r#  c                    s    t  }t fdd|D S )Nc                    s"   g | ]}t jt j |qS r   )osrr   isdirr  )r[   elbasedirr   r   r]     r^   z _has_subdirs.<locals>.<listcomp>)r$  listdirany)r(  elementsr   r'  r   _has_subdirs  s   
r,  c                 C   sZ   t | D ]%}t j| |}t j|r*t||}t|r%t||| q|| qd S r   )	r$  r)  rr   r  r%  	posixpathr,  _do_list_all_dirsr-   )r(  path_so_farr3  r  true_nestednorm_nestedr   r   r   r.    s   
r.  c                 C   s   g }t | d| |S )Nr  )r.  )r(  r3  r   r   r   _list_all_dirs  s   r2  c                 C   s    t t| }|t |ksJ d S r   )r   r2  )r  expected_directoriesactual_directoriesr   r   r   _check_dataset_directories  s   r5  c              
   C   sh   t g dg dd}tt t dt  t dt  g}tj|| d|d t| g d d S )	Nr  r  rV   rW   rX   r  r  )zx/xzy/yr  )	r;   r   r   r   r9   r<   r@   r  r5  r  r   r   r   (test_dictionary_partitioning_inner_nulls  s   $r6  c              
   C   sl   t g dg dd}tt t dt  t dt  gd d}tj|| d|d t| g d	 d S )
N)r   Nr  r  rV   rW   rX   r  r  r  )za=x/b=xz	a=xyz/b=yz	a=z/b=xyz)	r;   r   r   r  r9   r<   r@   r  r5  r  r   r   r   test_hive_partitioning_nulls  s   r7  c                  C   s0  t dt  fdt  fg} ddg}t| }t|tjs J tj| dd}t|tjs/J tj|d}t|tjs=J t	
t t  W d    n1 sQw   Y  t	j
tdd tj| d W d    n1 snw   Y  t	j
tdd tj| | d W d    n1 sw   Y  tj| d	d
}t|tjsJ tj| dd	d}t|tjsJ tjd	d
}t|tjsJ t	
t tj|d	d
 W d    n1 sw   Y  t	j
tdd tj|d	d W d    n1 sw   Y  t	
t tj| dd
 W d    d S 1 sw   Y  d S )Nr   r   inferr  )field_nameszExpected listr   zCannot specify bothr   r~  )r  r  zCannot specify 'field_names')r9  r  unsupported)r;   r9   r   r~   r   r   rH   r   r  r  r  rF  r  )r9   r  r   r   r   r   test_partitioning_function	  s@   

$r;  c                 C   s   t t dt t  t  t dt t  t  g}tjj	|d}tj
dd| |d}|jj|ks7J | }|dj|jd sIJ |d dgd	 d
gd	  ks\J |dj|jd sjJ |d dgd	 dgd	  ks}J d S )Nr   r   rc   r   r   r   r   r   r   r   r   ru   r  r  )r;   r9   r<   ry   r~   r   r@   r   r   r  rN   r   r   r'  rw   r  typesr^  )ro   r9   r   rN   r   r   r   r   *test_directory_partitioning_dictionary_key(	  s   &*r>  c           	      C   s.  t t dt t  t  t dt t  t  g}tjj|d}tj	dd| |d}|j
j|ks7J | }ttdd}ttd	d
}|dj|jd sWJ |djD ]}|j }|  ||ksnJ q]|dj|jd	 s}J |djD ]}|j }|  ||ksJ qd S )Nr   r   rc   r   r   r<  i  i  r      r   )r;   r9   r<   ry   r~   r   r   r  r  rN   r   r   ri   r,   r'  rw   r  r=  chunksr^  sort)	r   r9   r   rN   r   year_dictionarymonth_dictionaryr   r  r   r   r   %test_hive_partitioning_dictionary_key=	  s.   

rD  c                 C   sL   |d u rt tddgd dgd  d}| d }tj|||d ||fS )	N	   r!  rz   r  r   rV   r  r  r;   r   r,   rm   rn   )base_dirr   r  rr   r   r   r   r  Y	  s
   $r  c                 C   s   t tddgd dgd  d}| d }t|| t tdddgd dgd  d}| d	 }t|| ||f||ffS )
NrE  r!  rz   r  r   rV   ztest1.parquetr  ztest2.parquetrF  )rG  r   r   r   r   r   r   r   _create_directory_of_filesa	  s   $&rH  c                 C   sD   | | || fD ]}| j|jsJ || |sJ q
d S r   )r  r  r9   r  r   )rN   r   r   picklerr{   r   r   r   _check_datasetk	  s   rJ  c                 K   s   t | tjsJ | t| | gt| gfD ]}tj| fi |}t |tjs'J t|||| qt| j	" tj| j
fi |}t |tjsGJ t|||| W d    d S 1 sYw   Y  d S r   )rH   pathlibPathrZ   r   rN   r  rJ  r
   parentname)rr   r   r   rI  r   r   rN   r   r   r   _check_dataset_from_pathr	  s   "rO  c                 C   s   t | \}}t|||| d S r   r  rO  r  r   r  r   rr   r   r   r   test_open_dataset_single_file	  s   rR  c                 C   s"   t | dd\}}t|||| d S )Nr   r  rP  rQ  r   r   r   test_deterministic_row_order	  s   rS  c                 C   s(   t | \}}t|}t| ||| d S r   )rH  r;   concat_tablesrO  )r  r   r  tablesri  r   r   r   r   test_open_dataset_directory	  s   
rV  c           
         s   t | \}\}}t|}t||gtt|t|gg}| fdd|D 7 }|D ]}|j|js7J ||}	|	|sCJ q,d S )Nc                    s   g | ]
}   |qS r   r  )r[   r{   r  r   r   r]   	  s    z3test_open_dataset_list_of_files.<locals>.<listcomp>)	rH  r;   rT  r   rN   rZ   r9   r  r   )
r  r   r  rU  r   r   r   datasetsrN   r3  r   rW  r   test_open_dataset_list_of_files	  s   

rY  c                 C   s   t | \}}t|}t|}|j|jsJ tj|t d}|j|js*J t	t
 tj|t d W d    d S 1 sDw   Y  d S )Nr  )r  r   r   rN   r9   r  rd   r   r  r  r  re   )r  r   rr   fspathdataset1dataset2r   r   r   #test_open_dataset_filesystem_fspath	  s   
"r]  c                 C   s   | d }|   t|\}}||}t|}tj|t d}tjt|t|d}	|	|
|}
||||  krP||	  krP||
ksSJ  J d S )Nsingle-filer  )mkdirr  relative_tor   rN   rd   r   rZ   r	   r  r  r   )r  r   r  rq   r   rr   relative_pathd1d2d3d4r   r   r   test_construct_from_single_file	  s   


rf  c                 C   s   | d }|   t|\}}t|}tj|t d}tj|jt| d}||}	||}
||}|	|
  kr@|ksCJ  J |||fD ]}|	|
|}|||	ks[J qHd S )Nsingle-directoryr  )r_  rH  r   rN   rd   r   rN  r	   r   r  r  )r  r   r  rq   rU  r   rb  rc  rd  t1t2t3r{   restoredr   r   r   $test_construct_from_single_directory	  s   



rl  c                    s    d }|   t|\}} fdd|D }t  t|}||}t|ttt|ks3J W d    n1 s=w   Y  tj|t	 d}||}	t|}
||
}tj|t
 d}||}||	  krx|  krx|ks{J  J d S )Nzlist-of-filesc                    s   g | ]}|  qS r   )r`  r   r  r   r   r]   	  r  z5test_construct_from_list_of_files.<locals>.<listcomp>r  )r_  rH  r
   r   rN   r   r   sumrj   r	   rd   r   )r  r   rq   rU  r   relative_pathsrb  rh  rc  ri  rd  rj  re  t4r   rm  r   !test_construct_from_list_of_files	  s    






*rq  c                 C   sJ   ddg}t jtdd tj|| d W d    d S 1 sw   Y  d S )Nr   z!subdir/1/xxx/doesnt-exist.parquetzdoesnt-existr   r  )r  r  r  r   rN   )ro   r  r   r   r   -test_construct_from_list_of_mixed_paths_fails	  s   "rr  c                 C   s   t jddg| d}t jd| d}t ||g}t|t jsJ tt| dks+J | }t|dks7J |jdks>J t|j	dksGJ |j	D ]}|j
ddgksUJ qJd S )	Nr   r   r  r   rz   r  r   ru   )r   rN   rH   UnionDatasetr   ri   rF   r   rM  childrenr  )ro   rW   rX   rN   r   childr   r   r   (test_construct_from_mixed_child_datasets

  s$   
rv  c                  C   s6   t jg dd} |  }|jdksJ |jdksJ d S )Nr  r  r   )r   rN   r   r   rM  )emptyr   r   r   r   test_construct_empty_dataset 
  s   rx  c               	   C   sf   t jg dtdt fdt fgd} tjtdd | 	  W d    d S 1 s,w   Y  d S )Nr  rW   r   r9   zMultiple matches for .*a.* in r   )
r   rN   r;   r9   r>   r@   r  r  rF  r   )rw  r   r   r   *test_construct_dataset_with_invalid_schema'
  s   



"rz  c                    s|  t j| tdt  d}t j| tdt  d}tjjtt	dgdgd tjjtt	dgdgd}t
jtdd	 t ||g W d    n1 sQw   Y  d
}t
jt|d	 t g d W d    n1 sqw   Y  d}t
jt|d	 t d  W d    n1 sw   Y  d}t
jt|d	 t  fddt	dD  W d    n1 sw   Y  d}t
jt|d	 t g  W d    n1 sw   Y  d}t
jt|d	 t  |g W d    n1 sw   Y  d}t
jt|d	 t  dg W d    n	1 sw   Y  d}t
jt|d	 t  dg W d    d S 1 s7w   Y  d S )Nr+  r  /schemar   rW   r  rX   z"Expected.*FileSystemDatasetFactoryr   zExpected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types: intr  zbExpected a path-like, list of path-likes or a list of Datasets instead of the given type: NoneTypezcExpected a path-like, list of path-likes or a list of Datasets instead of the given type: generatorc                 3       | ]} V  qd S r   r   r  batch1r   r   rx  U
      z<test_construct_from_invalid_sources_raise.<locals>.<genexpr>rU   zEMust provide schema to construct in-memory dataset from an empty listzFItem has schema
b: int64
which does not match expected schema
a: int64z}Expected a list of path-like or dataset objects, or a list of batches or tables. The given list contains the following types:r   zCExpected a list of tables or batches. The given list contains a int)r   r   rd   r   r   r;   r&  r   r}   r,   r  r  r  rN   rF  InMemoryDataset)r   child1child2batch2ro  r   r}  r   )test_construct_from_invalid_sources_raise0
  sd   $r  c                 C   s   t jjt tdgdgd}t j|j|g}t j|g}t	j
g dt g d }|t g ks5J |||g|g|fD ]6}t	
|}| ||ksNJ tt| dksZJ t|  |ksfJ t jt| |kstJ q>d S )Nr   rW   r  r  ry  r   )r;   r&  r   r}   r,   RecordBatchReaderrl   r9   rA   r   rN   r   r   r   ri   rF   r/   r   )r   rt   r   r   dataset_tablesourcerN   r   r   r   test_construct_in_memoryr
  s   
r  r   c              	      s   t jjt tdgdgd t j gd} fddd ffddd f fdd jffD ]2\}}tj	j| || d	}|
 ksFJ tjt j|d
 |
  W d    n1 s]w   Y  q0d S )Nr   rW   r  z#OneShotFragment was already scannedc                      s   t j j gS r   )r;   r  rl   r9   r   rt   r   r   r  
  s    z$test_scan_iterator.<locals>.<lambda>c                      s   t  S r   )r   r   r   r   r   r  
  s    c                      s    fddt dD S )Nc                 3   r|  r   r   r  r  r   r   rx  
  r  z7test_scan_iterator.<locals>.<lambda>.<locals>.<genexpr>r   r,   r   r  r   r   r  
  r  r9   r   r   )r;   r&  r   r}   r,   rA   rl   r9   r   rV  r   r  r  rW  )r   r   r   r9   r   r   )rt   r   r   test_scan_iterator
  s$   

r  c                 C   s   t tddgd dgd  d}| d }|  tdD ]}|d	|  }|  t|d| d|d
  q|dt jdgd dgd  dgd  t 	 d}||fS )NrE  r!  rz   r  r   rV   zdataset-partitionedrU   zpart=r  r   r   r   ru   rv   )
r;   r   r,   r_  rm   rn   rx  append_columnr}   r   )r(  r   rr   r7   r   
full_tabler   r   r   _create_partitioned_dataset
  s   $,r  c           
      C   sj  t | \}}|ddg}t|||| tjt|tjddd}|j|js*J t	|  tjdtjddd}|j|jsCJ W d    n1 sMw   Y  tjt|dd}|j|jsdJ tjt|tjt
dt
 fgddd}|jt
dt
 }|j|sJ | }|dt
jdgd	 d
gd	  dgd	  t
 d}	||	sJ d S )NrW   rX   r   r~  r   zdataset-partitioned/r   r   rU   r   ru   rv   )r  r  rO  r   rN   rZ   r   r9   r  r
   r;   r~   r-   r<   r   r  r}   )
r  r   r  r  rr   r   rN   r?  r3  ro  r   r   r   'test_open_dataset_partitioned_directory
  s8   

,r  c                 C   s   t | \}}tt|}|j|jsJ tjt|t d}|j|js*J t|  tjdt d}W d    n1 sBw   Y  |j|jsPJ t	
t tjt|t d W d    d S 1 slw   Y  d S )Nr  r  )r  r   rN   rZ   r9   r  rd   r   r
   r  r  r  re   )r  r   rr   r[  r\  dataset3r   r   r   test_open_dataset_filesystem
  s   
"r  c                 C   sP   t | \}}tjtdd tj|gdd W d    d S 1 s!w   Y  d S )Nz format 'blabla' is not supportedr   blablar  )r  r  r  rF  r   rN   )r  ri  rr   r   r   r   $test_open_dataset_unsupported_format
  s   "r  c                 C   s`   t | \}}t|}t||g}t|tjsJ |||}||||ks.J d S r   )r  r   rN   rH   rs  r  r  r   )r  r   r  ri  rr   rN   unionr`  r   r   r   test_open_union_dataset
  s   
r  c                 C   sT   t jd| dd}tjtdd t j|gdd W d    d S 1 s#w   Y  d S )Nr+  r   r,  zcannot pass any additionalr   r  )r   rN   r  r  rF  )r   ru  r   r   r   .test_open_union_dataset_with_additional_kwargs
  s   "r  c                   C   s|   t t tjddd W d    n1 sw   Y  t jtjdd tjddd W d    d S 1 s7w   Y  d S )Nzi-am-not-existing.arrowr  r  zcannot be relativer   zfile:i-am-not-existing.arrow)r  r  r  r   rN   r;   rW  r   r   r   r   #test_open_dataset_non_existing_file
  s   "r  r   rq   r   r  r  partition_keysr  BCr  )DEFr  )r   NrU   )r  Nr  )Nru   rU   c                    sl  t tddgd dgd  d}d |d v pd |d v }|d	kr&|r&d S |d	kr9tjjd
dg d}d}d }n|rDtjj |d}ntjj d}d}|rR|}nd}| d }	|	  |\}
}|
D ]!}|D ]}|	||pn||pq| }|jdd t	
||d  qfqbtjt|	|d} fdd}|jt d
||
d t d||d }|j|sJ d S )NrE  r!  rz   r  r   rV   r   r   rq   part1part2r  z{0}/{1})r  r  zpart1={0}/part2={1}__HIVE_DEFAULT_PARTITION__rN   T)parentsr  r  c                    sH    rt | trt nt }tt |S t | tr t S t S r   )rH   rZ   r;   r@   r   ry   )r   
value_typer  r   r   r   ?  s   z/test_partition_discovery.<locals>.expected_type)r;   r   r,   r   r   r  r  r_  r   rm   rn   rN   rZ   r9   r-   r<   r  )r  r   r  r  r  r   has_nullfmt
null_valuebasepath
part_keys1
part_keys2r  r  rr   rN   r   r?  r   r  r   test_partition_discovery  sT   $r  c           	      C   sV  t tddgdtdd}tj|dgjdd}tj	|| |d	d
 tj
| d	tjjddd}t |d |d  d}| |sIJ t| d }|j|jd|d d saJ |j}|||}| |suJ |||}|j|jd|d d sJ |j|jd |d d  sJ |j|sJ d S )Nr  r  r   r   r   r|  r   r   r~  r  r   r   Tr  r  r|  )r|  r   r   rc   )r;   r   r  r  r,   r   r   r  r9   r  rN   r  r  rd  r   r  ri   rF   r  r  r  r  )	r  r  r   r   rN   ro  rO   	part_exprrk  r   r   r   4test_dataset_partitioned_dictionary_type_reconstructM  s,      r  c              	   C   s   ddl m} | d \}}}}d| d| d| d| d	}||\}}|d td	g d
i}	|d}
t|	|
 W d    n1 sHw   Y  |	|||||||fS )Nr   
FileSystem
connections3://:z5@mybucket/data.parquet?scheme=http&endpoint_override=z&allow_bucket_creation=TruemybucketrW   r  zmybucket/data.parquet)	r   r  from_urirg   r;   r   rh   rm   rn   )	s3_serverr  rA  rB  rC  rD  r@  rd   rr   r   rs   r   r   r   r?  l  s   
r?  c                 C   s^   | \}}}}}}}}t j|dd}|||sJ t j|d|d}|||s-J d S )Nr   r  r   r   )r   rN   r   r  )r?  r   r   rr   rd   r@  ri  rN   r   r   r   test_open_dataset_from_uri_s3  s
   r  c           
      C   sP   | \}}}}}}}}t d}||}tj|d|d}	||	|s&J d S )Nr  r   r  )rd   r   r3  r   rN   r   r  )
r?  r   r   rr   r   r@  ri  r   finfosrN   r   r   r    test_open_dataset_from_fileinfos  s
   

r  c                 C   s   | \}}}}}}}}t d}ddlm}	m}
 |j||dd| d| id}tj|d|d	}| 	|s8J |
|	|}tj|d|d	}| 	|sOJ d S )
Ns3fsr   )FSSpecHandlerr   endpoint_urlzhttp://r  )r   secretclient_kwargsr   r  )
r  importorskipr   r  r   S3FileSystemr   rN   r   r  )r?  r   rr   ri  rA  rB  rC  rD  r  r  r   rd   rN   r   r   r   $test_open_dataset_from_uri_s3_fsspec  s   
	r  c                 C   sD  ddl m} | d \}}}}d}d}d| d| d| d	| d
| d| d}||\}	}|dks4J |	| tdg di}
|	|}t|
| W d    n1 sXw   Y  t	j
|dd}| |
smJ d||||}g d}|D ]\}}||}t	j
||dd}| |
sJ q{tjtjdd |d	}t	j
d|d W d    n1 sw   Y  d}d}||}tt}t	j
d|d W d    n1 sw   Y  t|j|d||ksJ d}||}tt}t	j
d|d W d    n	1 sw   Y  t|j|d||ks J d S )Nr   r  r  theirbucketnested/folder/data.parquetr  r  @r   z?scheme=http&endpoint_override=z&allow_bucket_creation=truez&theirbucket/nested/folder/data.parquetrW   r  r   r  3s3://{}:{}@{{}}?scheme=http&endpoint_override={}:{}))ztheirbucket/nested/folder/z/data.parquet)ztheirbucket/nested/folderdata.parquet)ztheirbucket/nested/folder/data.parquet)ztheirbucket/nestedr  )r  z/nested/folder/data.parquet)r  r  r,  zMissing bucket namer   z'/theirbucket/nested/folder/data.parquetr  zThe path component of the filesystem URI must point to a directory but it has a type: `{}`. The path component is `{}` and the given filesystem URI is `{}`ztheirbucket/doesnt/existr  NotFoundFile)r   r  r  rg   r;   r   rh   rm   rn   r   rN   r   r  r   r  r  rW  rF  rZ   r$   )r  r  rA  rB  rC  rD  bucketrr   r@  rd   r   rs   rN   templaterl  prefixr  excr   r   r   -test_open_dataset_from_s3_with_filesystem_uri  s\   




"r  c                 C   sD   t | \}}td}|d}tj||d}|j|js J d S )Nfsspecfiler  )r  r  r  r   r   rN   r9   r  )r  r   rr   r  r   rN   r   r   r   test_open_dataset_from_fsspec  s
   

r  c           	      C   s   t d}tdg di}| d }t|| |d}|| d ds)J t	
 }tt|}|||}||jsCJ |||}|j|jsRJ d S )Nr  rW   r  r  r  r   )r  r  r;   r   rm   rn   r   lsendswithr   r   rd   r   r  r<  r  r9   r   rv  )	r  r  r   rr   	fsspec_fsr   r   r9   rO   r   r   r   test_file_format_inspect_fsspec  s   

r  c                 C   s  | d }t ddgd tdd}tj|dgjdd	}tj|||d
d tjt dt dfgdd	}tj	|d
|d}t
dtdk}|j||d}|d g dks]J dd l}t
d|dddk}|j||d}|d g dksJ d S )Ntest_partition_timestamps
2012-01-01z
2012-01-02r   r   )datesr  r  r   r~  r  r  r  r  r   r  )r   rU   r   rL  rE  r   i  r   )r;   r   r,   r   r   r  r9   r  r  rN   r<   r0   	Timestampr   r'  r^  r(   )r  r   rr   r   r   rN   r2  r(   r   r   r   test_filter_timestamp  s$   
r  c                 C   sh   t dt jg dt  di}t| |\}}tt|}tddk}t	|j
||ddks2J d S )NrW   )r   r   ru   rU   rz   r   rv   ru   r   rU   )r;   r   r}   r~   r  r   rN   rZ   r<   r   r   )r  r   r   ri  rr   rN   filter_r   r   r   test_filter_implicit_cast8  s
    r  c                 C   s^   t dg di}t| |\}}tt|}|j|tdtd kd}|j	dks-J d S )Nr  )rW   rX   Nr   r   )
r;   r   r  r   rN   rZ   r   r<   r  r   )r  r   r   ri  rr   rN   r   r   r   test_filter_equal_nullC  s   r  c           	      C   s   t g ddd tdD dd tddD d}t| |\}}tt|}tt	d	t 
d
dg}|j||djdksBJ tt	ddk}|j||djdksXJ tt	dt	d}|j|d|id}|d  g dksyJ d S )N)rW   rX   NrW   r|   c                 S   s   g | ]
}t  d dd|qS i  r   r  r   r   r   r   r]   T      z2test_filter_compute_expression.<locals>.<listcomp>r   c                 S   s   g | ]	}t  d d|qS r  r  r   r   r   r   r]   U  r   r   r  r  r  rW   rX   r   rU   r  ru   r  r   r&   r  )r;   r   r,   r  r   rN   rZ   r  is_inr<   r}   r   r   hourdays_betweenr^  )	r  r   r   ri  rr   rN   r  r   r3  r   r   r   test_filter_compute_expressionP  s   r  c                 C   s   t j| tdt  d}t |g}t| dksJ tdd | D s*J | d 	|
 s7J |
 	|
 sBJ t| t jsLJ d S )Nr+  r  r   c                 s   s    | ]	}t |tjV  qd S r   )rH   r;   r%  )r[   r  r   r   r   rx  n  s    z%test_dataset_union.<locals>.<genexpr>r   )r   r   rd   r   r   UnionDatasetFactoryr   r!  r  r  r<  rH   r   r$  )r   ru  r   r   r   r   test_dataset_unione  s   
r  c                 C   s  t jd|dd}t jd|dddgd}t jd|dd	d}|j|j  kr*|jks-J  J t |||g}t|t js=J d
}tjt|d t j||g|d W d    n1 sZw   Y  tdt	 fdt
 fdt fdt fdt fdt fdt fg}|j|sJ | j|sJ t ||g}tdt	 fdt
 fdt fdt fdt fdt fg}|j|sJ | j|sJ tdt fdt fdt	 fg}t j||g|d}| j|sJ tdt fdt fdt fg}t j||g|d}| j|s$J tjtddgd dgd  dgg dd}t| |d\}	}
t |
}tjtjdd t ||g W d    d S 1 scw   Y  d S )Nr+  r   r,  r{  weekr%   r   r   r   /hiver   z$cannot pass any additional argumentsr   r  r"   r#   r$   r   r   rc   rP  rE  r!  rz   r  r   	abcdefghj)r"   r$   r#   r  r   zUnable to merge)r   rN   r9   rH   rs  r  r  rF  r;   r=   r>   r?   r@   r   r  r   r   r,   r  r;  )r  r   r  r  child3	assembledmsgr?  r   ri  rr   child4r   r   r   &test_union_dataset_from_other_datasetst  st   

"






	











 
$r  c                 C   sJ   d}t jt|d tjg d| d W d    d S 1 sw   Y  d S )Nz8points to a directory, but only file paths are supportedr   )r+  r{  r  r  )r  r  IsADirectoryErrorr   rN   )r   r  r   r   r   4test_dataset_from_a_list_of_local_directories_raises  s   "r  c              
   C   s   t t jd| dt jd| dt jd| dg}tdt fdt fdt fdt fg}|j|s8J t t jd| dt jd| dt jd| d	d
g}tdt fdt fdt fdt fdt	 fdt	 fg}|j|s{J d S )Nr+  r  r{  r  r"   r#   r$   r%   r   )r   r   r   r   )
r   rN   r;   r9   r=   r>   r?   r@   r  r   )r   rN   r?  r   r   r   &test_union_dataset_filesystem_datasets  s4   









r  c                    s  t g dg dd}t|d  d fdd	}d }|}||||jd |j}|}||| t dd	g}t jg dg dgd
dgd}||| t d	g}t jg dgdgd}||| t d	dg}t jg dt jg dddgddgd}||| t ddg}tjtd |d}t j|d 	d|d
 gdd
gd}||| t dt 
t  fdg}tjtd |d}|j|sJ tjtdd  | W d    d S 1 sw   Y  d S )Nr  r  r  r  rV   r  c                    s\   t jtd | d}|d ur|j|sJ n|j| s J  |}||s,J d S )Nr  rc   )r   rN   rZ   r9   r  r   )r9   ro  r?  rN   r3  r   r  r   r   rJ    s   
z-test_specified_schema.<locals>._check_dataset)r?  )rX   r?   )rW   r>   rX   rW   r  )r|   r   NNNr   rv   r|   )rW   r   rc   z#Unsupported cast from int64 to listr   r   )r;   r   rm   rn   r9   r}   r   rN   rZ   r  list_r   r  r  r  r  r   )r  r   r   rJ  r9   ro  rN   r   r  r   test_specified_schema  sL   






"r  c                 C   s   | d }t dg di}t|| t dt  fg}tjt|gd |d}|j	|s1J |
|}tjtdd | }|  W d    d S 1 sQw   Y  d S )Nr  rW   r  d   rc   z#Unsupported cast from int64 to nullr   )r;   r   rm   rn   r9   r  r   rN   rZ   r  r   r  r  r  rZ  r[  )r  r   fnr   r9   rN   r   r   r   r   r   test_incompatible_schema_hang  s   

"r  c           	      C   s   t t jg dddt jg dddd}t| d }t |}t ||j}|| d  |	  W d    n1 s@w   Y  t
j|t
 d	}||}||sZJ t| d
D ]}t
j||d	}||}||suJ q`d S )Nr  r~   rv   r  r?   rV   z
test.arrowr   r  )r  arrow)r;   r   r}   rZ   output_streamRecordBatchFileWriterr9   write_batchr   r
  r   rN   r  r   r  rQ   )	r  r   r   rr   r  r  rN   r3  
format_strr   r   r   test_ipc_format(  s$   


r  c              	   C   s  ddl m} ttjg dddtjg dddd}t| d	 }||| tj|t	 d
}t
| }t|d tjsAJ ||}|jdd ||sSJ t| tj|dd
}||}|jdd ||spJ |j|dgd}|jdd ||dgsJ |j|dtdd id}|jdd |tdtjg dddisJ ||dksJ |j|tddkddksJ d S )Nr   orcr  r~   rv   r  r?   rV   test.orcr  Tr9  r  rX   r&   b2ru   )r  r  g333333?rU   rW   r   r   )r   r  r;   r   r}   rZ   rn   r   rN   r
  ri   rF   rH   FileFragmentr   r=  r  rQ   r  r<   r   )r  r   r  r   rr   rN   r  r3  r   r   r   test_orc_format>  s:   

$r  c                 C   s6  ddl m} ttjg dddtjg dddd}t| d	 }||| tj|d
d}t	|
|}t|dks>J |d jdksGJ |d |
 d sTJ t	|j
|dd}t|dkseJ |d jdksnJ |d |dd
 d sJ |d jdksJ |d |dd
 d sJ d S )Nr   r  r  r~   rv   r  r?   rV   r  r  r  r   rU   ru   )
batch_size)r   r  r;   r   r}   rZ   rn   r   rN   ri   r   r   r   r  rx  )r  r   r  r   rr   rN   r3  r   r   r   test_orc_scan_optionse  s"   "&r
  c                  C   sh   z	ddl m}  W d S  ty3   tjtdd tjddd W d    Y d S 1 s+w   Y  Y d S w )Nr   r
  z'not built with support for the ORC filer   r  r  r  )r  r
  r  r  r  rF  r   rN   r  r   r   r   test_orc_format_not_supported|  s   &r  c                  C   s   t jtdd tjtdtdiddd W d    n1 s!w   Y  t } t jtdd | 	  W d    d S 1 sAw   Y  d S )Nz9Writing datasets not yet implemented for this file formatr   rW   r   r  z/tmp)r   rG  )
r  r  r  r   r  r;   r   r,   r
  make_write_options)ofr   r   r   +test_orc_writer_not_implemented_for_dataset  s   
"r  c                 C   s   t t jg dddt jg dddd}t| d }| j|dd	 tj|t d
}|	|}|
|s:J t| tj|dd
}|	|}|
|sQJ d S )Nr  r>   rv   r  r?   rV   test.csvFr#   r  r  )r;   r   r}   rZ   r  to_csvr   rN   r  r   r  rQ   )r  r   r   rr   rN   r3  r   r   r   test_csv_format  s   

r  compression)bz2gziplz4zstdc                 C   s   t j|st| d ttjg dddtjg dddd}t	 }|dkr.|nd	}t
| d
|  }|j||d}| jdd}||d W d    n1 s[w   Y  tj|t d}	||	}
|
|suJ d S )Nz support is not builtr  r>   rv   r  r?   rV   r  gzz	test.csv.r  Fr  rL  r  )r   Codecis_availabler  skipr;   r   r}   rd   r   rZ   rh   r  r  writer[  r   rN   r  r   r  )r  r  r   r   r   suffixrr   r  csv_strrN   r3  r   r   r   test_csv_format_compressed  s   
r!  c              	   C   s  t | d }t|d}|d W d    n1 sw   Y  tj|dd}||}|tdt	g dis=J tj|tj
tjjdd	d
d}||}|tdt	ddgiscJ tj|tj
tjjdgdd
d}||}|tdt	g disJ d S )Nr  wzskipped
col0
foo
bar
r  r  skipped)col0r  r  r   )r  r  r$  r  r  r]  )r#  r$  r  r  )rZ   rG   r  r   rN   r   r  r;   r   r}   r  r  r  )r  r   rr   r  rN   r3  r   r   r   test_csv_format_options  s*   



"


r&  c              
   C   s   t | d }t|d}|d W d    n1 sw   Y  tj|tjtjjdddd}|	|}g d}|j
|ks@J |ttd	gtd
gtdgtd	gds_J d S )Nr  r"  z1,a,true,1
T)autogenerate_column_namesr  r  )f0r  r  r  r   rW   )rZ   rG   r  r   rN   r  r;   r  r  r   r]  r  r   r}   )r  r   rr   r  rN   r3  expected_column_namesr   r   r   (test_csv_format_options_generate_columns  s   





r*  c           	   	   C   s*  t | d }t|d}|d W d    n1 sw   Y  tj|dd}tjjdgdd}tj|t	jj
d	d
d}|j||d}|t	dt	g disTJ tj|d}tj||d}||}|t	dt	g diswJ t }|j||d}|t	dt	g disJ d S )Nr  r"  zcol0
foo
spam
MYNULL
r  r  MYNULLT)null_valuesr  r  r  )r  r  )fragment_scan_optionsr$  )r  spamNr  )r  r.  r+  )rZ   rG   r  r   rN   r   r  r  r  r;   r  r   r  r   r}   r  )	r  r   rr   r  rN   r  r   r3  r_  r   r   r   test_csv_fragment_options  s.   
"
"r/  c                 C   s   t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
|t	 d}||}||s[J t| t	j
|dd}||}||srJ d S )Nr  r>   rv   r  r?   rV   	test.jsonrecordsorientr   rE  },{}
{r"  r  r	  )r;   r   r}   rZ   r  to_jsonreplacerG   r  r   rN   r  r   r  rQ   r  r   r   rr   rs   r  rN   r3  r   r   r   test_json_format  s    

r9  c                 C   s  t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
tdd tj|tjt jjdddd}W d    n1 shw   Y  tj|tjt jjdddd}||}||sJ d S Nr  r>   rv   r  r?   rV   r0  r1  r2  r   rE  r4  r5  r"  ztry to increase block sizer   rz   r  r  r  @   )r;   r   r}   rZ   r  r6  r7  rG   r  r  r  rF  r   rN   r  r	  r  r   r  r8  r   r   r   test_json_format_options  s(    



r<  c           	      C   s*  t t jg dddt jg dddd}t| d }| jdd	d
d dd}t|d}|| W d    n1 sAw   Y  t	j
tdd tjt jjddd}tj|t|d}W d    n1 smw   Y  tjt jjddd}tj|t|d}||}||sJ d S r:  )r;   r   r}   rZ   r  r6  r7  rG   r  r  r  rF  r   r  r	  r  rN   r  r   r  )	r  r   r   rr   rs   r  r   rN   r3  r   r   r   test_json_fragment_options.  s,    
r=  c              	   C   s   t | d }dD ]^\}}t|d}|| W d    n1 s!w   Y  tdt fdt fg}tjdgdgd|d	}tjj|d
}t	j
|d}	t	j||	d}
|
j|s]J |
 |sfJ qd S )Nr  ))latin-1s   a,b
un,lphant)utf16s    a , b 
 u n ,  l  p h a n t wbrW   rX   un
   éléphantrV   rc   encodingr  r  )rZ   rG   r  r;   r9   r@   r   r  r  r   r  rN   r  r   )r  r   rr   rD  
input_rowsr  r?  r@  r  r   dataset_transcodedr   r   r   test_encodingE  s"   rG  c           
      C   s  t | d }t|d}|d W d    n1 sw   Y  tdt fdt fg}tjdgdgd|d	}tj|d
|d}t	j
tjjdd || W d    n1 s\w   Y  tjjdd}tj|d}tj||d}	|	j|s}J |	 |sJ d S )Nr  r@  s   ,b
un,lphant   érX   rA  rB  )rH  rX   rc   r  ry  zinvalid UTF8r   r>  rC  r  r  )rZ   rG   r  r;   r9   r@   r   r   rN   r  r  r   r=  rW  r   r  r  r  r  )
r  r   rr   r  r?  r@  rN   r  r   rF  r   r   r   test_column_names_encoding^  s&   rI  c                 C   sT  ddl m} ttjg dddtjg dddd}| d	 }|  ||t|d
  tj|t	 d}|
|}||sBJ t| tj|dd}|
|}||sYJ |j
|ddgd}|jddgkskJ |j
|ddgd}|jddgks}J ||t|d dd tt |
tj|dd W d    d S 1 sw   Y  d S )Nr   )r  r  r~   rv   r  r?   rV   feather_datasetr  r  r  rX   rW   r&   zdata1.featherr   version)pyarrow.featherr  r;   r   r}   r_  rZ   r   rN   r  r   r  rQ   r]  r  r  rF  )r  r   r  r   r(  rN   r3  r   r   r   test_feather_formatv  s,   

"rN  )r  r  brotlic                 C   s  t t jdgd ddt jg dd ddd}t j|s#t  | d	 }|  t	 }| d
 }|  tj
|t|d ||jd dd |dkrtjtdd |j|d}W d    n1 sdw   Y  tjtdd t |}|j|d}W d    d S 1 sw   Y  d S |j|d}tj
|t|d ||d tj|t	 d}	||	}
|
|sJ |d d }| j}|d d }| j}||k sJ d S )Nr   ,  r~   rv   r  r  r?   rV   feather_dataset_compressedfeather_dataset_uncompressedz
data.arrowr  r   file_optionsrO  zCompression typer   r  part-0.arrow)r;   r   r}   r  r  r  r  r_  r   r  r  rZ   r  r  rF  rN   r   r  statst_size)r  r  r   r   r(  r   uncompressed_basedirwrite_optionscodecrN   r3  compressed_filecompressed_sizeuncompressed_fileuncompressed_sizer   r   r   test_feather_format_compressed  sX   







r_  c                 C   sp   g }t dD ]}t|gd dd t dD d}tj|t| |d qt| d }tj|j||d ||fS )zO
    Creates a simple (flat files, no nested partitioning) Parquet dataset
    rz   r   c                 S   s   g | ]}t   qS r   r  r  r   r   r   r]     r  z2_create_parquet_dataset_simple.<locals>.<listcomp>r  metadata_collector	_metadata)r,   r;   r   rm   rt  rZ   write_metadatar9   )	root_pathra  r7   r   metadata_pathr   r   r   _create_parquet_dataset_simple  s   $
rf  c                 C   s\   | d }t |\}}t|}|j|jsJ t|jdks!J | }|jdks,J d S )Nrq  rz   (   )	rf  r   parquet_datasetr9   r  r   r  r   r   )r  rd  re  r   rN   r3  r   r   r   test_parquet_dataset_factory  s   
ri  win32z'Results in FileNotFoundError on Windows)reasonc           	      C   s   t d}| d }t|\}}|d}tt|}tj||d}|j	
|j	s,J t|jdks5J | }|jdks@J d S )Nr  rq  r  r  rz   rg  )r  r  rf  r   rd   r   r  r   rh  r9   r  r   r  r   r   )	r  r  rd  re  r   r  r   rN   r3  r   r   r   #test_parquet_dataset_factory_fsspec  s   

rl  c                 C   s   | d }t dgd tjdd}g }tj|t||d t|d }tj|j	||d t
|}|j	|j	s<J | }|jdksGJ d S )Nrq  r   r   r  r`  rb  )r;   r   r  r  randnrm   rt  rZ   rc  r9   r   rh  r  r   r   )r  rd  r   ra  re  rN   r3  r   r   r   &test_parquet_dataset_factory_roundtrip  s   	

rn  c           	   	   C   s   g }t dD ]-}tdtt |d |d d i}| | d }tj|||d |d | d qt| d }t|j	|| t
|}| }|d }|tt dd	ks]J d S )
Nr   r  r   rR   r`  rE  rb  r   r  )r,   r;   r   ri   rm   rn   set_file_pathrZ   rc  r9   r   rh  r   r'  r^  )	r  	metadatasr7   r   
table_pathre  rN   scanned_tablescanned_colr   r   r   "test_parquet_dataset_factory_order   s   
rt  c                 C   s   | d }t |\}}t|dd   t|}|j|js#J t|j	dks,J t
t |  W d    d S 1 sAw   Y  d S )Ntest_parquet_dataset_invalid	*.parquetr   rz   )rf  ri   globunlinkr   rh  r9   r  r   r  r  r  r  r   )r  rd  re  r   rN   r   r   r   $test_parquet_dataset_factory_invalid6  s   

"ry  c                 C   sz   t t| d}t|d j }g }|D ]}t|j}|t	|
|  || q| d }tj|||d |S )Nrv  r   rb  r`  )ri   r-  rglobrm   rg  r9   to_arrow_schemar  ro  rZ   r`  r-   rc  )rd  parquet_pathsr9   ra  rr   r  re  r   r   r   _create_metadata_fileD  s   r}  c              	   C   sr   t jt tdt tjdt tddgdgg dd}|ddi}t	j
|t| d	gd
 t| |fS )Nr  rW   rX   r   r  r  r   r$   r   r}  )r;   r   r}   r,   r  r  rm  r  rC   rm   rt  rZ   r}  )rd  r   r   r   r   #_create_parquet_dataset_partitionedV  s   r~  c                 C   s   | d }t |\}}tjdd}tj||d}|j|js J t|jdks)J | }|j	dks4J |
 djdd	}|
 }tj|| d S )
N(test_parquet_dataset_factory_partitionedr   r~  r  ru   r  r  Tdrop)r~  r   r   rh  r9   r  r   r  r   r   r  sort_valuesreset_indexr0   testingassert_frame_equal)r  rd  re  r   r   rN   r3  ro  r   r   r   r  a  s   r  c                 C   sh   | d }t |\}}tj|dd}|j|jsJ d|jjv s"J t| }d|d jjv s2J d S )N%test_parquet_dataset_factory_metadatar   r     keyr   )	r~  r   rh  r9   r  r  ri   rF   rv  )r  rd  re  r   rN   r  r   r   r   r  u  s   r  c           
      C   sX  |\}}| d }t |\}}||g tj|tjdd|d}W d    n1 s*w   Y  |g  t| }W d    n1 sDw   Y  |g  t|tddk W d    n1 sdw   Y  |g  |d tddk W d    n1 sw   Y  |g  |d  }	|	d   W d    d S 1 sw   Y  d S )N#test_parquet_dataset_lazy_filteringr   r~  )r   r   r     r   )	rf  r   rh  r   ri   rF   r<   r  r  )
r  r   rd   r   rd  re  ri  rN   r  rg_fragmentsr   r   r   r    s.   




"r  c                 C   sp   t dg di}| d }|| t|}||j}|j|dgdj}d|jv s-J |j|dds6J d S )NrW   r  r  r&   s   pandasTr  )	r0   r1   r  r   rN   r   r9   r  r  )r  r   rD   rr   rN   r9   rY  r   r   r   test_dataset_schema_metadata  s   

r  c                 C   s   t dt jg dddi}t|t| d  t dt  fg}tj	| d d|d}|j
|tddkd	}|d |d d
dsIJ t| d }|j
|tddk|d}|d |d d
dsoJ d S )Nr|  r  r   rv   r  r   ry  ru   r   r>   r   r  )r;   r   r}   rm   rn   rZ   r9   r>   r   rN   r   r<   r  r  rx  ri   rF   )r  r   r   r9   rN   filteredrO   r   r   r   test_filter_mismatching_schema  s   
"&r  c                 C   s   t d ttdd}t| d }tj||dgd tj	|dd}|
|}|j
|dgd	}|d|ds>J d S )
Nza a b brz   r  r5  r   r}  r   r  r&   )r;   r   r"  ri   r,   rZ   rm   rt  r   rN   r   r'  r  )r  r   r   rr   rN   all_cols	part_onlyr   r   r   +test_dataset_project_only_partition_columns  s   
r  c                 C   s   t dtjg dddi}| d }|j|dd tj|dtdt	 fgd	}t
dtg dt	 i}|||sBJ d S )
Nr|  r  objectdtypez(test_dataset_project_null_column.parquetr   r  r   ry  )r0   r1   r  r}   r  r   rN   r;   r9   r>   r   r   r  )r  r   rD   r  rN   ro  r   r   r    test_dataset_project_null_column  s   r  c                 C   s   ddl m} tg dg dg dd}||| d  tj| d dd	}|j|td
tdj	dddtddkdd}tg dtj
g dddg dd}||s\J tjtdd |j|d
d
id W d    d S 1 sxw   Y  d S )Nr   r  r  )r  r"  r#  rM  r  r  r  r  r  r  r   Fsafer  rW   )	A_renamedB_as_intC_is_ar&   rv   )TFFzExpected an Expressionr   )r   r  r;   r   r  r   rN   r   r<   r  r}   r  r  r  r  )r  r   r  r   rN   r3  ro  r   r   r   test_dataset_project_columns  s$   
"r  c           	      C   sr  t | \}}t|}t|jtjsJ t| \}}t|}t|jtjs(J tj|dd}|j}|d us8J t|tjs@J |jt	dt	
 fgksOJ t|jdksXJ |jd t	g dt	
 ksiJ tjt	dt	
 fgdd}t|tjsJ t|jdksJ tdd	 |jD sJ tj||d}|j}t|tjsJ |jt	dt	
 fgksJ t|jdksJ td
d	 |jD sJ tj|dd}tjt| |j|j|jd}|jd u sJ | d }t|\}}tj|dd}|j}|d usJ t|tjsJ |jt	dt	 fgksJ t|jdks'J t|jd  ddhks7J d S )Nr   r  r   r   r   )r   r   ru   r~  c                 s   rw  r   r   r   r   r   r   rx  +  ry  z6test_dataset_preserved_partitioning.<locals>.<genexpr>c                 s   rw  r   r   r   r   r   r   rx  2  ry  r   zdata-partitioned-metadatarW   rX   )r  r   rN   rH   r   r   r  r  r9   r;   r   r   r  r}   r  r  ri   rF   r   r   r~  rh  r@   r   r^  )	r  ri  rr   rN   r  r   r\  rd  re  r   r   r   #test_dataset_preserved_partitioning  sL   

" $r  c                 C   s   t t dt  t dt t  t  g}t jg dtt	dd|d}t
| d }tj||dgd t| d }|d |d ksOJ |d|ds\J d S )	Nr|  r   )NNrW   rW   rz   r  rc   r5  r}  )r;   r9   r<   r>   ry   r   r@   r   ri   r,   rZ   rm   rt  r   r'  r^  r  )r  r9   r   rr   actual_tabler   r   r   +test_write_to_dataset_given_null_just_worksJ  s    

r  c                 C   s2   dd l m} |j| ||dfgd}|| |S )Nr   	ascending)r   )pyarrow.computecomputesort_indicesSortOptionsr   )tabsort_colr  sorted_indicesr   r   r   _sort_table_  s
   r  c                 C   st   |p|}t j| |d|dd t|d}t|t|ksJ t j|d|d}t| |t|  |s8J d S )Nr  Fr   r   r   *r  )	r   r  ri   rz  r   rN   r  r   r  )rN   rG  expected_filesr  base_dir_pathr   
file_pathsr\  r   r   r   _check_dataset_roundtripf  s   
r  c                 C   s   | d }|   t|}t|}| d }|d g}t|t||d| | d }|d g}t|||d| | d }|d g}t|  t|d|d| W d    n1 sUw   Y  | d }|   t|}t|}| d	 }|d g}t|t||d| d S )
Nr^  zsingle-file-targetrU  rW   zsingle-file-target2zsingle-file-target3z./single-file-target3rg  zsingle-directory-target)r_  r  r   rN   r  rZ   r
   rH  )r  rq   ri  rN   targetr  r   r   r   test_write_datasety  s0   







r  c                 C   s   | d }t |}tjdd}tj||d}| d }|d |d d |d |d d g}tjtd	t fgdd}t|t||d
||d | d }|d |d d |d |d d g}ttd	t fg}t|t||d
||d d S )Npartitionedr   r~  r  zpartitioned-hive-targetpart=arU  part=br   r  partitioned-dir-targetrW   rX   )	r~  r   r   rN   r;   r9   r@   r  rZ   )r  rq   ri  r   rN   r  expected_pathsr  r   r   r   test_write_dataset_partitioned  s4   
r  c                    s   t g dg dd}tj| ddgd tj ddgd}|j} fdd|D }|h d	ks3J | }||s>J d S )
Nr  r  rV   r  rX   r  c                    "   h | ]}t t| jqS r   rZ   rK  rL  r`  rM  r  rm  r   r   r         z6test_write_dataset_with_field_names.<locals>.<setcomp>>   r   r  r  r;   r   r   r  rN   r  r   r  r  r   r  r  partitioning_dirsr  r   rm  r   #test_write_dataset_with_field_names  s   

r  c                    s   t g dg dd}tj| ddgdd tj ddd}|j} fd	d
|D }|h dks3J | }||s>J d S )Nr  r  rV   r  rX   r   )r   r   partitioning_flavorr  c                    r  r   r  r  rm  r   r   r     r  z;test_write_dataset_with_field_names_hive.<locals>.<setcomp>>   b=xb=yb=zr  r  r   rm  r   (test_write_dataset_with_field_names_hive  s   

r  c                 C   s   t g dg dg dd}tj|| ddgd tj| ddgd}t 5}tj|jddgd	|ddgd tj|ddgd}| }t	|
 |d

 ksSJ W d    d S 1 s^w   Y  d S )Nr  r  r  rM  r  rX   r  r|   r&   rW   )r;   r   r   r  rN   r  r  r   r   r  r,  drop_columnsr  r   rN   tempdir2r  r  r   r   r   test_write_dataset_with_scanner  s"   



"r  c           	         sH  t  G fdddt}t|t ttdt	 g}tj
tttdg|d dd}dd	 fd
d}tjj| |d	dt jfddd}|  z;t fdd}d}d}| dk r|kr~|kr|d	}n}td | dk sq|sJ W d  |  d S d  |  w )Nc                       s   e Zd Z fddZdS )z6test_write_dataset_with_backpressure.<locals>.GatingFsc                    s       | jj||dS )Nr  )waitr   rh   )r   rr   r  consumer_gater   r   rh     s   zItest_write_dataset_with_backpressure.<locals>.GatingFs.open_output_streamN)r   r   r   rh   r   r  r   r   GatingFs  s    r  r6   r  rc   r          Tc                   3   s:    k rs	d S t d d7  V  k sd S d S )Ng{Gz?r   )r  sleepr   )rt   batches_readend
keep_goingr   r   counting_generator  s   
z@test_write_dataset_with_backpressure.<locals>.counting_generatorr  c                      s   t jtd dS )Nr   r  )r   r  rZ   r   )	gating_fsr   r  r   r   r    s    z6test_write_dataset_with_backpressure.<locals>.<lambda>)r  c                      s   t     S r   )r  r   )startr   r   duration  r   z6test_write_dataset_with_backpressure.<locals>.durationFr   r  )	threadingEventr   rd   r   r   r;   r9   r<   r   rk   r}   ri   r,   r   rV  rl   Threadr  r  r  r   r  )	r  r  r9   min_backpressurer  write_threadr  
last_valuebackpressure_probably_hitr   )	rt   r  r  r  r  r  r   r  r  r   $test_write_dataset_with_backpressure  sJ   	




r  c                 C   s   t g dg dd}tj|| ddgd tj| ddgd}t ,}tj||ddgd tj|ddgd}| }t|	 |	 ksGJ W d    d S 1 sRw   Y  d S )Nr  r  rX   r|   r  rX   r  )
r;   r   r   r  rN   r  r  r   r  r,  r  r   r   r   test_write_dataset_with_dataset@  s   

"r  c           	      C   s  | d }t g dg dd}tjt t dt  gdd}dd	 }tj|||d
d t g dg dd}t	t j
 tj|||d
d W d    n1 sTw   Y  t ddgi}|d d }tj|| tj|||d
dd t g dg dd}tj| d
|d }||| | sJ tj|||d
dd t g dg dd}tj| d
|d }||| | rJ d S )Nr   r  r  r  r|   r   )r9   r  c                 S   s>   |   djdd}|  djdd}||sJ d S )NrX   Tr  )r  r  r  r  )rh  ri  df1df2r   r   r   compare_tables_ignoring_orderX  s   zGtest_write_dataset_existing_data.<locals>.compare_tables_ignoring_orderr  r  rM  r  rX   ezc=2z	foo.arrowoverwrite_or_ignore)r   r   existing_data_behavior)r  r   rW   rX   r|   )ru   r   ru   rU   rz   r  delete_matching)r   rW   rX   r|   r  )r;   r   r   r   r9   r<   r>   r  r  r  rW  r   r  r  rN   r   exists)	r  rq   r   r   r  extra_table
extra_fileoverwrittenreadbackr   r   r    test_write_dataset_existing_dataQ  sV   



r  rz   r   r   c                    s    fddt | D S )Nc                    s   g | ]}t  qS r   )r  randintr  r  r  r   r   r]         z._generate_random_int_array.<locals>.<listcomp>r  r4  r  r  r   r  r   _generate_random_int_array  s   r  c                 C   sN   g }g }t | D ]}|t|d|d |dt|  qtj||d}|S )Nr   r  r|   r6   r  )r,   r-   r  rZ   r;   rk   )num_of_columnsnum_of_recordsr6   r]  r7   rk   r   r   r   _generate_data_and_columns  s   r  c                 C   s   t tt| d| S )Nz**/*.)r   ri   rK  rL  rw  base_directoryr   r   r   r   _get_num_of_files_generated  s   r  c                    s   | d }d d}d}d}t ||}tj||d |d t|}|  d }t||ks.J g }t|D ]\}	}
|t|
 }tj|dd}|	|
 jd	  q4|t|ksXJ |t|ks`J t fd
d|D smJ d S )Nr   r   ru   #   r   )r   max_rows_per_filemax_rows_per_groupr   r  r   c                 3   s    | ]}| kV  qd S r   r   )r[   file_rowcountr  r   r   rx    s    z7test_write_dataset_max_rows_per_file.<locals>.<genexpr>)r  r   r  r$  r)  r   rf   rZ   rN   r-   r   shapern  r  )r  rq   r  r  r  rk   files_in_direxpected_partitionsresult_row_combinationri  f_filef_pathrN   r   r  r   $test_write_dataset_max_rows_per_file  s2   

r  c                    s   | d }d}d}d g d} fdd|D }|d }t j||||d	d
 t|}t|D ]>\}}	|t|	 }
t j|
d	d}| }| }t|D ] \}}|j	}|t
|d k re||krb||ksdJ qK||kskJ qKq.d S )Nr   r  rI  ru   )
r   r   r   r   r   rz   rz   rz   rz   rz   c                    s   g | ]}t  |qS r   )r  )r[   r  r  r   r   r]     s
    z9test_write_dataset_min_rows_per_group.<locals>.<listcomp>min_rows_groupr   )r6   rG  min_rows_per_groupr  r   r  r   )r   r  r$  r)  rf   rZ   rN   r   r   r   r   )r  rq   r  r  record_sizesrecord_batchesdata_sourcer  ri  r  r  rN   r   batchesr  rt   rows_per_batchr   r  r   %test_write_dataset_min_rows_per_group  s8   

r  c                 C   s   | d }d}d}d}t ||}|d }tj|||dd t|}g }|D ]"}	|t|	 }
tj|
dd}| }| }|D ]}|	|j
 q>q%|dd	gksPJ d S )
Nr   r  ru      max_rows_groupr   )r6   rG  r  r   r  rO  )r  r   r  r$  r)  rZ   rN   r   r   r-   r   )r  rq   r  r  r  rk   r	  r  batched_datar  r  rN   r   r
  rt   r   r   r   %test_write_dataset_max_rows_per_group  s.   
r  c                 C   s:  | d }d}d}ddg}t jg dg dg|d}t jg d	g d
g|d}t jg dg dg|d}t jg dg dg|d}t j||||g}	tjt || t  fgdd}
|d }tj|	||
|d dd }|||||\}}||ks{J |d }d}tj|	||
||dd |||||\}}||ksJ d S )Nr   r   r   c1c2)r   ru   rU   rz   r   r   )rW   rX   r|   r{   r  rW   r  )r   r  rL  rI  r   r   )rW   rX   r|   r{   r  r|   )rE  r   rP  rO  r   r   )rW   rX   r|   r{   r  r{   )r?  r  r  rH  r   r   )rW   rX   r|   r{   r  rX   r   r~  default)r6   rG  r   r   c                 S   s(   t | |d}ttj|| }||fS )Nr  )r  r   r;   r  unique)r	  rk   r   col_idnum_of_files_generatednumber_of_partitionsr   r   r   _get_compare_pair  s
   z<test_write_dataset_max_open_files.<locals>._get_compare_pairmax_1rU   F)r6   rG  r   r   max_open_filesr   )	r;   rk   rA   rl   r   r   r9   r@   r  )r  rq   r   partition_column_idr]  record_batch_1record_batch_2record_batch_3record_batch_4r   r   data_source_1r  r  r  data_source_2r  r   r   r   !test_write_dataset_max_open_files  sh   




r"  c                 C   s   | d }t |}tj|tjjddd}| d }|d |d d |d |d d g}tjt|jd	gd	t	ddgid
}t
|t||d||d d S )Nr  Tr  r  r  rW   rU  rX   r   r  r  )r~  r   rN   r  r  r   r;   r9   r<   r}   r  rZ   )r  rq   ri  rN   r  r  r   r   r   r   #test_write_dataset_partitioned_dictA  s&   

r#  c                    s   | d }t |}tj|dd}tjtdt fgdd}| d }g   fdd}tj||d	|d
|d |d d |d d h}tt	t
j }||ksOJ | d }	tj||	d	|dd tj|d	|d}
tj|	d	|d}|
 | sxJ d S )Nr  r   r  r   r~  partitioned1c                    s     | j d S r   )r-   rr   written_filepaths_writtenr   r   file_visitorh  s   z4test_write_dataset_use_threads.<locals>.file_visitorr  Tr   r   r   r)  r  part-0.featherr  partitioned2Fr  r  )r~  r   rN   r   r;   r9   r@   r  r   rj   rK  rL  r   r  )r  rq   ri  rN   r   target1r)  r  paths_written_settarget2result1result2r   r'  r   test_write_dataset_use_threads[  s4   

r2  c                 C   s   t dtdi}|jdd}tj|| dddd t| jdd	d  }d
}|D ]}t	|}||ks;J d| |}q*d S )NrW   rl  ru   )max_chunksizer   T)r   r   preserve_orderFr   rE  z!Sequence expected to be ordered: )
r;   r   r,   r   r   r  rN   r   to_numpyr  )r  r   r
  seqprevitemcurrr   r   r   -test_write_dataset_use_threads_preserve_order  s   
r:  c           
         s  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tj||d
dd t|d}|d g}t|t|ksIJ tj	|dd
 }||sYJ | d }|d |d d |d |d d g}g  g  fdd}tjt dt  fgdd}tj||dd
||d t|d}t|t|ksJ dd  D }|ksJ tj	|d|d}|
 |sJ t dksJ  D ]}	t|	|v sJ qd S )Nr  c                 s   r  r   r  r  r   r   r   rx    ry  z#test_write_table.<locals>.<genexpr>rW   r   rX   r  r  singledat_{i}.arrowr  basename_templater   r  zdat_0.arrowr  r  r  r  r  c                    s     | j  | j d S r   )r-   rr   r4  r%  visited_pathsvisited_sizesr   r   r)    s   z&test_write_table.<locals>.file_visitorr   r   r~  )r   r>  r   r)  c                 S   s   g | ]}t j|qS r   )r$  rr   getsizer1  r   r   r   r]     r  z$test_write_table.<locals>.<listcomp>r  ru   )r;   r   r}   r,   r   r  ri   rz  r   rN   r   r  r   r9   r@   r   rK  rL  )
r  r   rG  r  r  r3  r)  r   actual_sizesvisited_pathr   r?  r   test_write_table  sN   "

rE  c                 C   s  t jt tdt dd tdD t dgd dgd  gg dd}t |gd	 }| d
 }tj||dd t|dt|d gksJJ tj	|dd
 |sXJ | d }tj|g|dd t|dt|d gksuJ tj	|dd
 |sJ | d }tj| |dd t|dt|d gksJ tj	|dd
 |sJ | d }tj||g|dd t|dt|d gksJ tj	|dd
 t |gd	 sJ d S )Nr   c                 s   r  r   r  r  r   r   r   rx    ry  z6test_write_table_multiple_fragments.<locals>.<genexpr>rW   r   rX   r  r  ru   r;  r  r  r  r+  r  zsingle-listmultiplezmultiple-table)r;   r   r}   r,   rT  r   r  r   rz  rN   r   r  r   )r  r   rG  r   r   r   #test_write_table_multiple_fragments  s:   "  

rG  c                 C   s,  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tjd
d | D ||jddd tj|dd	 }|
|sLJ | d }t j|j| }tj||ddd tj|dd	 }|
|ssJ | d }t|}tj||ddd tj|dd	 }|
|sJ d S )Nr  c                 s   r  r   r  r  r   r   r   rx    ry  z&test_write_iterable.<locals>.<genexpr>rW   r   rX   r  r  inmemory_iterablec                 s   s    | ]}|V  qd S r   r   )r[   rt   r   r   r   rx    r  r<  r  )r9   r>  r   r  inmemory_readerr=  inmemory_pycapsule)r;   r   r}   r,   r   r  r   r9   rN   r   r  r  rl   r   )r  r   rG  r3  r   streamr   r   r   test_write_iterable  s2   "
rL  c                 C   s2  t jt tdt dd tdD t dgd dgd  gg dd}t|}| d	 }tj|||d
d |tj|dd}|	|sKJ | d }tj|j|dgd|d
d |tj|dd}|	|
dgsrJ tjtdd tj||||jd
d W d    d S 1 sw   Y  d S )Nr  c                 s   r  r   r  r  r   r   r   rx    ry  z%test_write_scanner.<locals>.<genexpr>rW   r   rX   r  r  dataset_from_scannerr  r  r  dataset_from_scanner2r  r&   zCannot specify a schemar   )r9   r   )r;   r   r}   r,   r   rN   r  r   r   r  r  r  r  rF  r9   )r  r   r   rN   rG  r3  r   r   r   test_write_scanner  s4   "
"rO  c                 C   s   t jt tdt dgd dgd   gddgd}t|dgj}| d }tj	||d	|d
 tj
jdgdd}tj|d|d
 }||sNJ d S )Nr  rW   r   rX   r|  r   r  rN   r  r  Tr  r  )r;   r   r}   r,   rd  r   r   r  r9   r  r   r  rN   r   r  )r  r   r   rG  partitioning_readr3  r   r   r   !test_write_table_partitioned_dict  s(   rQ  c              	   C   s  t jt jtdddt tjdddddt tdd	gd
gg dd}| d }tj	||dd t
|d}|d g}t|t|ksJJ tj|dd }||sZJ dD ]w}t }|j|d}dt|v spJ | d|  }tj	||||d t|d }	|dkrdnd}
|	j|
ksJ tj|dd }|j}|dkr|d|dt  }|dv r|d|dt d}||}||sJ q\d S )Nr  r  rv   r  zdatetime64[D]r  zdatetime64[ns]rW   rX   r   r  r  rh  r   r  r  part-0.parquet)1.02.42.6rK  z(<pyarrow.dataset.ParquetFileWriteOptionsparquet_dataset_versionrS  rS  rU  r   )rS  rT  r   r  )r;   r   r}   r,   r  r  r  r  r   r  ri   rz  r   rN   r   r  r   r  r  rm   read_metadataformat_versionr9   r<   	with_typer>   r  r  )r  r   rG  r  r  r3  rL  r   optsmetaexpected_versionr9   ro  r   r   r   test_write_dataset_parquet3  sD   	

r]  c                 C   s  t jt tdt dd tdD t dgd dgd  gg dd}| d	 }tj||d
d t|d}|d g}t|t|ksHJ tj	|d
d
 }||sXJ tjtjj|jjdd}|jdd}| d }tj||||d tj	||d
 }||sJ d S )Nr  c                 s   r  r   r  r  r   r   r   rx  b  ry  z)test_write_dataset_csv.<locals>.<genexpr>rW   r   rX   )r  r  chr1r  csv_datasetr  r  r  z
part-0.csvr%  r  F)include_headercsv_dataset_noheaderrS  )r;   r   r}   r,   r   r  ri   rz  r   rN   r   r  r  r   r  r  r9   r  r  )r  r   rG  r  r  r3  r   rZ  r   r   r   test_write_dataset_csv`  s*   "


rb  c                    s   t jt tdt dd tdD t dgd dgd  gg dd}d	  fd
d}| d }tj||d|d  s?J d S )Nr  c                 s   r  r   r  r  r   r   r   rx  }  ry  z:test_write_dataset_parquet_file_visitor.<locals>.<genexpr>rW   r   rX   r  r  Fc                    s&   | j d ur| j jdkrd d S d S d S )NrU   T)r  rM  r%  visitor_calledr   r   r)    s
   
z=test_write_dataset_parquet_file_visitor.<locals>.file_visitorrh  r   )r   r)  )r;   r   r}   r,   r   r  )r  r   r)  rG  r   rc  r   'test_write_dataset_parquet_file_visitorz  s   "
re  c           	         s   dd t dD }dd t dD }t||dgd dgd  d}| d	 }tjtd
t fgdd}g  d  fdd}tj||d|d|d |d d |d d h}tt	t
j }||ksfJ d uslJ jdkssJ d S )Nc                 S   s    g | ]}|gd  D ]}|q	qS r   r   r[   r   r8  r   r   r   r]     s     z?test_partition_dataset_parquet_file_visitor.<locals>.<listcomp>rz   c                 S   s$   g | ]}|gd  D ]}|d  q	qS r   r   rf  r   r   r   r]     s   $ rW   r  rX   r  r  r   r   r~  c                    s   | j r| j  | j d S r   )r  r-   rr   r%  r(  sample_metadatar   r   r)    s   zAtest_partition_dataset_parquet_file_visitor.<locals>.file_visitorr   Tr*  r  rR  r  ru   )r,   r;   r   r   r   r9   r@   r  r   rj   rK  rL  rM  )	r  f1_valsf2_valsr   rd  r   r)  r  r.  r   rg  r   +test_partition_dataset_parquet_file_visitor  s.   

rk  c                 C   sd   t dtjdddgi}|d jjdksJ tj|| dd t	| d }|d jjdks0J d S )NrW   r  zEurope/Brussels)tzr   r  rR  )
r;   r   r0   r  rw   rl  r   r  rm   r   )r  r   r3  r   r   r   (test_write_dataset_arrow_schema_metadata  s
   rm  c                 C   sb   ddl m} tdg di}|ddi}tj|| dd || d	 j}|j	ddiks/J d S )
Nr   r  rW   r  r     valuer  r  r+  )
r   r  r;   r   rC   r   r  r   r9   r  )r  r  r   r9   r   r   r   "test_write_dataset_schema_metadata  s   ro  c                 C   sV   t dg di}|ddi}tj|| dd t| d j}|jddiks)J d S )NrW   r  r  rn  r   r  rR  )	r;   r   rC   r   r  rm   r   r9   r  )r  r   r9   r   r   r   *test_write_dataset_schema_metadata_parquet  s
   rp  c                 C   sL  | \}}}}}}}}d ||||}tjttdtdd tdD tdgd dgd  gg dd	}tjtd
t fgdd}	tj	|d|d|	d tj
d|ddd }
|
|scJ | d}tj	||d|	d tj
d|ddd }
|
|sJ | d}tj	|d|d|	d tj
d|ddd }
|
|sJ d S )Nr  r  c                 s   r  r   r  r  r   r   r   rx    ry  z(test_write_dataset_s3.<locals>.<genexpr>rW   r   rX   r  r  r   r   r~  zmybucket/datasetr  r  r  zmybucket/dataset2r  r  r  zmybucket/dataset3)r   r;   r   r}   r,   r   r   r9   r@   r  rN   r   r  )r?  ri  rd   rA  rB  rC  rD  uri_templater   r   r3  r@  r   r   r   test_write_dataset_s3  sP   "


rr  aC  {
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "s3:PutObject",
                "s3:ListBucket",
                "s3:GetObjectVersion"
            ],
            "Resource": [
                "arn:aws:s3:::*"
            ]
        }
    ]
}c           	   	   C   s  ddl m} | d \}}}}t| tdd |dd| d| dd}tjttd	td
d td	D tdgd dgd  gg dd}tj	t
dt fgdd}tj|d|dd|dd tjd|ddd }||suJ tj|d|dd|dd tjd|ddd }||sJ tjtdd tj|d|dddd W d    n1 sw   Y  |d d| d| ddd!}tjtd"d tj|d|dddd W d    d S 1 sw   Y  d S )#Nr   )r  r  test_dataset_limited_user
limited123r  http)rC  rD  endpoint_overrideschemer  c                 s   r  r   r  r  r   r   r   rx  2  ry  z1test_write_dataset_s3_put_only.<locals>.<genexpr>rW   r   rX   r  r  r   r   r~  zexisting-bucketr  Fr  )r   r   rg   r   r  r  r  Tz&Bucket 'non-existing-bucket' not foundr   znon-existing-bucket)r   r   rg   r  limited)rC  rD  rv  rw  allow_bucket_creationz(Access Denied|ACCESS_DENIED))r   r  r   _minio_put_only_policyr;   r   r}   r,   r   r   r9   r@   r  rN   r   r  r  r  r>  )	r  r  rA  rB  ri  rd   r   r   r3  r   r   r   test_write_dataset_s3_put_only  s~   "	"r{  c              
   C   s   t dd d gi}t|| d  t t dt t  t  g}t	j
j| d g|t	 t d}||}|j|ks@J d S )NrW   r  )r   r9   r   r   )r;   r   rm   rn   r9   r<   ry   r   r@   r   r  r  r   rd   r   r   )r  r   r   r9   fsdsr   r   r   $test_dataset_null_to_dictionary_casti  s   
r}  c                 C   s   t g dg dd}tj|| d dd tj| d dd}t g dg dd	}tj|| d
 dd tj| d
 dd}||dd}| t g dg dg ddksZJ |j|dddd}| dt g dg dg ddks{J d S )Nr   ru   r  rW   rX   r  colAr  rh  r  r  c   ru   r   Zr  r  )colBcol3ri  r  r  r  r  Nr  r  r  
full outer)	join_typer   ru   r  r  rW   rX   r  Nr  r  Nr  r;   r   r   r  rN   r  r   r+  r  rh  ds1ri  ds2r3  r   r   r   test_dataset_join|  s0   
r  c                 C   s   t g dg dd}tj|| d dd tj| d dd}t g dg dd	}tj|| d
 dd tj| d
 dd}||d}| t g dg dg ddksYJ |j|dddd}| dt g dg dg ddkszJ d S )Nr~  r  r  rh  r  r  r  r  )r  r  ri  r  r  r  r  _rr  right_suffixr  r  r  r  r  r   r   r   test_dataset_join_unique_key  s0   
r  c                 C   s   t g dg dg dd}tj|| d dd tj| d dd}t g dg d	g d
d}tj|| d dd tj| d dd}|j|dddd}| dt jg dg dg dg dg dgg ddksnJ d S )Nr~  r   r  <   r  )r  r  colValsrh  r  r  r  r  r  r   r  ri  r  r  r  r  r  )r   r  r  Nr  )r   r  Nr  r  )r  r  r  colB_r	colVals_rr  r  r  r   r   r   test_dataset_join_collisions  s0   r  c                 C   s   t jg dg dd}tj|| d dd tj| d dd}t jg dg dg d	d
}tj|| d dd tj| d dd}|j|dddddd}| dt 	g dg dg ddksfJ d S )N)r   r   r   r  rL  )rW   rX   rW   rX   r  r  rh  r  r  )ru   rE  r  )rW   rX   g)r  r#  g      @)r  r  colCri  r  r  r   r  r  onby	toleranceright_onright_by)r  NNNN)r  r  r  )
r;   rA   from_pydictr   r  rN   	join_asofr   r+  r   r  r   r   r   test_dataset_join_asof  s,   r  c                 C   s   t g dg dg dd}tj|| d dd tj| d dd}t g dg d	g d
g dd}tj|| d dd tj| d dd}|j|dddgdd}| dt g dg dg dg ddksmJ d S )Nr~  r  r  )r  r  r  rh  r  r  r  r  r  r  )r  r  r  r  ri  r  r  r  r   r  r  r  )Nr  Nr  r  r  r  )r;   r   r   r  rN   r  r   r+  r  r   r   r   "test_dataset_join_asof_multiple_by  s0   r  c                 C   s   t dg di}tj|| d dd tj| d dd}t g dg dd}tj|| d	 dd tj| d	 dd}|j|dg d
d}| t g dg ddksVJ d S )Nr  r  rh  r  r  r  r  )r  r  ri  r   r  )r  r  r  )r  r  )r;   r   r   r  rN   r  r   r  r   r   r   test_dataset_join_asof_empty_by  s$   
r  c              	   C   s   t g dg dg dg dd}tj|| d dd tj| d dd}t g d	g d
g dg dg dd}tj|| d dd tj| d dd}d}tjt|d |j|dddgddddgd W d    d S 1 sqw   Y  d S )Nr~  r  r  r  r  rh  r  r  r  r  )r  r  rP  r  r  )r  r  colUniqr  r  ri  zXColumns {'colVals'} present in both tables. AsofJoin does not support column collisions.r   r  r  r  r   r  )	r;   r   r   r  rN   r  r  rF  r  )r  rh  r  ri  r  r  r   r   r   !test_dataset_join_asof_collisions(  s2   "r  dstyperd   memc                 C   s  t g dg dd}|dkr$tj|| d dd tj| d dd}n|dkr.t|}nt|td	d
k tddk}|dkrItj	ntj
}t||sSJ | t dgdgdkscJ |dt dgdgdkstJ |td	dk td	dkjtd	dkd}| t dgdgdksJ tj|| d dd tj| d dd}| t dgdgdksJ |jtt ddgddgdddd}| dt dd gddgddgdksJ tt |d  W d    n	1 sw   Y  tt |  W d    n	1 sw   Y  |jd}	|td	d
k |	}
|
 t d	ddgiksGJ tt j ||	  W d    d S 1 saw   Y  d S )Nr   ru   r  rI  rW   rX   r  r  r  rd   rh  r  r  r  r  rU   r  rW   r   r   rI  r  r   ru   rX   r  r   r  r  r  zright outerkeysr  r  )r  r  r  )r;   r   r   r  rN   r  r   r  r<   r  r  rH   r   r   r   r  r+  r  r  r  rF  rF   r9   ry  replace_schemarW  )r  r  rh  r  r3  ro  r2r  joinedschema_without_col2	newschemar   r   r   test_dataset_filterH  s   $




$r  c           
      C   s  t g dg dd}t g dg dd}|dkrCtj|| d dd	 tj| d dd	}tj|| d
 dd	 tj| d
 dd	}n|dkrRt|}t|}ntt||ftddk tddkB }|	 t g dg ddks|J |j
tt ddgddgdddd}|	 dt g dg dg ddksJ |tddk }|tddk }	tjtdd t||	f W d    d S 1 sw   Y  d S )Nr  r  r  )rE  r   rP  )hr7   lrd   rh  r  r  ri  r  r  rU   rE  )r   ru   rE  )rW   rX   r  r   r  rW   rX   r  r  z
left outerr  )r   r  N)r  r  r  zcurrently not supportedr   )r;   r   r   r  rN   r  r   r  r<   r   r  r+  r  r  rF  )
r  r  rh  ri  r  r  filtered_union_dsr  filtered_ds1filtered_ds2r   r   r   test_union_dataset_filter  sP   

"r  c                 C   s   | d }t |\}}t|}| }|jdksJ |tddk }| jdks-J t	t
 |  W d    d S 1 sBw   Y  d S )Ntest_parquet_dataset_filterrg  r  ru   r  )rf  r   rh  r   r   r   r  r<   r  r  rF  rF   )r  rd  re  ri  rN   r3  filtered_dsr   r   r   r    s   

"r  c                 C   s   t jt tdgdgd}t|}dtdi}|j|d}tj|| dgdd t	j
tdd	 tj|| dgdd W d
   d
S 1 sGw   Y  d
S )z
    Ensure the projected schema is used to validate partitions for scanner

    https://issues.apache.org/jira/browse/ARROW-17228
    r  original_columnr  renamed_columnr&   r  r  z0'Column original_column does not exist in schemar   N)r;   r   r}   r,   r   rN   r<   r   r  r  r  KeyError)r  r   table_datasetr'   r   r   r   r   4test_write_dataset_with_scanner_use_projected_schema  s    



"r  r   )r  r   c              
   C   s   |dkr	t d tddgddgd dddgdd	id gd
ddg dddigd
gd}tj|| d |d tj| d |d}|jg dd}| dd ddgd d	dd gddddg ddd dgddgkskJ d S )Nr   zpyarrow.parquetabc123qrs456r   ru   buttonr  r  )rw   elementvaluesstructsscrollwindow)NrU   rz   fizzbuzz)user_ida.dotted.fieldinteractionr   r  )r  zinteraction.typezinteraction.valueszinteraction.structsr  r&   )r  r  )r  rw   r  r  r  )	r  r  r;   r   r   r  rN   r   r^  )r  r   r   r  r   r   r   test_read_table_nested_columns  s2   



r  c                 C   s   ddl m} | d }tjtg dt tg dt gddg}|j||ddgd	d
 |j|dd	t	t
dt t
dt gd  }||dksWJ |d }tt|}dd |D }tt|}||ksxJ d S )Nr   r5  zslash-writer-xr   ru   rU   rz   r   )experiment/A/f.csvzexperiment/B/f.csvr  zexperiment/C/k.csvzexperiment/M/i.csvexp_idexp_metar  r   )r6   rG  r   r   r  )r  r   r   r9   r   c                 S   s   g | ]
}d t |dd qS )z	exp_meta=r  r  r   r1  r   r   r   r]   ,  r  z5test_dataset_partition_with_slash.<locals>.<listcomp>)r   rN   r;   rA   r   r}   r   r  r  r9   r<   r   r  r+  r'  r^  r-  r   r$  r)  )tmpdirr   rr   dt_tabler   r  encoded_pathsr  r   r   r   !test_dataset_partition_with_slash  sB   
r  c                 C   s   t t jdt  ddt jdt  ddg}g dg dg}t jj||d}t|| d	  tj	| d	 d
d}|
 j|sBJ tj|| d d
d tj	| d d
d}|
 j|s_J tj||g| d d
d tj	| d d
d}|
 j|s~J d S )Nr   F)nullabler  Tr  Nr   Nrc   	nulltest1r   r  	nulltest2	nulltest3)r;   r9   r<   r>   rA   r   rm   rt  r   rN   r   r  r  )r  schema_nullableri  r   rN   r   r   r   'test_write_dataset_preserve_nullability2  s   r  c                 C   sP  t t jdt  ddidt dt  g}t t dt  t dt  g}g dg dg}t jj||d}t jj||d}tj||g| d	 d
d tj| d	 d
d}|	 jj
|ddscJ tj||g| d d
d tj| d d
d}|	 jj
|ddsJ tj||g| d d
|d tj| d d
d}|	 jj
|ddsJ d S )Nr   s   foos   barr  r  r  r  rc   test1r   r  Tr  test2test3ry  )r;   r9   r<   r>   rA   r   r   r  rN   r   r  )r  schema_metadataschema_no_metari  r   table_no_metarN   r   r   r   *test_write_dataset_preserve_field_metadataJ  s,   r  c              
   C   s   dD ]n}dD ]i}t t dt  t dt  g}g dg dg}t jj||d}t }| d|  }tj||d|j	||d	d
d tj
|dd}|jD ]}	t|	}
|
dd}|j|u seJ |j||@ u snJ qOqqd S )N)TFr   r  r  r  rc   write_page_index_r   )write_statisticswrite_page_indexr  )r   rT  r  r  r   )r;   r9   r<   r>   rA   r   r   r   r  r  rN   r  rm   rW  r  r'  has_offset_indexhas_column_index)r  r  r  r9   ri  r   r   rG  r  r  r  ccr   r   r   #test_write_dataset_write_page_indexg  s:   


r  c                 C   s  t jt g dt g dgddgd}|dkr-tj|| d dd	 tj| d dd	}n|d
kr7t|}nt|d 	 g dg ddksMJ |dg 	 g dg ddksbJ |
tddk d 	 g dg ddks~J t jjt jg dt  dt g dgddgd}t|}|dg}| 	 }|d g dksJ |d g dksJ |dg}| 	 }|d g dksJ |d g dksJ d S )N)rU   r   rz   ru   r   )rX   rW   rX   rW   r|   r  r  r  rd   rh  r  r  r  )rW   rW   rX   rX   r|   r  )r  r  )r  
descending)r|   rX   rX   rW   rW   )r   rz   rU   ru   r   rz   )rW   rW   rX   r  )r   rL  rL  r  rv   )r  carr  foobarrW   rX   )rW   r  )r  rL  rL  r   )r  r  r  r  )rW   r  )r;   r   r}   r   r  rN   r  r+  r   r,  r   r  r<   rA   r   r>   )r  r  r   r   
sorted_tabsorted_tab_dictr   r   r   test_dataset_sort_by  sV   
r  c                 C   s  t dg di}t j }|jdd}| d }tj||||d tjdd}t jj|d}tj||d	 }||ks=J | d
 }t	|| t
| }	t|	dksTJ |	d }
t|
 }|d |d kshJ |d |d |d< |d< |
| tjdd}t jj|d}tj||d	 }||ksJ |t dg diksJ tjtdd tj||d	 }W d   dS 1 sw   Y  dS )zwCheck that checksum verification works for datasets created with
    ds.write_dataset and read with ds.dataset.to_tablerW   r  T)write_page_checksumcorrect_dir)r6   rG  r   rT  r  )default_fragment_scan_optionsr  corrupted_dirr   r      $   F)r   rU   ru   rz   zCRC checksum verificationr   N)r;   r   rN   r   r  r   r  r  r   r   ri   iterdirr   	bytearray
read_byteswrite_bytesr  r  r>  )r  
table_origpq_write_formatrY  original_dir_pathpq_scan_opts_crcpq_read_format_crctable_checkcorrupted_dir_pathcorrupted_file_path_listcorrupted_file_pathbin_datapq_scan_opts_no_crcpq_read_format_no_crctable_corruptri  r   r   r   1test_checksum_write_dataset_read_dataset_to_table  sn   


"r  c                  C   s   d} d}t t}tjjd W d    n1 sw   Y  | t|jv s0|t|jv s0J tj }d}t jt|d |d W d    d S 1 sOw   Y  d S )NzImake_write_options() should be called on an instance of ParquetFileFormatzqdescriptor 'make_write_options' for 'pyarrow._dataset_parquet.ParquetFileFormat' objects doesn't apply to a 'int'+   z;make_write_options\(\) takes exactly 0 positional argumentsr   )	r  r  r  r;   rN   r   r  rZ   r$   )msg_1msg_2excinfopformatr  r   r   r   test_make_write_options_error  s    
"r  c                 C   st   zdd l m} W n ty   td Y nw d}d}| j|j||j|d }|	 dddgiks8J d S )Nr   zsubstrait NOT enableds   
SOhttps://github.com/apache/arrow/blob/main/format/substrait/extension_types.yaml	
u64
	u32

str"i
i64
f64
str
const
struct
a
b
group
key7
:
Z
b
:

:
b
*
bs3  
/functions_comparison.yaml
SOhttps://github.com/apache/arrow/blob/main/format/substrait/extension_types.yamlequal:any1_any1	
u64
	u32

"
 "
("i
i64
f64
str
const
struct
a
b
group
key7
:
Z
b
:

:
b
*
brk  rZ   4)
pyarrow.substrait	substraitr  r  r  r   BoundExpressionsfrom_substraitr   r,  )rN   psr4  	filteringr3  r   r   r   test_scanner_from_substrait  s   

r   r  r   )rz   r   r   (	  r   r(   r$  rK  r-  r  sysr  rX  r  r  shutilr   urllib.parser   numpyr  r  r  r   r;   r  r  r  pyarrow.csvrM  r   rd   pyarrow.jsonpyarrow.libr   pyarrow.tests.utilr   r   r   r	   r
   r   r0   r  rN   r   pyarrow.parquetr   rm   mark
pytestmarkr   r8   rE   rQ   fixturero   r   r   r   r   r  r  r5  rB  rG  rN  ra  rj  rm  rp  rq  rv  r  r  parametrizerZ   tupler  r  r  r  r  r  r  r  r   r*  r0  s3rK  ra  rc  rp  ru  r{  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r#  r,  r.  r2  r5  r6  r7  r;  r>  rD  r  rH  rJ  rO  rR  rS  rV  rY  r]  rf  rl  rq  rr  rv  rx  rz  r  r  r  r  r  r  r  r  r  r  r  r  r?  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r
  r  r  r  r!  r&  r*  r/  r9  r<  r=  rG  rI  rN  r_  rf  ri  skipifplatformrl  rn  rt  ry  r}  r~  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r"  r#  r2  r:  rE  rG  rL  rO  rQ  r]  rb  re  rk  rm  ro  rp  rr  rz  r{  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r  r   r   r   r   r   <module>   s  
"
#

 
3

.
G
B,

0



 ;
)

9'H
8
-
 
<


(
8*




)



## 
d
U%












	B

$



	9

B

B 9& /
'=#K0$#D&/$+#
/L

N-%
 0I