o
    'i'G                     @   s"  d dl Z d dlZd dlmZ zd dlmZ d dlmZm	Z	m
Z
mZ W n ey-   dZY nw zd dlZd dlmZ W n eyG   d ZZY nw e jjZe jjdd Zdd Zdd	 Ze jjd
d Ze jjdd Ze jje jdde gdd Ze jje jjdd Ze jje jdde gdd Ze jje jjdd Z e jje jjdd Z!e jje jjdd Z"e jj#dd Z$dd Z%dd  Z&d!d" Z'd#d$ Z(e jd%ej)d&fej)d'fej*d(fej*d)fge jd*d+d,gd-d. Z+e jd/g d0d1d2 Z,d3d4 Z-dS )5    N)fs)_read_table_test_dataframe_test_table_range_integersc           
      C   s   t d}d|d< tjj|dd}t }tj||jdd}g }tdD ]}||d< tjj|dd}|	| |
|  q#|  | }tt|}tj|d	d
}	t| |	 d S Nd   r   	unique_idFpreserve_index2.6version
   Tignore_index)r   paTablefrom_pandasBufferOutputStreampqParquetWriterschemarangewrite_tableappendcopyclosegetvaluer   BufferReaderpdconcattmassert_frame_equal	to_pandas
tempdirdfarrow_tableoutwriterframesibufresultexpected r0   g/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/pyarrow/tests/parquet/test_parquet_writer.py#test_parquet_incremental_file_build,   s    
r2   c              	   C   s   t dt  t dt  g}t |}t dgt dgg}t j|ddg}| d }tj	||dddd	+}t
t || W d    n1 sNw   Y  W d    d S W d    d S 1 sfw   Y  d S )
NPOSdesc   blazsimple_validate_schema.parquetr   snappyspark)r   compressionflavor)r   fielduint32stringr   arrayr   from_arraysr   r   pytestraises
ValueErrorr   )r&   simple_fieldssimple_schemasimple_from_arraysimple_tablepathwr0   r0   r1    test_validate_schema_write_tableG   s$   
"rI   c                 C   s   t t ttdt g}td | W d    n1 s"w   Y  t t t| d d  W d    d S 1 s@w   Y  d S )Nx	some_path)	r@   rA   	TypeErrorr   r   r;   int32r   r   )r&   some_schemar0   r0   r1   test_parquet_invalid_writer]   s   "rO   c           
      C   s   t d}d|d< tjj|dd}t }tj||jdd)}g }tdD ]}||d< tjj|dd}|	| |
|  q$W d    n1 sIw   Y  | }tt|}tj|d	d
}	t| |	 d S r   )r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r0   r0   r1   test_parquet_writer_context_objg   s"   

rP   c              
   C   s"  t d}d|d< tjj|dd}t }d}zDtj||jdd1}g }td	D ]"}||d< tjj|dd}|	| |
|  |d
krIt|q'W d    n1 sTw   Y  W n tyt } zt||ksjJ W Y d }~nd }~ww | }	tt|	}
tj|dd}t|
 | d S )Nr   r   r	   Fr
   zArtificial Errorr   r   r      Tr   )r   r   r   r   r   r   r   r   r   r   r   r   rB   	Exceptionstrr   r   r   r    r!   r"   r#   r$   )r&   r'   r(   r)   
error_textr*   r+   r,   er-   r.   r/   r0   r0   r1   .test_parquet_writer_context_obj_with_exception   s>   
rV   
filesystemc           	      C   s  t d}tjj|dd}tjj|dd}t| d }t| d }tj||j|dd}|	| W d    n1 s:w   Y  t
| }t|| tj||j|dd}|| W d    n1 sew   Y  t
| }t|| tj||j|dd}|| W d    n1 sw   Y  t
| }t|| tj||j|dd}|| W d    n1 sw   Y  t
| }t|| d S )Nr   Fr
   zdata_table.parquetzdata_batch.parquetr   rW   r   )r   r   r   r   RecordBatchrS   r   r   r   r   r   r$   r"   r#   write_batchwrite)	r&   rW   r'   tablebatch
path_table
path_batchr*   r.   r0   r0   r1   "test_parquet_writer_write_wrappers   sJ   



r`   c                    s   dd  fdd}|d d d |d d |d d d |d d d | d  d d |d d |d d d d S )	Ni   i   c           	         s   t jjt| dgdgd}|d u rt|d  n
tj|d |d td }|d u r1n|}|j|ks:J t| }t	|d D ]}|
|j|ksQJ qE| ||d   }|dkrl|
|d j|ksjJ d S |
|d j|ksxJ d S )NbrJ   )namesztest.parquet)row_group_sizer5   r   )r   r   r?   r   r   r   read_metadatanum_row_groupsminr   	row_groupnum_rows)		data_size
chunk_sizeexpect_num_chunksr\   metadataexpected_chunk_sizelatched_chunk_size	chunk_idx	remainderabs_max_chunk_sizedefault_chunk_sizer&   r0   r1   check_chunk_size   s,   
z8test_parquet_writer_chunk_size.<locals>.check_chunk_size   r      r5   r0   )r&   rt   r0   rq   r1   test_parquet_writer_chunk_size   s   rw   c                 C   s~   t d}tjj|dd}t| d }tj||j|dd}|| W d    n1 s,w   Y  t	|
 }t|| d S )Nr   Fr
   zdata.parquetr   rX   )r   r   r   r   rS   r   r   r   r   r   r$   r"   r#   )r&   rW   r'   r\   rG   r*   r.   r0   r0   r1   $test_parquet_writer_filesystem_local   s   
rx   c                 C   s|   t d}tjj|dd}| \}}}tj||j|dd}|| W d    n1 s+w   Y  t|	 }t
|| d S )Nr   Fr
   r   rX   r   r   r   r   r   r   r   r   r   r$   r"   r#   s3_example_fsr'   r\   r   urirG   r*   r.   r0   r0   r1   !test_parquet_writer_filesystem_s3  s   

r}   c                 C   s~   t d}tjj|dd}| \}}}tj||jdd}|| W d    n1 s*w   Y  t||d	 }t
|| d S )Nr   Fr
   r   r   rW   ry   rz   r0   r0   r1   %test_parquet_writer_filesystem_s3_uri  s   
r   c                 C   s   t d}tjj|dd}| \}}|d }tj||j|dd}|| W d    n1 s.w   Y  t||d	 }t
|| d S )Nr   Fr
   z/test.parquetr   rX   r~   ry   )s3_example_s3fsr'   r\   r   	directoryrG   r*   r.   r0   r0   r1   #test_parquet_writer_filesystem_s3fs+  s   
r   c                  C   sZ   t d} t }tjtdd tjt	 | j
|d W d    d S 1 s&w   Y  d S )Nr   zspecified path is file-likematchr~   )r   r   LocalFileSystemr@   rA   rB   r   r   r   r   r   )r\   rW   r0   r0   r1   ,test_parquet_writer_filesystem_buffer_raises=  s   "r   c                 C   s   t dg di}| d }t||j}|| W d    n1 s$w   Y  t|}d|jv s5J |jd s<J | d }tj||jdd}|| W d    n1 sYw   Y  t|}|jd u sjJ d S )Na)r5   ru   rv   ztest_with_schema.parquets   ARROW:schemaztest_without_schema.parquetF)store_schema)r   r\   r   r   r   r   rd   rl   )r&   r\   path1r*   metapath2r0   r0   r1    test_parquet_writer_store_schemaI  s   

r   c                 C   s   t jt jg ddgdg}| d }t||j}|| |ddd |dd	d
 W d    n1 s9w   Y  t	|}|j
j
}|d dksOJ |d dksWJ |d dks_J d S )NrM   typef0zmetadata.parquet1rJ   )key1key223)r   key3s   key1   1s   key2   2s   key3   3)r   r   r?   r>   r   r   r   r   add_key_value_metadataParquetFilerl   )r&   r\   rG   r*   readerrl   r0   r0   r1   -test_parquet_writer_append_key_value_metadata^  s   

r   c                 C   sp  t dtdi}tj|| d ddd tj|| d dddd	 tj|| d
 dddddd	 t| d }t| d }t| d
 }||sKJ ||sRJ t| d }t| d }t| d
 }|j|jksoJ |j|jkswJ t|jD ]9}|	|}	|	|}
|	|}|	j
|
j
ksJ |	j
|j
ksJ |	j|
jk sJ |	j|jk sJ |
j|jk sJ q|d S )Nr   i zunchunked.parquetFPLAIN)use_dictionarycolumn_encodingzchunked-default.parquetT)r   r   use_content_defined_chunkingzchunked-custom.parquet      min_chunk_sizemax_chunk_size)r   r\   r   r   r   
read_tableequalsrd   re   rg   rh   total_byte_size)r&   r\   	unchunkedchunked_defaultchunked_customunchunked_metadatachunked_default_metadatachunked_custom_metadatar,   rg_unchunkedrg_chunked_defaultrg_chunked_customr0   r0   r1   %test_parquet_content_defined_chunkingm  sJ   


r   c              	   C   s   t dtdi}| d }d}tjt|d ddd}tj|||d	 W d    n1 s.w   Y  dd
ddfddidfddidfg}|D ]#\}}tjt|d tj|||d	 W d    n1 sew   Y  qGtj||d
d	 ddd}tj|||d	 dddd}tj|||d	 d S )Nr   r   zchunked-invalid.parquetz2max_chunk_size must be greater than min_chunk_sizer   r   r   r   )r   T)r   unknown_optionzEUnknown options in 'use_content_defined_chunking': {'unknown_option'}r   zEMissing options in 'use_content_defined_chunking': {'max_chunk_size'}r   zEMissing options in 'use_content_defined_chunking': {'min_chunk_size'}r5   )r   r   
norm_level)	r   r\   r   r@   rA   rR   r   r   rB   )r&   r\   rG   msgcdc_optionscasesr0   r0   r1   0test_parquet_content_defined_chunking_parameters  s6   

r   ztime_type, time_unitsmsusnsutc_flag_valFTc           
      C   s   | d }g d}t dt j|||di}t d||fg}tj|||d}|| W d    n1 s8w   Y  tj||d}	|	jdd |		|sQJ d S )	Nztime_adjusted_to_utc.parquet)r   {   '  iQ time_colr   )wherer   write_time_adjusted_to_utcr   Tfull)
r   r\   r>   r   r   r   r   r   validater   )
r&   r   	time_type	time_unitfilenametime_valuesr\   r   r*   r.   r0   r0   r1   ,test_arrow_writer_props_time_adjusted_to_utc  s&   
r   max_rows_per_page)r5   r   r     Nc                 C   s   | d }t t jg dt  dt jg dt  dd}t dt  fdt  fg}tj|||d}|| W d    n1 sEw   Y  tj	||d	}|j
d
d ||s^J d S )Nzmax_rows_per_page.parquet)r5   ru   rv      rQ         r   )g      &@g      (@g      *@g      ,@g      .@g      0@g      1@)rJ   yrJ   r   r   r   r   r   Tr   )r   r\   r>   int8float16r   r   r   r   r   r   r   )r&   r   r   r\   r   r*   r.   r0   r0   r1   #test_writer_props_max_rows_per_page  s(   

r   c              	   C   s   t dt tdi}t }g }dD ]0}|  d| d}tj||j|d}|	| W d    n1 s7w   Y  |
|| q|d j|d jksQJ d S )	NrJ   i@B )r   r   z/max_rows_per_page_z.parquetr   r   r5   )r   r\   r>   r   r   r   r   r   r   r   r   get_file_infosize)r&   r\   local
file_infosmax_rowsrG   r*   r0   r0   r1   -test_writer_props_max_rows_per_page_file_size  s"   r   ).r@   pyarrowr   r   pyarrow.parquetparquetr   pyarrow.tests.parquet.commonr   r   r   r   ImportErrorpandasr    pandas.testingtestingr"   mark
pytestmarkr2   rI   rO   rP   rV   parametrizer   r`   large_memoryrw   rx   s3r}   r   r   numpyr   r   r   r   r   time32time64r   r   r   r0   r0   r0   r1   <module>   s   



()
.)!
