o
    .i                     @   s   d dl Z d dlmZmZmZ d dlZd dlmZ ddl	m
Z
mZmZmZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ G dd deZG dd dZdS )    N)BinaryIOOptionalUnion   )DatasetFeatures
NamedSplitconfig)get_writer_batch_size)query_table)_PACKAGED_DATASETS_MODULES)Parquet)tqdm)NestedDataStructureLikePathLike   )AbstractDatasetReaderc                       s`   e Zd Z						ddee dee dee dede	de	d	ee
 f fd
dZdd Z  ZS )ParquetDatasetReaderNFpath_or_pathssplitfeatures	cache_dirkeep_in_memory	streamingnum_procc           
   	      sd   t  j|f||||||d| t|tr|n| j|i}td d }	td||||	d|| _d S )N)r   r   r   r   r   r   parquetr   )r   
data_filesr   hash )super__init__
isinstancedictr   r   r   builder)
selfr   r   r   r   r   r   r   kwargsr   	__class__r   G/home/ubuntu/.local/lib/python3.10/site-packages/datasets/io/parquet.pyr       s,   
zParquetDatasetReader.__init__c                 C   s\   | j r| jj| jd}|S d }d }d }d }| jj||||| jd | jj| j|| jd}|S )N)r   )download_configdownload_modeverification_mode	base_pathr   )r   r+   	in_memory)r   r#   as_streaming_datasetr   download_and_preparer   
as_datasetr   )r$   datasetr)   r*   r+   r,   r   r   r(   read1   s$   
zParquetDatasetReader.read)NNNFFN)__name__
__module____qualname__r   r   r   r   r   strboolintr    r2   __classcell__r   r   r&   r(   r      s.    r   c                
   @   s`   e Zd Z		ddedeeef dee dee	 fddZ
defd	d
ZdededefddZdS )ParquetDatasetWriterNr1   path_or_buf
batch_sizestorage_optionsc                 K   s0   || _ || _|pt|j| _|pi | _|| _d S )N)r1   r;   r
   r   r<   r=   parquet_writer_kwargs)r$   r1   r;   r<   r=   r>   r   r   r(   r    J   s
   

zParquetDatasetWriter.__init__returnc                 C   s   | j r| j ntj}t| jtttjfr@t	j
| jdfi | jpi }| jd||d| j}W d    |S 1 s9w   Y  |S | jd| j|d| j}|S )Nwb)file_objr<   r   )r<   r	   DEFAULT_MAX_BATCH_SIZEr!   r;   r6   bytesosr   fsspecopenr=   _writer>   )r$   r<   bufferwrittenr   r   r(   writeX   s   
zParquetDatasetWriter.writerA   c           
      K   s   d}| dd}| jjj}tj|fd|i|}ttdt| j|dddD ]}t	| jj
t||| | jjd}	||	 ||	j7 }q&|  |S )	zWrites the pyarrow table as Parquet to a binary file handle.

        Caller is responsible for opening and closing the handle.
        r   r;   Nschemabaz"Creating parquet from Arrow format)unitdesc)tablekeyindices)popr1   r   arrow_schemapqParquetWriterhf_tqdmrangelenr   _dataslice_indiceswrite_tablenbytesclose)
r$   rA   r<   r>   rI   _rK   writeroffsetbatchr   r   r(   rG   b   s$   


zParquetDatasetWriter._write)NN)r3   r4   r5   r   r   r   r   r   r8   r"   r    rJ   rG   r   r   r   r(   r:   I   s    


r:   ) rD   typingr   r   r   rE   pyarrow.parquetr   rT    r   r   r   r	   arrow_writerr
   
formattingr   packaged_modulesr    packaged_modules.parquet.parquetr   utilsr   rV   utils.typingr   r   abcr   r   r:   r   r   r   r(   <module>   s    8