o
    .i)                     @   s   d dl Z d dlZd dlmZmZmZ d dlZddlmZm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZmZ d	d
lmZ G dd deZG dd dZdS )    N)BinaryIOOptionalUnion   )DatasetFeatures
NamedSplitconfig)query_table)Json)tqdm)NestedDataStructureLikePathLike   )AbstractDatasetReaderc                       sj   e Zd Z							ddee dee dee dede	de	d	ee d
ee
 f fddZdd Z  ZS )JsonDatasetReaderNFpath_or_pathssplitfeatures	cache_dirkeep_in_memory	streamingfieldnum_procc	           
   	      s^   t  j|f||||||d|	 || _t|tr|n| j|i}td||||d|	| _d S )N)r   r   r   r   r   r   )r   
data_filesr   r    )super__init__r   
isinstancedictr   r   builder)
selfr   r   r   r   r   r   r   r   kwargs	__class__r   D/home/ubuntu/.local/lib/python3.10/site-packages/datasets/io/json.pyr      s,   
zJsonDatasetReader.__init__c                 C   s\   | j r| jj| jd}|S d }d }d }d }| jj||||| jd | jj| j|| jd}|S )N)r   )download_configdownload_modeverification_mode	base_pathr   )r   r(   	in_memory)r   r    as_streaming_datasetr   download_and_preparer   
as_datasetr   )r!   datasetr&   r'   r(   r)   r   r   r%   read0   s$   
zJsonDatasetReader.read)NNNFFNN)__name__
__module____qualname__r   r   r   r   r   strboolintr   r/   __classcell__r   r   r#   r%   r      s4    	 r   c                   @   sn   e Zd Z			ddedeeef dee dee dee	 f
ddZ
d	efd
dZdd Zded	efddZdS )JsonDatasetWriterNr.   path_or_buf
batch_sizer   storage_optionsc                 K   s\   |d ur|dkrt d| d|| _|| _|r|ntj| _|| _d| _|p'i | _|| _	d S )Nr   z	num_proc z must be an integer > 0.zutf-8)

ValueErrorr.   r8   r	   DEFAULT_MAX_BATCH_SIZEr9   r   encodingr:   to_json_kwargs)r!   r.   r8   r9   r   r:   r>   r   r   r%   r   I   s   	

zJsonDatasetWriter.__init__returnc                 C   sV  | j dd }| j dd}| j d|dkrdnd}d| j vr)|dv r)d| j d< t| jtttjfr5d	nd }| j d
|}|dvrJtd| d|sW| j	| j
jk rWtdt| jtttjfrtj| jdfd
|i| jpni }| jd|||d| j }W d    |S 1 sw   Y  |S |rtd| d| jd| j||d| j }|S )Nr8   orientrecordslinesTFindex)r   tableinfercompression)NrE   gzipbz2xzz&`datasets` currently does not support z compressionzOutput JSON will not be formatted correctly when lines = False and batch_size < number of rows in the dataset. Use pandas.DataFrame.to_json() instead.wb)file_objr@   rB   zUThe compression parameter is not supported when writing to a buffer, but compression=z1 was passed. Please provide a local path instead.r   )r>   popr   r8   r3   bytesosr   NotImplementedErrorr9   r.   num_rowsfsspecopenr:   _write)r!   _r@   rB   default_compressionrF   bufferwrittenr   r   r%   write]   sB   


zJsonDatasetWriter.writec                 C   sh   |\}}}}t | jjt||| j | jjd}| jdd ||d|}|ds.|d7 }|	| j
S )N)rD   keyindices)r8   r@   rB   
r   )r
   r.   dataslicer9   _indices	to_pandasto_jsonendswithencoder=   )r!   argsoffsetr@   rB   r>   batchjson_strr   r   r%   _batch_json~   s   
zJsonDatasetWriter._batch_jsonrK   c              
      s  d}| j du s| j dkr0ttdt| j| jdddD ]}| | f}|||7 }q|S t| j| j}}	t	| j 9}
t|

| j fddtd||	D ||	 r^||	 d n||	 ddd	D ]	}|||7 }qfW d   |S 1 s{w   Y  |S )
zWrites the pyarrow table as JSON lines to a binary file handle.

        Caller is responsible for opening and closing the handle.
        r   Nr   bazCreating json from Arrow format)unitdescc                    s   g | ]}| fqS r   r   ).0rd   rB   r@   r>   r   r%   
<listcomp>   s    z,JsonDatasetWriter._write.<locals>.<listcomp>)totalri   rj   )r   hf_tqdmrangelenr.   r9   rg   rX   multiprocessingPoolimap)r!   rK   r@   rB   r>   rW   rd   rf   rP   r9   poolr   rl   r%   rS      s6   

	
zJsonDatasetWriter._write)NNN)r0   r1   r2   r   r   r   r   r   r5   r   r   rX   rg   rS   r   r   r   r%   r7   H   s,    

!r7   )rr   rN   typingr   r   r   rQ    r   r   r   r	   
formattingr
   packaged_modules.json.jsonr   utilsr   ro   utils.typingr   r   abcr   r   r7   r   r   r   r%   <module>   s    9