o
    .i                     @   sv   d dl Z d dlmZ d dlmZ d dlZd dlZd dlm	Z	 ej
jeZeG dd dejZG dd dejZdS )	    N)	dataclass)Optional)
table_castc                       s4   e Zd ZU dZdZeej ed<  fddZ	  Z
S )ArrowConfigzBuilderConfig for Arrow.Nfeaturesc                    s   t    d S N)super__post_init__self	__class__ Y/home/ubuntu/.local/lib/python3.10/site-packages/datasets/packaged_modules/arrow/arrow.pyr	      s   zArrowConfig.__post_init__)__name__
__module____qualname____doc__r   r   datasetsFeatures__annotations__r	   __classcell__r   r   r   r   r      s   
 r   c                   @   s>   e Zd ZeZdd Zdd ZdejdejfddZ	d	d
 Z
dS )Arrowc                 C   s   t j| jjdS )N)r   )r   DatasetInfoconfigr   r
   r   r   r   _info   s   zArrow._infoc           	         s  | j jstd| j j d j_ | j j}g }| D ]j\}}t|tr*|g} fdd|D }| j	j
du r|tj|D ]<}t|d$}ztj|}W n ttjjfyb   tj|}Y nw W d   n1 smw   Y  tj|j| j	_
 |tj|d|id q|S )	z-We handle string, list and dicts in datafilesz=At least one data file must be specified, but got data_files=Tc                    s   g | ]}  |qS r   )
iter_files).0file
dl_managerr   r   
<listcomp>)   s    z+Arrow._split_generators.<locals>.<listcomp>Nrbfiles)name
gen_kwargs)r   
data_files
ValueErrordownload_configextract_on_the_flydownload_and_extractitems
isinstancestrinfor   	itertoolschainfrom_iterableopenpaipcopen_streamOSErrorlibArrowInvalid	open_filer   r   from_arrow_schemaschemaappendSplitGenerator)	r   r    r&   splits
split_namer#   r   freaderr   r   r   _split_generators   s.   
zArrow._split_generatorspa_tablereturnc                 C   s    | j jd urt|| j jj}|S r   )r.   r   r   arrow_schema)r   rC   r   r   r   _cast_table7   s   zArrow._cast_tablec           
      #   s   t tj|D ]~\}}t|dm}zGztj|}W n ttj	j
fy;   tj|  fddt jD }Y nw t |D ]\}}tj|g}| d| | |fV  q@W n tyw }	 ztd| dt|	 d|	   d }	~	ww W d    n1 sw   Y  q	d S )Nr"   c                 3   s    | ]}  |V  qd S r   )	get_batch)r   irA   r   r   	<genexpr>F   s    z)Arrow._generate_tables.<locals>.<genexpr>_zFailed to read file 'z' with error z: )	enumerater/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   rangenum_record_batchesTablefrom_batchesrF   r'   loggererrortype)
r   r#   file_idxr   r@   batches	batch_idxrecord_batchrC   er   rI   r   _generate_tables>   s.    zArrow._generate_tablesN)r   r   r   r   BUILDER_CONFIG_CLASSr   rB   r3   rO   rF   rY   r   r   r   r   r      s    r   )r/   dataclassesr   typingr   pyarrowr3   r   datasets.tabler   utilslogging
get_loggerr   rQ   BuilderConfigr   ArrowBasedBuilderr   r   r   r   r   <module>   s    	