o
    8wi                     @   s   d dl Z d dlmZ d dlmZ d dlZd dlZd dlm	Z	 d dl
mZ ejjeZeG dd dejZG dd	 d	ejZdS )
    N)	dataclass)Optionalrequire_storage_cast)
table_castc                   @   s@   e Zd ZU dZdZeej ed< dZ	e
ed< dZee
 ed< dS )	XmlConfigzBuilderConfig for xml files.Nfeatureszutf-8encodingencoding_errors)__name__
__module____qualname____doc__r   r   datasetsFeatures__annotations__r	   strr
    r   r   ^/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/datasets/packaged_modules/xml/xml.pyr      s
   
 r   c                   @   s>   e Zd ZeZdd Zdd ZdejdejfddZ	d	d
 Z
dS )Xmlc                 C   s   t j| jjdS )N)r   )r   DatasetInfoconfigr   )selfr   r   r   _info   s   z	Xml._infoc                    s   | j jstd| j j d j_ | j j}g }| D ]!\}}t|tr*|g} fdd|D }|	t
j|d|id q|S )a  The `data_files` kwarg in load_dataset() can be a str, List[str], Dict[str,str], or Dict[str,List[str]].

        If str or List[str], then the dataset returns only the 'train' split.
        If dict, then keys should be from the `datasets.Split` enum.
        z=At least one data file must be specified, but got data_files=Tc                    s   g | ]}  |qS r   )
iter_files).0file
dl_managerr   r   
<listcomp>,   s    z)Xml._split_generators.<locals>.<listcomp>files)name
gen_kwargs)r   
data_files
ValueErrordownload_configextract_on_the_flydownload_and_extractitems
isinstancer   appendr   SplitGenerator)r   r   r#   splits
split_namer    r   r   r   _split_generators   s   
zXml._split_generatorspa_tablereturnc                 C   sd   | j jd ur&| j jj}tdd | j j D r||}|S t||}|S |tdt	 iS )Nc                 s   s    | ]}t | V  qd S )Nr   )r   featurer   r   r   	<genexpr>3   s    z"Xml._cast_table.<locals>.<genexpr>xml)
r   r   arrow_schemaallvaluescastr   paschemastring)r   r/   r9   r   r   r   _cast_table0   s   


zXml._cast_tablec              	   c   s    | j jd urt| j jndg}ttj|D ]8\}}t|| j j| j j	d!}|
 }tjjt|gg|d}|| |fV  W d    n1 sKw   Y  qd S )Nr3   )r	   errors)names)r   r   list	enumerate	itertoolschainfrom_iterableopenr	   r
   readr8   Tablefrom_arraysarrayr;   )r   r    pa_table_namesfile_idxr   fr3   r/   r   r   r   _generate_tables=   s   zXml._generate_tablesN)r   r   r   r   BUILDER_CONFIG_CLASSr   r.   r8   rE   r;   rK   r   r   r   r   r      s    r   )r@   dataclassesr   typingr   pyarrowr8   r   datasets.features.featuresr   datasets.tabler   utilslogging
get_loggerr   loggerBuilderConfigr   ArrowBasedBuilderr   r   r   r   r   <module>   s    