o
    .i                     @   sz   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 dZ
dZg dZg dZd	efd
dZG dd deZdS )    N)ArgumentParser	Namespace)BaseDatasetsCLICommand)
get_loggerz><<<<<<< This should probably be modified because it mentions: z=======
>>>>>>>
)TextEncoderConfigByteTextEncoderSubwordTextEncoderencoder_configmaybe_build_from_corpus
manual_dir))z
tfds\.coredatasets)ztf\.io\.gfile\.GFileopen)ztf\.([\w\d]+)zdatasets.Value('\1'))ztfds\.features\.Text\(\)zdatasets.Value('string'))ztfds\.features\.Text\(zdatasets.Value('string'),)z+features\s*=\s*tfds.features.FeaturesDict\(zfeatures=datasets.Features()ztfds\.features\.FeaturesDict\(zdict()zThe TensorFlow Datasets AuthorszDThe TensorFlow Datasets Authors and the HuggingFace Datasets Authors)ztfds\.z	datasets.)zdl_manager\.manual_dirzself.config.data_dir)zself\.builder_configzself.configargsc                 C   s   t | j| jS )zz
    Factory function used to convert a model TF 1.0 checkpoint in a PyTorch checkpoint.

    Returns: ConvertCommand
    )ConvertCommand	tfds_pathdatasets_directory)r    r   M/home/ubuntu/.local/lib/python3.10/site-packages/datasets/commands/convert.pyconvert_command_factory*   s   r   c                   @   s8   e Zd ZedefddZdedefddZdd	 Zd
S )r   parserc                 C   sB   | j ddd}|jdtddd |jdtdd	d |jtd
 dS )z
        Register this command to argparse so it's available for the datasets-cli

        Args:
            parser: Root parser to register command-specific arguments
        convertzHConvert a TensorFlow Datasets dataset to a HuggingFace Datasets dataset.)helpz--tfds_pathTzQPath to a TensorFlow Datasets folder to convert or a single tfds file to convert.)typerequiredr   z--datasets_directoryz(Path to the HuggingFace Datasets folder.)funcN)
add_parseradd_argumentstrset_defaultsr   )r   train_parserr   r   r   register_subcommand4   s   z"ConvertCommand.register_subcommandr   r   c                 G   s   t d| _|| _|| _d S )Nzdatasets-cli/converting)r   _logger
_tfds_path_datasets_directory)selfr   r   r   r   r   r   __init__K   s   

zConvertCommand.__init__c              	      s>  t j| jrt j| j}nt j| jrt j| j}ntdt j| j}| j	
d| d|  g }g }i }t j| jrHt |}nt j| jg}|D ]w}| j	
d|  t j||}t j||}	t j|r~d|v s~d|v s~d|vr| j	
d qRt|d	d
}
|
 }W d    n1 sw   Y  g }d}d}g }|D ]}| d v rqd v rqd v rqd v rd nWd v rd qd v rd nId v r܈ dd n>t fddtD rd}tt fddt}|tt| d  |  |t qtD ]\}}t||  qd v r<td }|dd |d d!D  d"|d   d# v sKd$ v sKd v rTtd%    d& v r[d}|  q|sid'|v r|dd}t j||}t j||}	t j!|dd( | j	
d)|  |"t#$|| n||	 |r||	 t|	d*d	d
}
|
%| W d    n	1 sw   Y  | j	
d+|	  qR|D ]:}z#t j|}||dd }| j	
d,| d|  t&'|| W q t(y   | j	)d-| d. Y qw |r|D ]}| j	*d/| d0 qd S d S )1NzA--tfds_path is neither a directory nor a file. Please check path.zConverting datasets from z to zLooking at file r%   _testz.pyzSkipping filezutf-8)encodingFz!import tensorflow.compat.v2 as tfz
@tfds.corezbuilder=selfz-import tensorflow_datasets.public_api as tfdszimport datasets
zimport tensorflow zfrom absl import loggingzfrom datasets import logging
	getLoggerr   c                 3   s    | ]}| v V  qd S Nr   ).0
expressionout_liner   r   	<genexpr>       z%ConvertCommand.run.<locals>.<genexpr>Tc                    s   |  v S r*   r   )er-   r   r   <lambda>   s    z$ConvertCommand.run.<locals>.<lambda>
tensorflow_datasetsz/from\stensorflow_datasets.*import\s([^\.\r\n]+)c                 s   s    | ]}|  V  qd S r*   )strip)r+   impr   r   r   r/      r0      ,zfrom . import ztf.ztfds.zError converting GeneratorBasedBuilderwmt)exist_okzAdding directory wzConverted in zMoving z#Cannot find destination folder for z. Please copy manually.z!You need to manually update file z4 to remove configurations using 'TextEncoderConfig'.)+ospathisdirr"   abspathisfiledirname
ValueErrorr#   r!   infolistdirbasenamejoinr   	readlinesreplaceanyTO_HIGHLIGHTlistfilterappendHIGHLIGHT_MESSAGE_PREr   HIGHLIGHT_MESSAGE_POST
TO_CONVERTresubmatchextendgroupsplitr5   makedirsupdatedictfromkeys
writelinesshutilcopyKeyErrorerrorwarning)r$   abs_tfds_pathabs_datasets_pathutils_fileswith_manual_updateimports_to_builder_map
file_namesf_name
input_fileoutput_fileflines	out_lines
is_builderneeds_manual_updatetfds_importsline	to_removepatternreplacementrT   dir_name
output_dir
utils_filedest_folder	file_pathr   r-   r   runQ   s   
$



 



zConvertCommand.runN)	__name__
__module____qualname__staticmethodr   r    r   r%   rz   r   r   r   r   r   3   s
    r   )r=   rR   r]   argparser   r   datasets.commandsr   datasets.utils.loggingr   rO   rP   rK   rQ   r   r   r   r   r   r   <module>   s    		