o
    5ti                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZ dd	lmZmZ dd
lmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z- ddl.m/Z/ e/e0Z1e2e3 dg Z4e#j5dfde6deeee6f  fddZ7deeee ee% f  fddZ8dee6 de6fddZ9de6dee6ee6 f fddZ:de6de6fd d!Z;d"e6dee6e6e6e6f fd#d$Z<de6d%e6d&ee6e6e6e6f d'ee deee6e6f  f
d(d)Z=de6d*e6d+e6d,e6d-eee6e6f  d.eee6e6f  d/ee de6fd0d1Z>d2e6d-eee6e6f  d.eee6e6f  d3e6d4e6de6d/edee6e6f fd5d6Z?e
G d7d8 d8Z@G d9d: d:ZAG d;d< d<eAZBG d=d> d>eAZCG d?d@ d@eAZD						dPdAe6dBee6 dCeee6e f  d'ee d/ee dDee6 d3ee6 de@fdEdFZE							G			dQdAe6dHee6 dBee6 dIeFdJeFdKee6 dLee6 dMeGd'ee d/ee dCeee6e f  de%fdNdOZHdS )RzAccess datasets.    N)	dataclass)Path)ListOptionalTupleTypeUnion)urlparse)DownloadConfigDownloadMode)DatasetBuilder)_EXTENSION_TO_MODULE_hash_python_lines)FileLock)Version   )SCRIPTS_VERSIONconfig)EvaluationModule)cached_path
head_hf_s3
hf_hub_urlinit_hf_modulesis_relative_pathrelative_to_absolute_pathurl_or_path_join)
get_loggerzipnamehf_modules_cachec                 C   st   t |}tj|| }tj|dd tjtj|ds8ttj|dd	 W d   |S 1 s3w   Y  |S )a]  
    Create a module with name `name` in which you can add dynamic modules
    such as metrics or datasets. The module can be imported using its name.
    The module is created in the HF_MODULE_CACHE directory by default (~/.cache/huggingface/modules) but it can
    be overriden by specifying a path to another directory in `hf_modules_cache`.
    Texist_ok__init__.pywN)r   ospathjoinmakedirsexistsopen)r   r   dynamic_modules_path r+   D/home/ubuntu/.local/lib/python3.10/site-packages/evaluate/loading.pyinit_dynamic_modules8   s   	
r-   returnc                 C   sT   t | }t}d}|j D ]\}}t|tr't||r't	|r"q|} |S q|S )zMImport a module at module_path and return its main class, a Metric by defaultN)
	importlibimport_moduler   __dict__items
isinstancetype
issubclassinspect
isabstract)module_pathmodulemain_cls_typemodule_main_clsr   objr+   r+   r,   import_main_classJ   s   

 r=   
file_pathsc              	   C   s   g }| D ]}t j|r|tt|d q|| qg }|D ]}t|dd}||	  W d   n1 s=w   Y  q#t
|S )zt
    Convert a list of scripts or text files provided in file_paths into a hashed filename in a repeatable way.
    z
*.[pP][yY]utf-8encodingN)r$   r%   isdirextendlistr   rglobappendr)   	readlinesr   )r>   to_use_files	file_pathlinesfr+   r+   r,   files_to_hash[   s   rL   url_pathc                 C   s   t | }d}|jdv r[|jdkr[d| v r+| ds!td|  d| dd} | |fS |jd	d }d
|v r;|d
n|df\}}|d\}}d| d| d| d} | d| }| |fS )zMConvert a link to a file on a github repo in a link to the raw github object.N)httphttpss3z
github.comblob.pyzExternal import from github at z) should point to a file ending with '.py'rawr   z/tree/master/zhttps://github.com/z	/archive/z.zip-)r	   schemenetlocendswith
ValueErrorreplacer%   split)rM   parsedsub_directorygithub_path	repo_infobranch
repo_owner	repo_namer+   r+   r,   convert_github_urlo   s   
rd   resource_typec                 C   sF   t jst jr!zt| | d |dkd W dS  ty   Y dS w dS dS )z1Update the download count of a dataset or metric.rR   dataset)filenamerf   N)r   HF_EVALUATE_OFFLINEHF_UPDATE_DOWNLOAD_COUNTSr   	Exception)r   re   r+   r+   r,   increase_load_count   s   rk   rI   c           	         s  g }t | dd}||  W d   n1 sw   Y  td|  d g }d}|D ]}td|}t|dkr?| }|rBq.tjd	|tj	d
  du r]tjd|tj	d
  du r]q. 
drt fdd|D rnq. 
dr 
d}t|\}}|d 
d||f q. 
dr|d 
d 
ddf q. 
dr 
d}|d 
d|df q.|d 
d 
ddf q.|S )a  Find whether we should import or clone additional files for a given processing script.
        And list the import.

    We allow:
    - library dependencies,
    - local dependencies and
    - external dependencies whose url is specified with a comment starting from "# From:' followed by the raw url to a file, an archive or a github repository.
        external dependencies will be downloaded (and extracted if needed in the dataset folder).
        We also add an `__init__.py` to each sub-folder of a downloaded folder so the user can import from them in the script.

    Note that only direct import in the dataset processing script will be handled
    We don't recursively explore the additional import to download further files.

    Example::

        import tensorflow
        import .c4_utils
        import .clicr.dataset-code.build_json_dataset  # From: https://raw.githubusercontent.com/clips/clicr/master/dataset-code/build_json_dataset
    r?   r@   Nz	Checking z for additional imports.Fz[\s\S]*?"""[\s\S]*?r   z=^import\s+(\.?)([^\s\.]+)[^#\r\n]*(?:#\s+From:\s+)?([^\r\n]*))flagszQ^from\s+(\.?)([^\s\.]+)(?:[^\s]*)\s+import\s+[^#\r\n]*(?:#\s+From:\s+)?([^\r\n]*)c                 3   s"    | ]}|d    dkV  qdS )r      N)group).0impmatchr+   r,   	<genexpr>   s     zget_imports.<locals>.<genexpr>   externalrm   internallibrary)r)   rC   rG   loggerdebugrefindalllenrr   	MULTILINErn   anyrd   rF   )	rI   rJ   rK   importsis_in_docstringlinedocstr_start_matchrM   r^   r+   rq   r,   get_imports   sJ   





 r   	base_pathr   download_configc                 C   sb  g }g }|  }|jdu rd|_|D ]V\}}}}	|dkr$|||f q|| kr9td|  d| d| d| d	|d	krEt||d
 }
n|dkrL|}
ntdt|
|d}|	duratj||	}|||f qt	 }|D ]#\}}zt
|}W qn ty   |dkrdn|}|||f Y qnw |rtd|  ddd |D  dddd |D  d|S )a  
    Download additional module for a module <name>.py at URL (or local path) <base_path>/<name>.py
    The imports must have been parsed first using ``get_imports``.

    If some modules need to be installed with pip, an error is raised showing how to install them.
    This function return the list of downloaded modules as tuples (import_name, module_file_path).

    The downloaded modules can then be moved into an importable directory with ``_copy_script_and_other_resources_in_importable_dir``.
    NzDownloading extra modulesrw   zError in the z script, importing relative z module but z: is the name of the script. Please change relative import zl to another name and add a '# From: URL_OR_PATH' comment pointing to the original relative import file path.rv   rR   ru   zWrong import_typer   sklearnzscikit-learnzTo be able to use z0, you need to install the following dependenciesc                 S   s   g | ]\}}|qS r+   r+   ro   lib_namelib_pathr+   r+   r,   
<listcomp>      z0_download_additional_modules.<locals>.<listcomp>z using 'pip install  c                 S   s   g | ]\}}|qS r+   r+   r   r+   r+   r,   r     r   z' for instance')copydownload_descrF   rZ   r   r   r$   r%   r&   setr/   r0   ImportErroradd)r   r   r   r   local_importslibrary_importsimport_typeimport_nameimport_pathr^   url_or_filenamelocal_import_pathneeds_to_be_installedlibrary_import_namelibrary_import_pathlibr+   r+   r,   _download_additional_modules   s\   
r   importable_directory_pathsubdirectory_nameoriginal_local_pathr   additional_filesdownload_modec              	   C   sd  t j||}t j|| d }|d }	t|	 |tjkr*t j|r*t| t j	|dd t j|d}
t j|
sSt
|
d W d   n1 sNw   Y  t j	|dd t j|d}
t j|
s|t
|
d W d   n1 sww   Y  t j|st|| |dd d	 }t j|s||d
}t
|ddd}t|| W d   n1 sw   Y  |D ]@\}}t j|rt j||d }t j|st|| qt j|rt j||}t j|st|| qtd| |D ]\}}t j||}t j|rt||st|| q|W  d   S 1 s+w   Y  dS )a  Copy a script and its required imports to an importable directory

    Args:
        name (str): name of the resource to load
        importable_directory_path (str): path to the loadable folder in the dynamic modules directory
        subdirectory_name (str): name of the subdirectory in importable_directory_path in which to place the script
        original_local_path (str): local path to the resource script
        local_imports (List[Tuple[str, str]]): list of (destination_filename, import_file_to_copy)
        additional_files (List[Tuple[str, str]]): list of (destination_filename, additional_file_to_copy)
        download_mode (Optional[DownloadMode]): download mode

    Return:
        importable_local_file: path to an importable module with importlib.import_module
    rR   z.lockTr    r"   r#   Nr   z.json)zoriginal file pathzlocal file pathr?   r@   zError with local import at )r$   r%   r&   r   r   FORCE_REDOWNLOADr(   shutilrmtreer'   r)   copyfiler\   jsondumpisfilerB   copytreeOSErrorfilecmpcmp)r   r   r   r   r   r   r   importable_subdirectoryimportable_local_file	lock_pathinit_file_path	meta_pathmeta	meta_filer   r   full_path_local_import	file_nameoriginal_pathdestination_additional_pathr+   r+   r,   2_copy_script_and_other_resources_in_importable_dir  s^   

&r   
local_pathr*   module_namespacec              	   C   s   t j|||dd}t|jddd t|jd jdd t| gdd |D  }t	|
dd	 ||| |||d
}	td|	  dt j|||dd||
dd	 g}
|
|fS )NrU   --T)parentsr!   r"   r    c                 S   s   g | ]}|d  qS )r   r+   )ro   locr+   r+   r,   r   t  r   z+_create_importable_file.<locals>.<listcomp>)r   r   r   r   r   r   r   z#Created importable dataset file at .)r$   r%   r&   r[   r   mkdirparenttouchrL   r   r\   rx   ry   basename)r   r   r   r*   r   r   r   r   hashr   r8   r+   r+   r,   _create_importable_fileh  s$   		&r   c                   @   s   e Zd ZU eed< eed< dS )ImportableModuler8   r   N)__name__
__module____qualname__str__annotations__r+   r+   r+   r,   r     s   
 r   c                   @   s   e Zd ZdefddZdS )_EvaluationModuleFactoryr.   c                 C   s   t N)NotImplementedError)selfr+   r+   r,   
get_module  s   z#_EvaluationModuleFactory.get_moduleN)r   r   r   r   r   r+   r+   r+   r,   r     s    r   c                   @   sR   e Zd ZdZ				ddededee dee dee f
d	d
Zde	fddZ
dS )LocalEvaluationModuleFactoryzRGet the module of a local metric. The metric script is loaded from a local script.metricsNr%   module_typer   r   r*   c                 C   s4   || _ || _t|j| _|pt | _|| _|| _d S r   )	r%   r   r   stemr   r
   r   r   r*   )r   r%   r   r   r   r*   r+   r+   r,   __init__  s   
z%LocalEvaluationModuleFactory.__init__r.   c              	   C   sp   t | j}t| jtt| jj|| jd}| jr| jnt	 }t
| j|g || j| j| jd\}}t  t||S )Nr   r   r   r   r   r   r   r*   r   r   r   )r   r%   r   r   r   r   r   r   r*   r-   r   r   r   r/   invalidate_cachesr   )r   r   r   r*   r8   r   r+   r+   r,   r     s&   



z'LocalEvaluationModuleFactory.get_module)r   NNN)r   r   r   __doc__r   r   r
   r   r   r   r   r+   r+   r+   r,   r     s$    
r   c                   @   sr   e Zd ZdZ					ddededeeeef  dee dee	 d	ee fd
dZ
defddZdefddZdS )HubEvaluationModuleFactoryz?Get the module of a metric from a metric repository on the Hub.r   Nr   r   revisionr   r   r*   c                 C   sN   || _ || _|| _|pt | _|| _|| _| j ddksJ t|dd d S )NrU   r   metric)re   )	r   r   r   r
   r   r   r*   countrk   )r   r   r   r   r   r   r*   r+   r+   r,   r     s   	z#HubEvaluationModuleFactory.__init__r.   c                 C   sF   t | j| jdd d |d}| j }|jd u rd|_t||dS )NrU   r   rR   r%   r   r   zDownloading builder scriptr   )r   r   r\   r   r   r   r   )r   r   rI   r   r+   r+   r,   download_loading_script  s
    

z2HubEvaluationModuleFactory.download_loading_scriptc           	   
   C   s   | j ptdt}td|rd| }z| |}W n) tyC } z| j d u r7tdtdkr7d}| |}n|W Y d }~nd }~ww t|}t	| j
t| j
d|d|| jd}| jr^| jnt }t||g || j| j
| jd\}}t  t||S )	NHF_SCRIPTS_VERSIONz\d*\.\d*\.\d*vmain r   r   r   )r   r$   getenvr   rz   rr   r   FileNotFoundErrorr   r   r   r   r   r*   r-   r   r   r   r/   r   r   )	r   r   r   errr   r   r*   r8   r   r+   r+   r,   r     s@   


z%HubEvaluationModuleFactory.get_module)r   NNNN)r   r   r   r   r   r   r   r   r
   r   r   r   r   r   r+   r+   r+   r,   r     s,    
r   c                   @   s>   e Zd ZdZ		ddededee fddZd	efd
dZdS )CachedEvaluationModuleFactoryz
    Get the module of a metric that has been loaded once already and cached.
    The script that is loaded from the cache is the most recent one with a matching name.
    r   Nr   r   r*   c                 C   s*   || _ || _|| _| j ddksJ d S )NrU   r   )r   r   r*   r   )r   r   r   r*   r+   r+   r,   r     s   z&CachedEvaluationModuleFactory.__init__r.   c              	      s   j rj nt }tj|jj tj r#dd t D nd }|s2t	dj d|  fdd}t
||dd }td	tj | d
t|| dj d dtj|jj|jdd g}t  t||S )Nc                 S   s   g | ]
}t |d kr|qS )@   )r|   )ro   hr+   r+   r,   r     s    z<CachedEvaluationModuleFactory.get_module.<locals>.<listcomp>zMetric z is not cached in c                    s&   t  |  jdd d   jS )Nr   r   rR   )r   r   r\   statst_mtime)module_hashr   r   r+   r,   _get_modification_time  s   $zHCachedEvaluationModuleFactory.get_module.<locals>._get_modification_time)keyr   z3Using the latest cached version of the module from z (last modified on z() since it couldn't be found locally at z&, or remotely on the Hugging Face Hub.r   r   )r*   r-   r$   r%   r&   r   r   rB   listdirr   sortedrx   warningtimectimer   r\   r/   r   r   )r   r*   hashesr   r   r8   r+   r   r,   r     s,   
$
z(CachedEvaluationModuleFactory.get_module)r   N)	r   r   r   r   r   r   r   r   r   r+   r+   r+   r,   r     s    
r   r%   r   r   force_local_pathc                 K   s  |du rt di |}t|ptj}d|_d|_ttdd | tj	d
dd }|ds4|d }tj| |}	| |rXtj| rOt| ||d S td	t|  tj|	rgt|	||d S t| r]| dd
kr]|s]zX| ddkr|du rdD ] }
ztd|
 d|  ||||d W   W S  ty   Y qw ttd| d|  ||||d W S | dd
krt| ||||d W S W dS  ty\ } z| ddkrdD ],}
ztd|
 d|  |d W   W  Y d}~S  ty } zW Y d}~qd}~ww n/| dd
kr?zt| dd|d W W  Y d}~S  ty> } zW Y d}~nd}~ww t|ttfsJ|dtdt|	 d|  ddd}~ww tdt|	 d)a  
    Download/extract/cache a metric module.

    Metrics codes are cached inside the the dynamic modules cache to allow easy import (avoid ugly sys.path tweaks).

    Args:

        path (str): Path or name of the metric script.

            - if ``path`` is a local metric script or a directory containing a local metric script (if the script has the same name as the directory):
              -> load the module from the metric script
              e.g. ``'./metrics/accuracy'`` or ``'./metrics/accuracy/accuracy.py'``.
            - if ``path`` is a metric on the Hugging Face Hub (ex: `glue`, `squad`)
              -> load the module from the metric script in the github repository at huggingface/datasets
              e.g. ``'accuracy'`` or ``'rouge'``.

        revision (Optional ``Union[str, datasets.Version]``):
            If specified, the module will be loaded from the datasets repository at this version.
            By default:
            - it is set to the local version of the lib.
            - it will also try to load it from the master branch if it's not available at the local version of the lib.
            Specifying a version that is different from your local version of the lib might cause compatibility issues.
        download_config (:class:`DownloadConfig`, optional): Specific download configuration parameters.
        download_mode (:class:`DownloadMode`, default ``REUSE_DATASET_IF_EXISTS``): Download/generate mode.
        force_local_path (Optional str): Optional path to a local path to download and prepare the script to.
            Used to inspect or modify the script folder.
        dynamic_modules_path (Optional str, defaults to HF_MODULES_CACHE / "datasets_modules", i.e. ~/.cache/huggingface/modules/datasets_modules):
            Optional path to the directory in which the dynamic modules are saved. It must have been initialized with :obj:`init_dynamic_modules`.
            By default the datasets and metrics are stored inside the `datasets_modules` module.
        download_kwargs: optional attributes for DownloadConfig() which will override the attributes in download_config if supplied.

    Returns:
        ImportableModule
    NTc                 S   s   | S r   r+   )xr+   r+   r,   <lambda>a  s    z+evaluation_module_factory.<locals>.<lambda>rU   r   rR   )r   r*   z!Couldn't find a metric script at r   r   )r   
comparisonmeasurementz	evaluate-)r   r   r   r*   r   )r*   z!Couldn't find a module script at z
. Module 'z/' doesn't exist on the Hugging Face Hub either.r   r+   )r
   r   REUSE_DATASET_IF_EXISTSextract_compressed_fileforce_extractrD   filterr[   r$   sepr\   rY   r%   r&   r   r   r   r   r   r   r   r   ConnectionErrorrj   r   r3   )r%   r   r   r   r   r   r*   download_kwargsrg   combined_pathcurrent_typee1e2r+   r+   r,   evaluation_module_factory/  s   ,&

 
r  Fconfig_name
process_idnum_process	cache_direxperiment_idkeep_in_memoryc              
   K   s   t |	pt j}	t| ||
||	d}t|j}|d	|||||||jd|}|r;||jkr;td| d|  d|j d|j|d |S )
a	  Load a [`~evaluate.EvaluationModule`].

    Args:

        path (`str`):
            Path to the evaluation processing script with the evaluation builder. Can be either:
                - a local path to processing script or the directory containing the script (if the script has the same name as the directory),
                    e.g. `'./metrics/rouge'` or `'./metrics/rouge/rouge.py'`
                - a evaluation module identifier on the HuggingFace evaluate repo e.g. `'rouge'` or `'bleu'` that are in either `'metrics/'`,
                    `'comparisons/'`, or `'measurements/'` depending on the provided `module_type`
        config_name (`str`, *optional*):
            Selecting a configuration for the metric (e.g. the GLUE metric has a configuration for each subset).
        module_type (`str`, default `'metric'`):
            Type of evaluation module, can be one of `'metric'`, `'comparison'`, or `'measurement'`.
        process_id (`int`, *optional*):
            For distributed evaluation: id of the process.
        num_process (`int`, *optional*):
            For distributed evaluation: total number of processes.
        cache_dir (`str`, *optional*):
            Path to store the temporary predictions and references (default to `~/.cache/huggingface/evaluate/`).
        experiment_id (`str`):
            A specific experiment id. This is used if several distributed evaluations share the same file system.
            This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
        keep_in_memory (`bool`):
            Whether to store the temporary results in memory (defaults to `False`).
        download_config ([`~evaluate.DownloadConfig`], *optional*):
            Specific download configuration parameters.
        download_mode ([`DownloadMode`], defaults to `REUSE_DATASET_IF_EXISTS`):
            Download/generate mode.
        revision (`Union[str, evaluate.Version]`, *optional*):
            If specified, the module will be loaded from the datasets repository
            at this version. By default it is set to the local version of the lib. Specifying a version that is different from
            your local version of the lib might cause compatibility issues.

    Returns:
        [`evaluate.EvaluationModule`]

    Example:

        ```py
        >>> from evaluate import load
        >>> accuracy = load("accuracy")
        ```
    )r   r   r   r   )r	  r
  r  r  r  r  r   zNo module of module type 'z' not found for 'zD' locally, or on the Hugging Face Hub. Found module of module type 'z
' instead.r   Nr+   )	r   r   r  r=   r8   r   r   	TypeErrordownload_and_prepare)r%   r	  r   r
  r  r  r  r  r   r   r   init_kwargsevaluation_moduleevaluation_clsevaluation_instancer+   r+   r,   load  s,   :

r  )NNNNNN)
NNr   r   NNFNNN)Ir   r   r/   r6   r   r$   rz   r   r   dataclassesr   pathlibr   typingr   r   r   r   r   urllib.parser	   datasetsr
   r   datasets.builderr   datasets.packaged_modulesr   r   datasets.utils.filelockr   datasets.utils.versionr   r   r   r   r9   r   utils.file_utilsr   r   r   r   r   r   r   utils.loggingr   r   rx   rD   keysALL_ALLOWED_EXTENSIONSMODULE_NAME_FOR_DYNAMIC_MODULESr   r-   r=   rL   rd   rk   r   r   r   r   r   r   r   r   r   r  intboolr  r+   r+   r+   r,   <module>   s"  $	
"	I
=
W

+D2	
 	
