o
    Ni,                     @   sj  d Z ddlZddlmZ ddlZddlmZmZmZm	Z	m
Z
 ddlZddlm  mZ ddlZdZdZdZdgZeeeee
e f f Zeee	eeee	eeeef f f f f ZG d	d
 d
eZdedeeeef fddZdee defddZdededefddZej j!" defddZ#G dd deZ$ej j!" dd Z%dd Z&d"ddZ'd"d d!Z(dS )#zyUtil to generate the dataset documentation content.

Used by tensorflow_datasets/scripts/documentation/build_catalog.py

    N)futures)DictListTupleUnionSet   2   zFhttps://github.com/tensorflow/datasets/tree/master/tensorflow_datasetswmt_translatec                   @   sN   e Zd ZdZejdZdZdd Z	dd Z
dejjd	efd
dZdd ZdS )VisualizationDocUtilz?Small util which generate the path/urls for the visualizations.zvisualization/z7https://storage.googleapis.com/tfds-data/visualization/c                 C   s   |j jddd S )N/-z.png)info	full_namereplaceselfbuilder r   o/home/ubuntu/.local/lib/python3.10/site-packages/tensorflow_datasets/scripts/documentation/document_datasets.py	_get_name9   s   zVisualizationDocUtil._get_namec                 C   s   | j | | S N)BASE_URLr   r   r   r   r   get_url<   s   zVisualizationDocUtil.get_urlr   returnc                 C   s   |  |}d| dS )zReturns the <img> html tag.z
<img src="z$" alt="Visualization" width="500px">)r   )r   r   urlr   r   r   get_html_tag?   s   
z!VisualizationDocUtil.get_html_tagc                 C   s$   t j| j| |}tjj|S r   )	ospathjoin	BASE_PATHr   tfiogfileexists)r   r   filepathr   r   r   has_visualizationD   s   z&VisualizationDocUtil.has_visualizationN)__name__
__module____qualname____doc__tfdscoregcs_pathr    r   r   r   DatasetBuilderstrr   r&   r   r   r   r   r   3   s    r   r   r   c                 C   s^   t jj| std|  d| d^}}}t|dks J |r(tt|nd}|||fS )z=Extracts the `(ds name, config, version)` from the full_name.zParsing builder name string zd failed.The builder name string must be of the following format:`dataset_name[/config_name]/version`r       )	r+   r,   
registeredis_full_name
ValueErrorsplitlennextiter)r   ds_nameoptional_configversionconfigr   r   r   _split_full_nameI   s   

r=   
full_namesc                 C   s<   t dd }| D ]}t|\}}}|| | | q	|S )z5Creates the dict `d['dataset']['config']['version']`.c                   S   
   t tS r   )collectionsdefaultdictsetr   r   r   r   <lambda>Y      
 z%_full_names_to_dict.<locals>.<lambda>)r@   rA   r=   add)r>   full_names_dictr   r9   r<   r;   r   r   r   _full_names_to_dictV   s   rG   registered_dsstable_version_dsc                 C   s   t dd }| D ]A}||v rF| | D ]1}||| v r>| | | D ]}||| | v r4d|| | |< q!d|| | |< q!qd|| |< qq	d||< q	|S )z>Computes the nightly dict from the registered and stable dict.c                   S      t dd S )Nc                   S   r?   r   )r@   rA   boolr   r   r   r   rC   g   rD   z7_build_nightly_dict.<locals>.<lambda>.<locals>.<lambda>r@   rA   r   r   r   r   rC   f       z%_build_nightly_dict.<locals>.<lambda>FTrL   )rH   rI   
nightly_dsdatasetr<   r;   r   r   r   _build_nightly_dict`   s    
rP   c                  C   sl   t jjd} tjj| d}| 	 }W d   n1 s w   Y  t
t jj }t
|}t||S )z$Loads (and caches) the nightly dict.zstable_versions.txtrN)r+   r,   utilsget_tfds_pathr!   r"   r#   GFileread
splitlinesrG   r2   list_full_namesrP   )version_pathfstable_versionsrH   rI   r   r   r   _load_nightly_dict~   s   

r[   c                   @   s   e Zd ZdZdd Zdeejje	f de
fddZdejjde
fdd	Zdejjd
e	de
fddZdejjde
fddZdZdS )NightlyDocUtilzSmall util to format the doc.c                 C   s   t  | _d S r   )r[   _nightly_dict)r   r   r   r   __init__   s   zNightlyDocUtil.__init__r   r   c                 C   s(   t |tjjr|j}n|}| j| du S )z%Returns `True` if the builder is new.T)
isinstancer+   r,   r.   namer]   )r   r   builder_namer   r   r   is_builder_nightly   s   z!NightlyDocUtil.is_builder_nightlyc                 C   s2   t |jj\}}}| |rdS | j| | du S )z$Returns `True` if the config is new.FT)r=   r   r   rb   r]   )r   r   r9   r<   _r   r   r   is_config_nightly   s   
z NightlyDocUtil.is_config_nightlyr;   c                 C   s@   t |jj\}}}| |s| |rdS | j| | | du S )z%Returns `True` if the version is new.FT)r=   r   r   rb   rd   r]   )r   r   r;   r9   r<   rc   r   r   r   is_version_nightly   s   z!NightlyDocUtil.is_version_nightlyc                    s    fdd  | j |j S )z9Returns True if any of the builder/config/version is new.c                    s@   t | tr| S t | trt fdd|  D S td|  )Nc                 3   s    | ]} |V  qd S r   r   ).0xreducer   r   	<genexpr>   s    z=NightlyDocUtil.has_nightly.<locals>.reduce.<locals>.<genexpr>zInvalid nightly_dict value: )r_   rK   dictanyvaluesAssertionError)valuerh   r   r   ri      s
   

z*NightlyDocUtil.has_nightly.<locals>.reduce)r]   r`   r   r   rh   r   has_nightly   s   zNightlyDocUtil.has_nightlyzb<span class="material-icons" title="Available only in the tfds-nightly package">nights_stay</span>N)r'   r(   r)   r*   r^   r   r+   r,   r.   r/   rK   rb   rd   re   rp   iconr   r   r   r   r\      s&    

r\   c                 C   s^   t jjd|  }tjj|d}| }W d   n1 s w   Y  t	j
j|ddgdS )zReturns mako.lookup.Template object to use to render documentation.

  Args:
    tmpl_name: string, name of template to load.

  Returns:
    mako 'Template' instance that can be rendered.
  z*scripts/documentation/templates/%s.mako.mdrQ   Nr/   trim)default_filters)r+   r,   rR   rS   r!   r"   r#   rT   rU   makolookupTemplate)	tmpl_name	tmpl_pathtmpl_ftmpl_contentr   r   r   get_mako_template   s   

r{   c           	         s   t d j   fdd}g } jr1tjtd}t|| j}W d   n1 s,w   Y  t	d}t
 }|j ||t d }t	d}|j ||d	 }|d
 | }|S )z9Doc string for a single builder, with or without configs.zDocument builder %s...c                    s   t j j| dS )Nr<   )r+   r   r`   r|   r   r   r   rC      s    z)document_single_builder.<locals>.<lambda>max_workersNrO   )r   config_buildersvisu_doc_utilnightly_doc_util
schema_org)r   r   r   
)printr`   builder_configsr   ThreadPoolExecutorWORKER_COUNT_CONFIGSlistmapBUILDER_CONFIGSr{   r   render_unicoder\   strip)	r   get_config_builderr   tpooltmplr   out_strschema_org_tmplschema_org_out_strr   r}   r   document_single_builder   s:   r   c           	      C   s   t dd }| sdd t D } tdt|   tjtd}|	tj
| }W d   n1 s2w   Y  td |D ] }|jj}|d	}d
|v rMq=|}|D ]}|| }qQ|| q=|d }|S )z5Get all builders organized by module in nested dicts.c                   S   rJ   )Nc                   S   r?   r   )r@   rA   r   r   r   r   r   rC      rD   z?make_module_to_builder_dict.<locals>.<lambda>.<locals>.<lambda>rL   r   r   r   r   rC      rM   z-make_module_to_builder_dict.<locals>.<lambda>c                 S   s   g | ]}|t vr|qS r   )BUILDER_BLACKLIST)rf   r`   r   r   r   
<listcomp>   s    z/make_module_to_builder_dict.<locals>.<listcomp>z0Creating the vanilla builders for %s datasets...r~   Nz>Vanilla builders built, constructing module_to_builder dict....testingtensorflow_datasets)r@   rA   r+   list_buildersr   r6   r   r   WORKER_COUNT_DATASETSr   r   	__class__r(   r5   append)	datasetsmodule_to_builderr   buildersr   module_namemodulescurrent_mod_ctrmodr   r   r   make_module_to_builder_dict   s.   

r   c              	   C   s   t d t| }tt| }tt}|D ]@}tj	|| }t|dd d}t
d}tjtd}|t|}W d   n1 sDw   Y  dd	 t||D }|||< qt
d
}	|	  }
|
|gS )a  Create dataset documentation string for given datasets.

  Args:
    datasets: list of datasets for which to create documentation.
              If None, then all available datasets will be used.

  Returns:
    - overview document
    - a dictionary of sections. Each dataset in a section is represented by a
    tuple (dataset_name, is_manual_dataset, string describing the datasets
    (in the MarkDown format))
  z"Retrieving the list of builders...c                 S   s   | j S r   )r`   )br   r   r   rC   %  s    z"dataset_docs_str.<locals>.<lambda>)keyrO   r~   Nc                 S   s   g | ]\}}|j |j|fqS r   )r`   MANUAL_DOWNLOAD_INSTRUCTIONS)rf   r   builder_docr   r   r   r   )  s
    z$dataset_docs_str.<locals>.<listcomp>catalog_overview)r   r   sortedr   keysr@   rA   r!   nestflattenr{   r   r   r   r   r   zipr   lstrip)r   r   sectionssection_docssectionr   unused_r   builder_docsr   r   r   r   r   dataset_docs_str  s$   

r   r   ))r*   r@   
concurrentr   r   typingr   r   r   r   r   mako.lookuprt   tensorflow.compat.v2compatv2r!   r   r+   r   r   r   r   r/   FullNamesDictrK   NightlyDictobjectr   r=   rG   rP   r,   rR   memoizer[   r\   r{   r   r   r   r   r   r   r   <module>   s@   ,



4

!