o
    xi{+                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlZ	ddl
mZ ddlZddlmZ ddlmZ ddlmZ dd	 Zd
d Zdd Zdd Zdd Zdd ZdS )a:  Prodigy integration for W&B.

User can upload Prodigy annotated datasets directly
from the local database to W&B in Tables format.

Example usage:

```python
import wandb
from wandb.integration.prodigy import upload_dataset

run = wandb.init(project="prodigy")
upload_dataset("name_of_dataset")
wandb.finish()
```
    N)deepcopy)Image)util)test_missing)	telemetryc                 C   sN   t jddd}t jddd t| dr%|jj| dddd	d
}t|}|S dS )zCreate a named entity visualization.

    Taken from https://github.com/wandb/wandb/blob/main/wandb/plots/named_entity.py.
    spacyzKpart_of_speech requires the spacy library, install with `pip install spacy`requireden_core_web_mdhpart_of_speech requires `en_core_web_md` library, install with `python -m spacy download en_core_web_md`docsentTF)stylepageminifyjupyterN)r   
get_moduler   displacyrenderwandbHtml)r   r   html
wandb_html r   U/home/ubuntu/.local/lib/python3.10/site-packages/wandb/integration/prodigy/prodigy.pynamed_entity!   s   


r   c                 C   sT   t | }| D ]\}}t|tjjrt||i |||< qt || ||< q|S )z@Return a new dictionary by merging two dictionaries recursively.)r   items
isinstancecollectionsabcMappingmergeget)dict1dict2resultkeyvaluer   r   r   r"   9   s   r"   c                 C   s  t | D ]\}}| D ]\}}||vrst|trWt|dkr-t|d tr-t|||< qt|dkrCt|d ttfsCt|||< q|| i ||< t||| |||< qt|trli ||< t|g|| |||< qt|||< q|| }t|trt|dkrt|d tr|durt|||< qt|dkrt|d ttfs|durt|||< q|| i ||< t||| |||< t	|| |||< qt|tri ||< t|g|| |||< t	|| |||< q|durt|||< qq|S )z7Get a schema of the dataset's structure and data types.r   N)
	enumerater   r   listlentypedictappend
get_schemar"   )list_data_dictstructarray_dict_types_iitemkv
cur_structr   r   r   r/   F   sT   



7r/   c                 C   s  |  D ]\}}|| vr6t|tr"||vr"i | |< t| | || qt|tr0||v r0d| |< q| | |< qt| | trtt| | dkoMt| | d t oct| | dkobt| | d ttf  }|rs| | D ]}t||| qjqt| | trt| | || qdS )zStandardize all rows/entries in dataset to fit the schema.

    Will look for missing values and fill it in so all rows have
    the same items and structure.
    Nr   )r   r   r-   standardizer*   r+   )r4   	structurer2   r5   r6   	conditionsub_itemr   r   r   r8      s,   
"&r8   c              	   C   s  t | }t|j}d|jv rd|jv r|d d|v r"|d tj|d}|jdd}tj	d	d
d}|j
dgd}t|D ]\}}d|v rd|v rd|d< ||d }	g }
d|v r|d dur|d D ] }d|v rd|v rd|v r|	|d |d |d }|
| qg|
|	_t|	d|d< d|v r8d|d< d|v r8|d dur8tj|d jdv }d|d v od|d v }|rzttj|d }t||d< W nc tjjy   td|d  d d|d< Y nJw |r/|d dd }zt|}t|}t|}t||d< W n$ tjj y.   td|d  d d|d< Y n
w t|d |d< t|! }|j"|  q@|S )zCreate a W&B Table.

    - Create/decode images from URL/Base64
    - Uses spacy to translate NER span data to visualizations.
    spanstextspans_visualimageimage_visual)columnsrecords)orientr
   r   r   ner)disableNstartendlabelr   )httphttpszdata:z;base64z
Image URL z is invalid.zbase64,   zBase64 string )#pd	DataFramer*   rA   r.   r   Tableto_dictr   r   loadr)   	char_spanentsr   urllibparseurlparseschemer   openrequesturlopenerrorURLErrortermwarnsplitbase64	b64decodeioBytesIObinasciiErrorvaluesadd_data)datatable_dfrA   
main_tablematrixr
   nlpr3   documentdocrR   spancharspanisurlisbase64imimgb64msgbufvalues_listr   r   r   create_table   st   









rv   c           
      C   s   t jdu r	tdtjt jd}d|j_W d   n1 sw   Y  tjddd}|	 }|
| }g }t|i |}t|D ]\}}t|| || q@t|}	t | |	i t d|  d	 dS )
zUpload dataset from local database to Weights & Biases.

    Args:
        dataset_name: The name of the dataset in the Prodigy database.
    Nz2You must call wandb.init() before upload_dataset())runTzprodigy.components.dbzY`prodigy` library is required but not installed. Please see https://prodi.gy/docs/installr   zProdigy dataset `z` uploaded.)r   rw   
ValueErrorwb_telemetrycontextfeatureprodigyr   r   connectget_datasetr/   r)   r8   rv   logtermlog)
dataset_nametel
prodigy_dbdatabaserf   r2   schemai_dtabler   r   r   upload_dataset   s$   


r   )__doc__r^   collections.abcr   r`   rS   copyr   pandasrL   PILr   r   r   wandb.plot.utilsr   wandb.sdk.libr   ry   r   r"   r/   r8   rv   r   r   r   r   r   <module>   s$    @&T