o
    wiO                     @  s  d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	Z	ddl
mZmZ ddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZ ddlZddlZddlZddlZddlm  mZ  ddl!Z!dd
l"m#Z# ddlm$Z$ ddl%Z%ddl&m'Z'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6 ddl7m8Z8m9Z9 e%j:Z:e%j;Z;e%j<j=j>Z>dZ?dZ@dZAdZBdZCdZDdZEdZFdZGdZHeIeJZKeKLejM eNdrdd lOmPZP eKQePd!d!d" neR ZSeSLejM eTd#ZUeSVeU eKQeS eG d$d% d%ZWG d&d' d'ZXG d(d) d)ZYdZd.d/ZZd[d5d6Z[d\d]d9d:Z\d;d< Z]d^d>d?Z^eG d@dA dAZ_eG dBdC dCZ`d_dFdGZad`dadHdIZbdbdKdLZcdcdOdPZdd`dddQdRZededSdTZfdfdXdYZgdS )gzTooling for the W&B Importer.    )annotationsN)IterableIterator)	dataclassfield)datetime)Path)Any)patch)gql)Report)ArtifactCollectionRun)File)	json_util)coalesceremove_keys_with_none_values   )
validation)internal)PathStrPolicy)	Namespacefor_eachzartifact_errors.jsonlzartifact_successes.jsonlzrun_errors.jsonlzrun_successes.jsonl"__ART_SEQUENCE_DUMMY_PLACEHOLDER____RUN_DUMMY_PLACEHOLDER____importer_temp____temp__z./artifacts/srcz./artifacts/dst"WANDB_IMPORTER_ENABLE_RICH_LOGGING)RichHandlerT)rich_tracebackstracebacks_show_localsz)%(asctime)s - %(levelname)s - %(message)sc                   @  sf   e Zd ZU ded< ded< ded< ded< ded< dd
dZdddZedddZedddZ	dS )ArtifactSequencezIterable[wandb.Artifact]	artifactsstrentityprojecttype_namereturnr   c                 C  s
   t | jS N)iterr#   self r.   N/home/ubuntu/.local/lib/python3.10/site-packages/wandb/apis/importers/wandb.py__iter__R   s   
zArtifactSequence.__iter__c                 C  s   d| j  dS )NzArtifactSequence()
identifierr,   r.   r.   r/   __repr__U      zArtifactSequence.__repr__c                 C  s   d | j| j| j| jgS N/)joinr%   r&   r'   r(   r,   r.   r.   r/   r3   X   s   zArtifactSequence.identifier
collectionr   c                 C  s0   |  }t|dd d}t||j|j|j|jS )Nc                 S     t | jdS Nvintversionlstripar.   r.   r/   <lambda>_       z2ArtifactSequence.from_collection.<locals>.<lambda>key)r#   sortedr"   r%   r&   typer(   )clsr9   artsr.   r.   r/   from_collection\   s   z ArtifactSequence.from_collectionN)r)   r   r)   r$   )r9   r   )
__name__
__module____qualname____annotations__r0   r4   propertyr3   classmethodrK   r.   r.   r.   r/   r"   J   s   
 

r"   c                   @  s~  e Zd Zdbd
dZdcddZdcddZdcddZdcddZddddZdeddZ	dfddZ
dgddZdgd d!Zdcd"d#Zdgd$d%Zdhd'd(Zdid*d+Zdid,d-Zdgd.d/Zdgd0d1Zdgd2d3Zdgd4d5Zdgd6d7Zdgd8d9Zdgd:d;Zdgd<d=Zdjd?d@ZdjdAdBZdjdCdDZdjdEdFZdgdGdHZdgdIdJZdkdLdMZ dldOdPZ!dddQdRZ"dddSdTZ#dmdVdWZ$dndYdZZ%dod\d]Z&dpd_d`Z'daS )qWandbRunrunr   src_base_urlr$   src_api_keydst_base_urldst_api_keyr)   Nonec                C  sJ   || _ tj|d|id| _tj|d|id| _d | _d | _d | _d | _d S )Nbase_urlapi_key	overrides)	rT   wandbApiapidst_api_files
_artifacts_used_artifacts_parquet_history_paths)r-   rT   rU   rV   rW   rX   r.   r.   r/   __init__j   s   	
zWandbRun.__init__c                 C  s(   t j|  |  |  }d| dS )Nz	WandbRun(r1   )ospathr8   r%   r&   run_idr-   sr.   r.   r/   r4      s   zWandbRun.__repr__c                 C     | j jS r*   )rT   idr,   r.   r.   r/   ri         zWandbRun.run_idc                 C  rl   r*   )rT   r%   r,   r.   r.   r/   r%      rn   zWandbRun.entityc                 C  rl   r*   )rT   r&   r,   r.   r.   r/   r&      rn   zWandbRun.projectdict[str, Any]c                 C  rl   r*   )rT   configr,   r.   r.   r/   rp      rn   zWandbRun.configdict[str, float]c                 C  s   | j j}|S r*   )rT   summaryrj   r.   r.   r/   rr      s   zWandbRun.summaryIterable[dict[str, float]]c                 c  sZ    | j d u rt|  | _ | j r|  }n
td | j }|D ]	}t|}|V  q!d S )NzHNo parquet files detected; using scan history (this may not be reliable))	re   list_get_parquet_history_paths$_get_rows_from_parquet_history_pathsloggerwarningrT   scan_historyr   )r-   rowsrowr.   r.   r/   metrics   s   


zWandbRun.metrics
str | Nonec                 C  rl   r*   )rT   groupr,   r.   r.   r/   	run_group   rn   zWandbRun.run_groupc                 C  rl   r*   )rT   job_typer,   r.   r.   r/   r      rn   zWandbRun.job_typec                 C  rl   r*   )rT   display_namer,   r.   r.   r/   r      rn   zWandbRun.display_namec                 C  sD   d| j j }d| j jj }||g}| j jpd}d|d | S )NzImported from: zAuthor:  
z
---
)rT   urluserusernamenotesr8   )r-   previous_linkprevious_authorheaderprevious_notesr.   r.   r/   r      s
   zWandbRun.noteslist[str] | Nonec                 C  rl   r*   )rT   tagsr,   r.   r.   r/   r      rn   zWandbRun.tagsIterable[Artifact] | Nonec                 c  H    | j d u rg }| j D ]}t|}|| q|| _ | j E d H  d S r*   )rc   rT   logged_artifacts
_clone_artappend)r-   rc   artrB   r.   r.   r/   r#         
zWandbRun.artifactsc                 c  r   r*   )rd   rT   used_artifactsr   r   )r-   rd   r   rB   r.   r.   r/   r      r   zWandbRun.used_artifactsc                 C     d S r*   r.   r,   r.   r.   r/   
os_version       zWandbRun.os_versionc                 C     |   dS )Npython_metadata_filegetr,   r.   r.   r/   python_version   r5   zWandbRun.python_versionc                 C  r   r*   r.   r,   r.   r.   r/   cuda_version   r   zWandbRun.cuda_versionc                 C  r   r*   r.   r,   r.   r.   r/   program   r   zWandbRun.programc                 C  r   )Nhostr   r,   r.   r.   r/   r      r5   zWandbRun.hostc                 C  r   r*   r.   r,   r.   r.   r/   r      r   zWandbRun.usernamec                 C  r   r*   r.   r,   r.   r.   r/   
executable   r   zWandbRun.executablec                 C  r   r*   r.   r,   r.   r.   r/   	gpus_used   r   zWandbRun.gpus_used
int | Nonec                 C  r   r*   r.   r,   r.   r.   r/   	cpus_used      zWandbRun.cpus_usedc                 C  r   r*   r.   r,   r.   r.   r/   memory_used   r   zWandbRun.memory_usedc                 C  sB   | j jdi d}| j jd}t|| }d u r|S t|S )N_wandbruntime_runtime)rT   rr   r   r   r>   )r-   wandb_runtimebase_runtimetr.   r.   r/   r      s
   zWandbRun.runtimec                 C  s   t | jj d }t|S )Ni  )dtfromisoformatrT   
created_at	timestampr>   )r-   r   r.   r.   r/   
start_time   s   zWandbRun.start_timec                 C  s   |   dd}d| S )NcodePathr   zcode/r   )r-   rh   r.   r.   r/   	code_path   s   
zWandbRun.code_pathc                 C  s   |   di di dS )Nr   valuecli_version)_config_filer   r,   r.   r.   r/   r      s   zWandbRun.cli_version'Iterable[tuple[PathStr, Policy]] | Nonec                 c  s    | j d u r=tj d|   d}g }| j D ]!}|jdkr qd|jv r&q|j|d| j	d}|jdf}|
| q|| _ | j E d H  d S )Nr7   z/filesr   zwandb_manifest.json.deadlistT)exist_okr`   end)rb   r   ROOT_DIRri   rT   filessizer(   downloadr`   r   )r-   	files_dirrb   fresultfile_and_policyr.   r.   r/   r      s   



zWandbRun.filesIterable[str] | Nonec              	   c  sP    |  d}|D ]}t|}| E d H  W d    n1 s w   Y  qd S )Nz^.*output\.log$)_find_all_in_files_regexopen	readlines)r-   	log_filesrh   r   r.   r.   r/   logs  s   

zWandbRun.logsc                 C  sP   |  d }d u ri S t|}t| W  d    S 1 s!w   Y  d S )Nwandb-metadata.json)_find_in_filesr   jsonloadsreadr-   fnamer   r.   r.   r/   r     
   
$zWandbRun._metadata_filec                 C  sP   |  d }d u ri S t|}t|pi W  d    S 1 s!w   Y  d S )Nzconfig.yaml)r   r   yaml	safe_loadr   r.   r.   r/   r     r   zWandbRun._config_fileIterable[dict[str, Any]]c                 c  sd    |    }si V  d S dd |D }dt| }v r'|tdtj}|jddE d H  d S )Nc                 S  s*   g | ]}t |d D ]}t|qqS )z	*.parquet)r   globplread_parquet).0rh   pr.   r.   r/   
<listcomp>+  s
    
zAWandbRun._get_rows_from_parquet_history_paths.<locals>.<listcomp>_stepTnamed)ru   
_merge_dfswith_columnsr   colcastInt64	iter_rows)r-   pathsdfsdfr.   r.   r/   rv   $  s   z-WandbRun._get_rows_from_parquet_history_pathsIterable[str]c                 c  sn    | j d u r/g }| j D ]}|jdkrqt|t d|j d }d u r&q|| q|| _ | j E d H  d S )Nwandb-historyr7   root)re   rT   r   rH   _download_artSRC_ART_PATHr(   r   )r-   r   r   rh   r.   r.   r/   ru   2  s   

z#WandbRun._get_parquet_history_pathsr(   c                 C  s.   |    }r|D ]\}}||v r|  S qd S r*   )r   )r-   r(   r   rh   _r.   r.   r/   r   B  s   zWandbRun._find_in_filesregexc                 c  s6    |    }r|D ]\}}t||r|V  q	d S d S r*   )r   rematch)r-   r   r   rh   r   r.   r.   r/   r   I  s   z!WandbRun._find_all_in_files_regexN)rT   r   rU   r$   rV   r$   rW   r$   rX   r$   r)   rY   rL   )r)   ro   )r)   rq   )r)   rs   )r)   r}   )r)   r   )r)   r   )r)   r   )r)   r   )r)   r   )r)   r   )r)   r   )r(   r$   r)   r}   )r   r$   r)   r   )(rM   rN   rO   rf   r4   ri   r%   r&   rp   rr   r|   r   r   r   r   r   r#   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rv   ru   r   r   r.   r.   r.   r/   rS   i   sL    






































rS   c                   @  s  e Zd ZdZdddddZdd ZddddddZ	ddddZddddd Zdd!d"Z		ddd)d*Z
	+ddd1d2Zdd4d5Zdd6d7Zdd9d:Zdddd;ddBdCZddddFdGZddddIdJZdddKdKdddKdKdKdKdKdKdKdLddYdZZdddd[dd\d]ZddKddd^dd_d`ZdKdKdKddKddaddedfZddgddhdiZddgddkdlZ	+	KdddqdrZdKddsddvdwZdxdy Zdd{d|ZdKd+dKdd}dd~dZdddddddddZddddddZdddddddZdddddddZ dS )WandbImporterzFTransfers runs, reports, and artifact sequences between W&B instances.N)custom_api_kwargsrU   r$   rV   rW   rX   r   dict[str, Any] | Noner)   rY   c                C  st   || _ || _|| _|| _|d u rddi}tjd|d|id|| _tjd|d|id|| _||||d| _d S )NtimeoutiX  rZ   r[   )rU   rV   rW   rX   r.   )	rU   rV   rW   rX   r^   r_   src_apira   run_api_kwargs)r-   rU   rV   rW   rX   r   r.   r.   r/   rf   S  s.   	zWandbImporter.__init__c                 C  s   d| j  d| j dS )Nz<WandbImporter src=z, dst=>)rU   rW   r,   r.   r.   r/   r4   v  s   zWandbImporter.__repr__	namespacerp   rT   rS   r   Namespace | Nonerp   !internal.SendManagerConfig | Nonec             	   C  s   |du rt | | }|du rtjdddddddd}| j| jddd}td| tj	||j
||d |jrztd| g }|j D ]}|jd	krRqJtd
|j t|}|| qJtd| tj	|||j
||d dS dS )zzImport one WandbRun.

        Use `namespace` to specify alternate settings like where the run should be uploaded
        NTmetadatar   mediacodehistoryrr   terminal_outputtruer\   rZ   resumeresumedzImporting run, run=r]   settings_overriderp   z"Collecting history artifacts, run=r   z%Collecting history artifact art.name=z!Importing history artifacts, run=
extra_artsr]   r  rp   )r   r%   r&   r   SendManagerConfigrX   rW   rw   debugsend_runsend_manager_overridesr   rT   r   rH   r(   r   r   )r-   rT   r   rp   r  history_artsr   new_artr.   r.   r/   _import_runy  sT   

zWandbImporter._import_runseqr"   c           	      C  s   t |j|j}t |j|j}| d| d|j }|j}td|d|d|d| z	| j||}W n t	j
tfyN   td|d| Y dS w z|  W dS  t	j
tfyz } ztd|d|d	| W Y d}~dS d}~ww )
zDeletes the equivalent artifact collection in destination.

        Intended to clear the destination when an uploaded artifact does not pass validation.
        r7   zDeleting collection entity=
, project=z, art_type=z, art_name=z"Collection doesn't exist art_type=Nz&Collection can't be deleted, art_type=, e=)r   r%   r&   r'   r(   rw   infora   artifact_collectionr^   	CommError
ValueErrorrx   delete)	r-   r  r   r%   r&   art_typeart_namedst_collectioner.   r.   r/   _delete_collection_in_dst  s,   	z'WandbImporter._delete_collection_in_dstr   c             
   C  sp  |j std|d dS |du rt|j|j}| j| jddd}tj	dd}| 
|| |j d }t|| j}tt|}t|d	D ]d\}}	|	d }|jtkr[t|fi | j}
n2z| }W n	 tyj   Y nw |du r}td
|jd| |}t|}|g}	t|fi | j}
td|d| dt|  tj|
|	|j||d qDtd| | | dS )zImport one artifact sequence.

        Use `namespace` to specify alternate settings like where the artifact sequence should be uploaded
        zArtifact seq=z has no artifacts, skipping.Nr  Tr  )log_artifactsr   r   zRun for art.name=z/ does not exist (deleted?), using run_or_dummy=zUploading partial artifact seq=, r7   r  zFinished uploading seq=)r#   rw   rx   r   r%   r&   rX   rW   r   r	  r  _get_run_or_dummy_from_artr   rt   _make_groups_of_artifacts	enumeratedescriptionART_SEQUENCE_DUMMY_PLACEHOLDERrS   r   	logged_byr  r(   r   r  lenr  r  _remove_placeholders)r-   r  r   r  send_manager_configr   run_or_dummygroups_of_artifactsir~   rT   	wandb_runr  r.   r.   r/   _import_artifact_sequence  sX   


z'WandbImporter._import_artifact_sequencec                 C  s   zt | jj}t||j|j}W n" tjy&   t	
d|d Y d S  ty4   t	d Y d S w |D ]6}|jtkr?q7|jdv rEq7z|jdd W q7 tjym } zdt|v rbt	
d n W Y d }~q7d }~ww d S )	Nzseq=z5 does not exist in dst.  Has it already been deleted?z/Problem getting dst versions (try again later).)r   jobT)delete_aliasesz%cannot delete system managed artifactz%Cannot delete system managed artifact)r   	exp_retry_dst_apir#   rt   r'   r(   r^   r  rw   rx   	TypeError	exceptionr#  r$  rH   r  r$   )r-   r  retry_arts_funcdst_artsr   r  r.   r.   r/   r'  &  s6   



z"WandbImporter._remove_placeholderssrc_artr   r%   r}   r&   Artifactc                 C  s:   t ||j}t ||j}|j}| j| d| d| S r6   )r   r%   r&   r(   ra   	_artifact)r-   r6  r%   r&   r(   r.   r.   r/   _get_dst_artA  s   zWandbImporter._get_dst_artFsrc_rundst_runforce_retrybool
list[dict]c                 C  sZ   g }|r	| d | || }r| dt|  | || }r+| dt|  |S )N__force_retry__z	metadata:zsummary:)r   _compare_run_metadatar$   _compare_run_summary)r-   r:  r;  r<  problemsnon_matching_metadatanon_matching_summaryr.   r.   r/   _get_run_problemsJ  s   
zWandbImporter._get_run_problemsdictc              
   C  s  d}| |}|jdkri S | |}ztj|j| jj}W n> tj	j
y.   ddi Y S  tjy] } z#|jjdkrIdd| iW  Y d }~S d| d	| d
iW  Y d }~S d }~ww t|}i }	|jr|j D ]\}
}|
|vr}|dd|	|
< qm||
 }||kr||d|	|
< qm|	S )Nr   r   zError checkingTimeouti  z
Bad uploadzFile not found: zhttp problemz: (r1   zKEY NOT FOUNDsrcdst)filer   r^   utildownload_file_into_memoryr   ra   r\   urllib3
exceptionsReadTimeoutErrorrequests	HTTPErrorresponsestatus_coder   r   r   items)r-   r:  r;  r   src_fdst_fcontentsr  dst_metanon_matchingksrc_vdst_vr.   r.   r/   r@  \  s:   



 
z#WandbImporter._compare_run_metadatac           
      C  s   i }|j  D ]c\}}t|tr|drq|dv rqt|}|j |}t|}t|tr^t|tr^| D ]%\}}t|trF|drFq7||}	t||	s\||	d|| d| < q7qt||sj||d||< q|S )Nzwandb-client-artifact://)r   r   rH  -)	rr   rU  
isinstancer$   
startswith_recursive_cast_to_dictr   rF  _almost_equal)
r-   r:  r;  rZ  r[  r\  r]  kksvdvr.   r.   r/   rA    s0   



z"WandbImporter._compare_run_summaryIterable[ArtifactSequence]c           
      c  s    t t }d u rtdtd d S |g d  }|jddD ]<}|d }|d }|d }|d	 }| d
| d
| }| j||}	t|	dd d}	t|	dd d}	t	|	||||V  q"d S )NzARTIFACT_ERRORS_FNAME= is empty, returning nothing)
src_entitysrc_projectr(   rH   Tr   rh  ri  r(   rH   r7   c                 S  r:   r;   r=   rA   r.   r.   r/   rC     rD   zBWandbImporter._collect_failed_artifact_sequences.<locals>.<lambda>rE   c                 S  s   | j S r*   )rH   rA   r.   r.   r/   rC     s    )
_read_ndjsonARTIFACT_ERRORS_FNAMErw   r
  uniquer   r   r#   rG   r"   )
r-   r   unique_failed_sequencesr{   r%   r&   r(   _typer  rJ   r.   r.   r/   "_collect_failed_artifact_sequences  s(   z0WandbImporter._collect_failed_artifact_sequences)
namespacesr`   	remappingrp  Iterable[Namespace] | Noner`   
Api | Nonerq  !dict[Namespace, Namespace] | Nonec                C  s   t || j}t ||  }|D ]Y}|r||v r|| }td| zt|j|jdtid}W n# t	yT } zdt
|v rJtd W Y d }~qW Y d }~nd }~ww |D ]}td| |jdd qWqd S )	NzCleaning up, ns=displayNamefilterszCould not find projectz&Could not find project, does it exist?zDeleting dummy run=F)delete_artifacts)r   ra   _all_namespacesrw   r
  rt   runsrh   RUN_DUMMY_PLACEHOLDERr  r$   r3  r  )r-   rp  r`   rq  nsrz  r  rT   r.   r.   r/   _cleanup_dummy_runs  s,   
z!WandbImporter._cleanup_dummy_runsreportr   c          
      C  s  |du rt |j|j}t|j|j}t|j|j}|j}|j}|j}| j}t	d|d| z|
|| W n( tjjy` }	 z|	jjdkrVtd|d|d|	 W Y d}	~	nd}	~	ww t	d|d|d	|d
| |jjtjjd|||||dt|jdd dS )zImport one wandb.Report.

        Use `namespace` to specify alternate settings like where the report should be uploaded
        NzUpserting entity=z	/project=i  zIssue upserting entity=r  zUpserting report entity=r  z, name=z, title=rz  )rm   r(   
entityNameprojectNamer#  ru  rH   specvariable_values)r   r%   r&   r   r(   titler#  ra   rw   r
  create_projectrQ  rO  rR  rS  rT  rx   clientexecutewrr~  UPSERT_VIEWr   dumpsr  )
r-   r~  r   r%   r&   r(   r  r#  r`   r  r.   r.   r/   _import_report  s:   "

zWandbImporter._import_reportsequencec          	      C  s   |d u rt |j|j}| j| jddd}td|d| tjdd}|D ]#}|	  }d u r2q'|D ]}t
|fi | j}tj||j||d q4q'd S )Nr  Tr  z/Using artifact sequence with settings_override=, namespace=)use_artifactsr  )r   r%   r&   rX   rW   rw   r
  r   r	  used_byrS   r   r  r  )	r-   r  r   r  r(  r   r  r,  rT   r.   r.   r/   _use_artifact_sequence  s,   z$WandbImporter._use_artifact_sequenceT)rp  rq  parallelincrementalmax_workerslimitr   r   r   r   r   rr   r   r  r  r  r   r  r   r   r   r   r   rr   r   c             	     s   t d t d t  tt t d tj||d}t dt| j||d t d t	 }t dt|  f	d	d
}t
||||d t d d S )NzSTART: Import runszSetting up for importzCollecting runsrp  r  zValidating runs, len(runs)=skip_previously_validatedrq  zCollecting failed runszImporting runs, len(runs)=c              	     s   t |  |  }d ur|v r| }tj d}td| d|d| j| ||d td| d|d| d S )Nr   zImporting run=r  z	, config=r   zFinished importing run=)r   r%   r&   r   r	  rw   r
  r  )rT   r   rp   	r   r   r   r   r   rq  r-   rr   r   r.   r/   _import_run_wrapped?  s   
 z6WandbImporter.import_runs.<locals>._import_run_wrapped)r  r  zEND: Importing runs)rw   r  _create_files_if_not_exists_clear_fnameRUN_ERRORS_FNAMErt   _collect_runsr&  _validate_runs_collect_failed_runsr   )r-   rp  rq  r  r  r  r  r   r   r   r   r   rr   r   rz  r  r.   r  r/   import_runs  s$   



zWandbImporter.import_runs)rp  r  rq  c                  sR   t d t d j||d}t d  fdd}t|| t d d S )NzSTART: Importing reportszCollecting reportsr  zImporting reportsc                   d   t | j| j} d ur| v r | }td| d| j| |d td| d| d S )NzImporting report=r  r  zFinished importing report=)r   r%   r&   rw   r
  r  )r~  r   rq  r-   r.   r/   _import_report_wrappedc     z<WandbImporter.import_reports.<locals>._import_report_wrappedzEND: Importing reports)rw   r  _collect_reportsr   )r-   rp  r  rq  reportsr  r.   r  r/   import_reportsU  s   



	zWandbImporter.import_reports)rp  r  r  rq  c                  s   t d tt t d tj|d}t d j|| d t d t }t dt|  fdd	}t	|||d
 t 
dt|  fdd}t	|||d
 t d j| d t d dS )zImport all artifact sequences from `namespaces`.

        Note: There is a known bug with the AWS backend where artifacts > 2048MB will fail to upload.  This seems to be related to multipart uploads, but we don't have a fix yet.
        z#START: Importing artifact sequenceszCollecting artifact sequences)rp  zValidating artifact sequences)r  rq  z$Collecting failed artifact sequencesz(Importing artifact sequences, len(seqs)=c                   r  )Nz Importing artifact sequence seq=r  r  z)Finished importing artifact sequence seq=)r   r%   r&   rw   r
  r-  r  r   r  r.   r/   !_import_artifact_sequence_wrapped  r  zRWandbImporter.import_artifact_sequences.<locals>._import_artifact_sequence_wrapped)r  z$Using artifact sequences, len(seqs)=c                   r  )NzUsing artifact sequence seq=r  r  z%Finished using artifact sequence seq=)r   r%   r&   rw   r
  r  r  r  r.   r/   _use_artifact_sequence_wrapped  r  zOWandbImporter.import_artifact_sequences.<locals>._use_artifact_sequence_wrappedzCleaning up dummy runsrp  rq  z!END: Importing artifact sequencesN)rw   r  r  rk  rt   _collect_artifact_sequences_validate_artifact_sequencesro  r&  r   r
  r}  )r-   rp  r  r  rq  seqsr  r  r.   r  r/   import_artifact_sequencesp  s0   



		
z'WandbImporter.import_artifact_sequences)rz  r#   r  rp  r  rq  rz  r#   r  c                C  sd   t d|d|d| |r| j|||d |r!| j||d |r+| j|||d t d d S )NzSTART: Importing all, runs=z, artifacts=z
, reports=)rp  r  rq  r  zEND: Importing all)rw   r  r  r  r  )r-   rz  r#   r  rp  r  rq  r.   r.   r/   
import_all  s&   
zWandbImporter.import_allrq  c             	   C  s4  t |j|j}|d ur||v r|| }|j}|j}|j}z| j| d| d| }W n tjy?   d|d|g}Y nw | ||}|j|j|||d}	|rY||	d< t	}
nt
}
td0 t|
d}|t|	d  W d    n1 s{w   Y  W d    d S W d    d S 1 sw   Y  d S )	Nr7   z(run does not exist in dst at dst_entity=z/dst_project=rh  ri  
dst_entitydst_projectri   rB  z	runs.lockrB   r   )r   r%   r&   rm   ra   rT   r^   r  rE  r  RUN_SUCCESSES_FNAMEfilelockFileLockr   writer   r  )r-   r:  rq  r   r  r  ri   r;  rB  dr   r   r.   r.   r/   _validate_run  s:    "zWandbImporter._validate_runIterable[Run]c          	   
   c  s   t t }d u rtdtd |E d H  d S g }|D ]&}t|j|j}|d ur2||v r2|| }||j|j|j|j|j|d qt	
|}tdt|d |j|dg dd}td	t| | s}||d
   }|g d}|jddD ]}|d V  qd S )NzRUN_SUCCESSES_FNAME=z is empty, yielding all runs)rh  ri  r  r  ri   datazStarting with len(runs)=z in namespacesantir  howonz:After filtering out already successful runs, len(results)=ri   Tr   r  )rj  r  rw   r
  r   r%   r&   r   rm   r   	DataFramer&  r8   is_emptyfilteris_nullrl  r   )	r-   rz  rq  r   r  rr   df2resultsr.   r.   r/   _filter_previously_checked_runs  sF   


z-WandbImporter._filter_previously_checked_runsr  r  download_files_and_comparecheck_entries_are_downloadablec                 C  s$  g }dg}|D ]}t ||jr||||f  S qz	| |||}	W n ty6   |d ||||f Y S w ztd W n& tyd }
 z|d|jd|j	d|jd|
 W Y d }
~
nd }
~
ww |t
||	7 }|rtt
|	 |rtd| zt|t d	|j d
}W n' tjy }
 z|d|jd|j	d|jd|
  W Y d }
~
nd }
~
ww td|	 zt|	t d	|	j d
}W n' tjy }
 z|d|	jd|	j	d|	jd|
  W Y d }
~
nd }
~
ww td|d| t
|| }r|| ||||fS )Nz^job-(.*?)\.py(:v\d+)?$zdestination artifact not foundzComparing artifact manifestsz6Problem getting problems! problem with src_art.entity=z, src_art.project=z, src_art.name=z e=zDownloading src_art=r7   r   z-Invalid download link for src src_art.entity=r  zDownloading dst_art=z-Invalid download link for dst dst_art.entity=z, dst_art.project=z, dst_art.name=z Comparing artifact dirs src_dir=z
, dst_dir=)r   searchr(   r9  	Exceptionr   rw   r
  r%   r&   r   _compare_artifact_manifests_check_entries_are_downloadabler   r   rQ  rR  DST_ART_PATH_compare_artifact_dirs)r-   r6  r  r  r  r  rB  ignore_patternspatterndst_artr  src_dirdst_dirproblemr.   r.   r/   _validate_artifact(  s^   
 
  
z WandbImporter._validate_artifactr  Iterable[WandbRun]r  c                  s@   dd |D }|rt j| d} fdd}t|| d S )Nc                 S  s   g | ]}|j qS r.   rT   )r   r  r.   r.   r/   r   l  s    z0WandbImporter._validate_runs.<locals>.<listcomp>r  c                   s2   t d|  j|  d t d|  d S )NzValidating run=r  zFinished validating run=)rw   r
  r  r  r  r.   r/   r  u  s   z3WandbImporter._validate_runs.<locals>._validate_run)rt   r  r   )r-   rz  r  rq  	base_runsr  r.   r  r/   r  e  s   zWandbImporter._validate_runsc                 c  s    t t }d u rtdtd d S |g d  }|jddD ]&}|d }|d }|d }| j| d	| d	| }t|fi | j	V  q"d S )
NzRUN_ERRORS_FNAME=rg  r  Tr   rh  ri  ri   r7   )
rj  r  rw   r
  rl  r   r   rT   rS   r   )r-   r   unique_failed_runsr{   rh  ri  ri   rT   r.   r.   r/   r  |  s    z"WandbImporter._collect_failed_runsr  c              
   c  s$   t t }d u rtdtd |D ]}|jE d H  qd S |D ]n}|D ]i}zt|| j}W n tjyA   t	d| Y q%w |j
dkrUt|trUtd| q%|j}|j}|j
}t|\}	}
||d |k|d |k@ |d |	k@ |d	 |
k@ |d
 |k@ }t|dkr|V  q%q!d S )NzARTIFACT_SUCCESSES_FNAME=z* is empty, yielding all artifact sequencesz!Failed to get run, skipping: art=r   zSkipping history artifact art=rh  ri  r(   r?   rH   r   )rj  ARTIFACT_SUCCESSES_FNAMErw   r  r#   r   r   rQ  rR  r3  rH   r_  	_DummyRunr
  r%   r&   _get_art_name_verr  r&  )r-   r  r   r  r   r%  r%   r&   rn  r(   verfiltered_dfr.   r.   r/   $_filter_previously_checked_artifacts  sN   





	z2WandbImporter._filter_previously_checked_artifacts)r  r  r  rq  c             	     s   |rt d fdd}| }nt d dd D } fdd}d	d
 |D }	t||	}
|
D ]C\}}}}t|\}}|j|j|||||jd}|rZ||d< t}nt	}t
|d}|t|d  W d    n1 svw   Y  q8d S )NzValidating in incremental modec                  3  s    D ]=} | j s	q| j d }zt| j}W n tjy1   td|jd|jd|j	 Y qw |j
dkr=t|tr=q| V  qd S )Nr   z)Validate Artifact http error: art.entity=z, art.project=z, art.name=r   )r#   r   r   rQ  rR  rw   r3  r%   r&   r(   rH   r_  r  )r  r   r%  )r-   r  r.   r/   filtered_sequences  s*   

zFWandbImporter._validate_artifact_sequences.<locals>.filtered_sequencesz"Validating in non-incremental modec                 S  s   g | ]
}|j D ]}|qqS r.   )r#   )r   r  r   r.   r.   r/   r     s    z>WandbImporter._validate_artifact_sequences.<locals>.<listcomp>c                   s   | \}}}d urt || }v r| }|j}|j}td|d|d| j||| d}td|d|d| |S )NzValidating art=z	, entity=r  )r  r  zFinished validating art=)r   r%   r&   rw   r
  r  )argsr   r%   r&   r   remapped_nsr   )r  r  rq  r-   r.   r/   _validate_artifact_wrapped  s    
zNWandbImporter._validate_artifact_sequences.<locals>._validate_artifact_wrappedc                 s  s    | ]
}||j |jfV  qd S r*   )r%   r&   )r   r   r.   r.   r/   	<genexpr>  s    z=WandbImporter._validate_artifact_sequences.<locals>.<genexpr>)rh  ri  r  r  r(   r?   rH   rB  rB   r   )rw   r  r  r   r  r%   r&   rH   rk  r  r   r  r   r  )r-   r  r  r  r  rq  r  r#   r  r  art_problemsr   r  r  rB  r(   r  r  r   r   r.   )r  r  rq  r-   r  r/   r    s8   	



z*WandbImporter._validate_artifact_sequences)rp  r  skip_ids
start_dater`   r  r   r  c                #  sv    t  j t  i |d urd|id< |d ur$d|id<  fdd}t| |}|E d H  d S )Nz$ninr(   z$gte	createdAtc                  3  sJ    D ]} t d|   j| jdD ]}t|fi jV  qqd S )NzCollecting runs from ns=rv  )rw   r
  rz  rh   rS   r   )r|  rT   r`   rw  rp  r-   r.   r/   _runs  s   z*WandbImporter._collect_runs.<locals>._runs)r   r   ry  	itertoolsislice)r-   rp  r  r  r  r`   r  rz  r.   r  r/   r    s   	zWandbImporter._collect_runs)r%   r`   c                c  sB    t || j}t ||j}||}|D ]
}t|j|jV  qd S r*   )r   r   default_entityprojectsr   r%   r(   )r-   r%   r`   r  r   r.   r.   r/   ry  #  s   
zWandbImporter._all_namespaces)rp  r  r`   c                #  sT    t  | j t |  tj| j| jd  fdd}t| |E d H  d S )N)rF   r   c                  3  s6    D ]}   | jD ]}tjj|j dV  qqd S )N)r`   )r  rh   r  r   from_urlr   )r|  r  r`   rp  r.   r/   r  6  s   z/WandbImporter._collect_reports.<locals>.reports)	r   r   ry  r^   loginrV   rU   r  r  )r-   rp  r  r`   r  r.   r  r/   r  *  s   zWandbImporter._collect_reportsc                #  sX    t  | j t |   fdd}t| |}dd |D }| E d H  d S )Nc               
   3  s    D ]Y} t d|  g }zdd  | jD }W n ty*   t d Y nw |D ].}g }|jdkr7q-z| }W n tyK   t d Y nw |D ]}| rZt	
|V  qNq-qd S )Nz&Collecting artifact sequences from ns=c                 S  s   g | ]}|qS r.   r.   )r   r   r.   r.   r/   r   L  rD   zYWandbImporter._collect_artifact_sequences.<locals>.artifact_sequences.<locals>.<listcomp>zFailed to get artifact types.r   z#Failed to get artifact collections.)rw   r
  artifact_typesrh   r  r3  r(   collectionsis_sequencer"   rK   )r|  typesr   r  cr  r.   r/   artifact_sequencesG  s2   
zEWandbImporter._collect_artifact_sequences.<locals>.artifact_sequencesc                 S  s   i | ]}|j |qS r.   r2   )r   r  r.   r.   r/   
<dictcomp>a      z=WandbImporter._collect_artifact_sequences.<locals>.<dictcomp>)r   r   ry  r  r  values)r-   rp  r  r`   r  r  unique_sequencesr.   r  r/   r  =  s   z)WandbImporter._collect_artifact_sequences)rU   r$   rV   r$   rW   r$   rX   r$   r   r   r)   rY   )rT   rS   r   r   rp   r   r)   rY   r*   )r  r"   r   r   )r  r"   r   r   r)   rY   )r  r"   r)   rY   )NN)r6  r   r%   r}   r&   r}   r)   r7  )F)r:  r   r;  r   r<  r=  r)   r>  )r:  r   r;  r   r)   rF  )r)   rf  )rp  rr  r`   rs  rq  rt  r)   rY   )r~  r   r   r   r)   rY   )r  r"   r   r   )rp  rr  rq  rt  r  r=  r  r=  r  r   r  r   r   r=  r   r=  r   r=  r   r=  r   r=  rr   r=  r   r=  )rp  rr  r  r   rq  rt  )rp  rr  r  r=  r  r   rq  rt  )rz  r=  r#   r=  r  r=  rp  rr  r  r=  rq  rt  )r:  r   rq  rt  r)   rY   )rz  r  rq  rt  r)   r  )FT)
r6  r7  r  r$   r  r$   r  r=  r  r=  )rz  r  r  r=  rq  rt  )r  rf  )
r  rf  r  r=  r  r=  r  r=  rq  rt  )rp  rr  r  r   r  r   r  r}   r`   rs  r)   r  )r%   r}   r`   rs  )rp  rr  r  r   r`   rs  )!rM   rN   rO   __doc__rf   r4   r  r  r-  r'  r9  rE  r@  rA  ro  r}  r  r  r  r  r  r  r  r  r  r  r  r  r  r  ry  r  r  r.   r.   r.   r/   r   P  s    	#B#
O


#
-$?F%*3A
+U
r   r   r7  r)   tuple[str, int]c                 C  s   | j d\}}|t|fS )N:v)r(   splitr>   )r   r(   r  r.   r.   r/   r  e  s   r  r(   r$   rn  r  r>   c                 C  sp   t | t}||_t|_tt}|jddd |t| }t	|d W d    n1 s,w   Y  |
| |S )NT)parentsr   w)r7  ART_DUMMY_PLACEHOLDER_TYPErn  r$  _descriptionr   ART_DUMMY_PLACEHOLDER_PATHmkdirr$   r   add_file)r(   rn  r  r   r   r   r.   r.   r/   _make_dummy_artj  s   

r	  r  startc                 #  s\    |d }| D ]$ t  \}|| dkr% fddt|d |D V   gV  |}qd S )Nr   c                   s   g | ]	}t  j|qS r.   )r	  rH   )r   r<   r   r(   r.   r/   r     s    z-_make_groups_of_artifacts.<locals>.<listcomp>)r  range)r  r
  prev_verr  r.   r  r/   r!  {  s    r!  c                 C  sV   t | trdd | D S t | tst| dr)i }|  D ]
\}}t|||< q|S | S )Nc                 S  s   g | ]}t |qS r.   )ra  )r   itemr.   r.   r/   r     r  z+_recursive_cast_to_dict.<locals>.<listcomp>rU  )r_  rt   rF  hasattrrU  ra  )objnew_dictrF   r   r.   r.   r/   ra    s   
ra  ư>c                   s   t tr!t tr!  krdS t fddD S t tjr5t tjr5t  k S ttur?dS kS )NFc                 3  s$    | ]}t | |  V  qd S r*   )rb  )r   r[  epsxyr.   r/   r    s   " z _almost_equal.<locals>.<genexpr>)r_  rF  keysallnumbersNumberabsrH   )r  r  r  r.   r  r/   rb    s   rb  c                   @  s   e Zd ZU dZded< dS )
_DummyUserr   r$   r   N)rM   rN   rO   r   rP   r.   r.   r.   r/   r    s   
 r  c                   @  s   e Zd ZU dZded< dZded< eZded< eZded< eZ	ded< dZ
ded< dZded	< dZded
< dZded< eedZded< eedZded< eedZded< eedZded< dd ZdS )r  r   r$   r%   r&   ri   rm   r   r   r   r~   z
2000-01-01r   )default_factoryr  r   rt   r   rF  rr   rp   c                 C  s   g S r*   r.   r,   r.   r.   r/   r     r   z_DummyRun.filesN)rM   rN   rO   r%   rP   r&   r{  ri   rm   r   r   r   r~   r   r   r  r   rt   r   rF  rr   rp   r   r.   r.   r.   r/   r    s   
 r  r   pl.DataFrame | Nonec              
   C  sr   zt | }W |S  ty   Y d S  ty8 } zdt|v r&W Y d }~d S dt|v r3W Y d }~d S  d }~ww )Nz&empty string is not a valid JSON valuezerror parsing ndjson)r   read_ndjsonFileNotFoundErrorRuntimeErrorr$   )r   r   r  r.   r.   r/   rj    s   rj  c                 C  s   d }z|   }W n  ty( } ztd| d|d| W Y d }~nd }~ww |d ur/|S td}|jj|d| jid}|di di }t	| j
| j|d	t|d	td
}|S )Nz2Can't log artifact because run doesn't exist, art=z, run=r  a  
        query ArtifactCreatedBy(
            $id: ID!
        ) {
            artifact(id: $id) {
                createdBy {
                    ... on Run {
                        name
                        project {
                            name
                            entityName
                        }
                    }
                }
            }
        }
    rm   r  artifact	createdByr(   )r%   r&   ri   rm   )r%  r  rw   rx   r   r  r  rm   r   r  r%   r&   r{  )r   r`   rT   r  queryrS  creatorr.   r.   r/   r     s.   

r   rY   c                 C  s   t j d|  }t j d|  }td|d| zt|| W n	 ty,   Y nw t| d	 W d    d S 1 s>w   Y  d S )Nr7   z/prev_zMoving old_fname=z to new_fname=r  )r   r   rw   r
  shutilcopy2r   r   )r   	old_fname	new_fnamer.   r.   r/   r    s   "r  r   r}   c                 C  sd   zt d | j|ddW  d    W S 1 sw   Y  W d S  ty1   td|  Y d S w )N
click.echoT)r   
skip_cachezError downloading artifact art=)r
   r   r  rw   r3  )r   r   r.   r.   r/   r     s   
(r   c                 C  s   |d u rt  d| j }t| |d }d u rtd| | jd\}}t|t}| j|_| j	|_
| j|_| j|_td || W d    |S 1 sOw   Y  |S )Nr7   r   zProblem downloading art=r   r*  )r   r(   r   r  r  r7  r  rH   rn  r   _created_ataliases_aliasesr#  r  r
   add_dir)r   r   rh   r(   r   r  r.   r.   r/   r     s    


r   c               	   C  sV   t tttg} | D ] }td|d t|d W d    n1 s#w   Y  qd S )NzCreating fname=z if not existsrB   )rk  r  r  r  rw   r
  r   )fnamesr   r.   r.   r/   r  (  s   r  r   list[pl.DataFrame]pl.DataFramec                   s   t | dkr
t S t | dkr| d S | d  | dd  D ]-} j|ddgd  fdd jD }|D ]\}} |  | } || q5q S )Nr   r   outerr   r  c                   s*   g | ]}| d  j v r|| d fqS )_right)columns)r   r  	merged_dfr.   r/   r   A  s
    z_merge_dfs.<locals>.<listcomp>)r&  r   r  r8   r5  	fill_nullr   drop)r   r   	col_pairsr   rightnew_colr.   r6  r/   r   6  s   
r   )r   r7  r)   r  )r(   r$   rn  r$   r  r>   )r   )r  r"   r
  r>   )r  )r   r$   r)   r  r*   )r   r7  )r   r$   r)   rY   )r   r7  r   r$   r)   r}   )r   r7  r   r}   )r)   rY   )r   r1  r)   r2  )hr  
__future__r   r  r   loggingr  rg   r   r&  collections.abcr   r   dataclassesr   r   r   r   pathlibr   typingr	   unittest.mockr
   r  polarsr   rQ  rN  wandb_workspaces.reports.v1r  v1r  r   	wandb_gqlr   r   r^   wandb.apis.publicr   r   wandb.apis.public.filesr   wandb.sdk.libr   
wandb.utilr   r   r   r   	internalsr   internals.protocolsr   r   internals.utilr   r   r7  r_   apispublicProjectrk  r  r  r  r$  r{  r  r  r   r  	getLoggerrM   rw   setLevelINFOgetenvrich.loggingr   
addHandlerStreamHandlerconsole_handler	Formatter	formattersetFormatterr"   rS   r   r  r	  r!  ra  rb  r  r  rj  r   r  r   r   r  r   r.   r.   r.   r/   <module>   s    





 h        




+

