o
    uyi<                     @  s   d dl mZ d dlmZmZ d dlmZmZmZmZm	Z	m
Z
mZmZ d dlmZ d dlmZmZmZmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZ er`d dlZ d dl!m"Z" G dd dZ#dS )    )annotations)datetimetimezone)TYPE_CHECKINGAnyDictIteratorListOptionalSetTuple)
from_bytes)DataFileContentManifestContentManifestFilePartitionFieldSummary)PartitionSpec)Snapshotancestors_of)PrimitiveType)ExecutorFactory)_convert_to_hashable_typeN)Tablec                   @  s  e Zd ZU ded< dFddZdGdHddZdIddZdGdJddZdIddZdGdJddZ	dKddZ
dLddZdLddZdMdNd%d&ZdId'd(ZdId)d*ZdId+d,Z	dGdOd0d1ZdLd2d3ZdPdQd4d5ZdGdJd6d7ZdGdJd8d9ZdGdJd:d;ZdId<d=ZdGdRd>d?ZdId@dAZdIdBdCZdIdDdEZdS )SInspectTabler   tblreturnNonec              
   C  s8   || _ zdd l}W d S  ty } ztd|d }~ww )Nr   z5For metadata operations PyArrow needs to be installed)r   pyarrowModuleNotFoundError)selfr   pae r"   Z/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/pyiceberg/table/inspect.py__init__'   s   
zInspectTable.__init__Nsnapshot_idOptional[int]r   c                 C  sH   |d ur| j j| }r|S td| | j j  }r |S td)NzCannot find snapshot with ID z5Cannot get a snapshot as the table does not have any.)r   metadatasnapshot_by_id
ValueErrorcurrent_snapshot)r   r%   snapshotr"   r"   r#   _get_snapshot/   s   zInspectTable._get_snapshot
'pa.Table'c                 C  s
  dd l }||jd|jdddd|jd| dd|jd| d	d|jd
| d	d|jd| dd|jd|| | d	dg}g }| jjj	D ].}|j
 }r^|jj}|j
j}nd }d }|tj|jd tjd|j|jt||j|d qN|jj||dS )Nr   committed_atmsunitFnullabler%   	parent_idT	operationmanifest_listsummary     @@tz)r.   r%   r4   r5   r6   r7   schema)r   r<   field	timestampint64stringmap_r   r'   	snapshotsr7   r5   valueadditional_propertiesappendr   fromtimestamptimestamp_msr   utcr%   parent_snapshot_idstrr6   r   from_pylist)r   r    snapshots_schemarB   r+   r7   r5   rD   r"   r"   r#   rB   ;   s<   


zInspectTable.snapshotsc                   sP  dd l ddlm | jj g }d)fdd}| jj jD ]}|j	|j
||jd	d
 q!| jj }|}jd d	d
jd d	d
jd d	d
jd d	d
jdjd d	d
jd d	d
jd d	d
jd|d	d
jd d	d
jd d	d
jd  dd
jd  dd
jd  dd
jd  dd
jd  dd
jd  dd
jd dd
jd dd
jd dd
jd  dd
gd	d
jd!|dd
g}g }| |}	|	| jjD ]}
|
j| jjd	d"D ]}|jjpVi  |jjp]i |jjpdi |jjpki |jjpri |jjpyi 
 	
f	d#d$| jj jD }|jj fd%d$t!| jj" |
j# jD }||j$j%|j&|j'|j(i d|jj)d|jj*d|jj+d|d|jj,d|jj-dt.|jjdt.|jjpi dt.|jjpi dt.|jjpi d|jjd|jjd|jj/d|jj0d|jj1d |jj2d&|jj3|d' qNqCj4j5||d(S )*Nr   schema_to_pyarrow
bound_typer   r   pa.StructTypec                   v   | }   jd  dd jd  dd jd  dd jd  dd jd|dd jd|ddgS 	Ncolumn_sizeTr2   value_countnull_value_countnan_value_countlower_boundupper_boundstructr=   r?   rO   pa_bound_typer    rN   r"   r#   _readable_metrics_structj      z6InspectTable.entries.<locals>._readable_metrics_structFr2   statusr%   sequence_numberfile_sequence_number	data_filecontent	file_pathfile_format	partitionrecord_countfile_size_in_bytescolumn_sizesTvalue_countsnull_value_countsnan_value_countslower_boundsupper_boundskey_metadatasplit_offsetsequality_idssort_order_idreadable_metrics)iodiscard_deletedc                      i | ]<} |j |j|j|j|j|j r+t|jnd |j r:t|jnd dqS N)rS   rT   rU   rV   rW   rX   find_column_namefield_idgetr   
field_type.0r=   	rj   rW   rn   rm   rl   r<   rX   ro   rk   r"   r#   
<dictcomp>   s    




z(InspectTable.entries.<locals>.<dictcomp>c                      i | ]
\}}|j  | qS r"   namer   posr=   rg   r"   r#   r          
spec_id)r`   r%   ra   rb   rc   rt   r;   rO   r   r   rP   )6r   pyiceberg.io.pyarrowrN   r   r'   r<   fieldsrE   r=   rz   r{   r}   specs_structint8r?   rZ   r@   rA   int32binarylist_r,   	manifestsru   fetch_manifest_entryrc   rj   rk   rl   rm   rn   ro   rg   	enumeratespecspartition_spec_idr`   rC   r%   ra   rb   rd   re   rf   rh   ri   dictrp   rq   rr   rs   r   r   rK   )r   r%   readable_metrics_structr^   r=   partition_recordpa_record_structentries_schemaentriesr+   manifestentryrt   partition_record_dictr"   )rj   rW   rn   rm   rl   r    rg   r<   rN   rX   ro   rk   r#   r   a   s   "

	
;zInspectTable.entriesc                 C  s   dd l }||jd| dd|jd|| | dd|jd| dd|jd| dd|jd	| dd|jd
| ddg}g }| jjj	D ]}| jjj	
| }rk|||j |j|j|j|jd qL|jj||dS )Nr   r   Fr2   typer%   max_reference_age_in_msTmin_snapshots_to_keepmax_snapshot_age_in_ms)r   r   r%   r   r   r   r;   )r   r<   r=   r@   
dictionaryr   r?   r   r'   refsr|   rE   snapshot_ref_typeupperr%   max_ref_age_msr   max_snapshot_age_msr   rK   )r   r    
ref_schemaref_resultsrefsnapshot_refr"   r"   r#   r      s0   zInspectTable.refsc                 C  sd  dd l }ddlm} ||jd| dd|jd| dd|jd| dd|jd| dd|jd	| dd|jd
| dd|jd| dd|jd|jdddd|jd| ddg	}| jj	
 }t|jdk}|r||}||jd|dd|jd| ddg}|||g}| |}	t }
|
| j|	| jj}i }|D ]}}| D ]v\}}||vr|||< q|| }|d  |d 7  < |d  |d 7  < |d  |d 7  < |d  |d 7  < |d	  |d	 7  < |d
  |d
 7  < |d  |d 7  < |d r&|d r|d |d kr&|d |d< |d |d< qq|jj| |dS )Nr   rM   rh   Fr2   
file_counttotal_data_file_size_in_bytesposition_delete_record_countposition_delete_file_countequality_delete_record_countequality_delete_file_countlast_updated_atr/   r0   Tlast_updated_snapshot_idrg   r   r;   )r   r   rN   r<   r=   r?   r   r>   r   r'   r   lenr   unify_schemasr,   r   get_or_createmap_process_manifestr   ru   itemsr   rK   values)r   r%   r    rN   table_schemar   has_partitionsr   partitions_schemar+   executorlocal_partitions_mapspartitions_map	local_mappartition_record_keypartition_rowexistingr"   r"   r#   
partitions  sh   


zInspectTable.partitionsr   r   Dict[Tuple[str, Any], Any]c                   s  i }|j | jjdD ]}|jj  fddt| jj |j j	D }|j
d ur/| j|j
nd }t|}||vrU||jjddddddd|rI|jnd |rO|j
nd d||< || }|d urt|d d u sj|d |jk rt|j|d< |j
|d< |jjtjkr|d  |jj7  < |d	  d
7  < |d  |jj7  < q
|jjtjkr|d  |jj7  < |d  d
7  < q
|jjtjkr|d  |jj7  < |d  d
7  < q
td|jj d|S )N)ru   c                   r   r"   r   r   r   r"   r#   r   E  r   z2InspectTable._process_manifest.<locals>.<dictcomp>r   )rg   r   rh   r   r   r   r   r   r   r   r   r   r   rh   r      r   r   r   r   r   zUnknown DataFileContent ())r   r   ru   rc   rg   r   r'   r   r   r   r%   r(   r   r   rG   rd   r   DATArh   ri   POSITION_DELETESEQUALITY_DELETESr)   )r   r   r   r   r   entry_snapshotr   r   r"   r   r#   r   A  sN   



zInspectTable._process_manifest'pa.Schema'c                 C  sB  dd l }||jd| dd|jd| dd|jd| dd|jd| ddg}||jd	| dd|jd
| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd|	|ddg}|S )Nr   contains_nullFr2   contains_nanTrW   rX   rd   pathlengthr   added_snapshot_idadded_data_files_countexisting_data_files_countdeleted_data_files_countadded_delete_files_countexisting_delete_files_countdeleted_delete_files_countpartition_summaries)
r   rZ   r=   bool_r@   r<   r   r?   r   r   )r   r    partition_summary_schemamanifest_schemar"   r"   r#   _get_manifests_schemat  s0   	z"InspectTable._get_manifests_schemac                 C  s.   dd l }|  }||jd| dd}|S )Nr   reference_snapshot_idFr2   )r   r   rE   r=   r?   )r   r    all_manifests_schemar"   r"   r#   _get_all_manifests_schema  s   z&InspectTable._get_all_manifests_schemaFr+   Optional[Snapshot]is_all_manifests_tableboolc                   s  dd l }d fdd	} jj }g }|ru| jjD ]X}|jtjk}|jtj	k}	|j|j
|j|j|j|r9|jnd|r?|jnd|rE|jnd|	rK|jnd|	rQ|jnd|	rW|jnd|jrd|||j |jng d
}
|ro|j|
d< ||
 q|jj||r  dS   dS )Nr   specr   r   List[PartitionFieldSummary]r   List[Dict[str, Any]]c           	        s   g }t |D ]A\}}| j| }|  j j| j}|jr)|j|t	||jnd }|j
r9|j|t	||j
nd }||j|j||d q|S )N)r   r   rW   rX   )r   r   partition_typer   r<   r}   rW   	transformto_human_stringr   rX   rE   r   r   )	r   r   rowsifield_summaryr=   partition_field_typerW   rX   r   r"   r#   _partition_summaries_to_rows  s4   
	zLInspectTable._generate_manifests_table.<locals>._partition_summaries_to_rows)rd   r   r   r   r   r   r   r   r   r   r   r   r   r;   )r   r   r   r   r   r   )r   r   r'   r   r   ru   rd   r   r   DELETESmanifest_pathmanifest_lengthr   r   added_files_countexisting_files_countdeleted_files_countr   r%   rE   r   rK   r   r   )r   r+   r   r    r   r   r   r   is_data_fileis_delete_filemanifest_rowr"   r   r#   _generate_manifests_table  s@   #

z&InspectTable._generate_manifests_tablec                 C  s   |  | j S N)r   r   r*   r   r"   r"   r#   r        zInspectTable.manifestsc                   s   dd l }ddlm} ||jd|jdddd|jd| dd|jd	| d
d|jd| d
d|jd| d
dg}dfdd j	j
j|j	jj	j
jdg }|jj fdd|D |dS )Nr   )MetadataLogEntryr>   r/   r0   Fr2   filelatest_snapshot_idTlatest_schema_idlatest_sequence_numbermetadata_entryr   r   Dict[str, Any]c                   sD    j | j}| j| j|r|jnd |r|jnd |r|jdS d dS )N)r>   r   r   r  r  )r   snapshot_as_of_timestamprG   metadata_filer%   	schema_idra   )r  latest_snapshotr   r"   r#   metadata_log_entry_to_row  s   zDInspectTable.metadata_log_entries.<locals>.metadata_log_entry_to_row)r  rG   c                   s   g | ]} |qS r"   r"   )r   r   )r	  r"   r#   
<listcomp>      z5InspectTable.metadata_log_entries.<locals>.<listcomp>r;   )r  r   r   r  )r   pyiceberg.table.snapshotsr   r<   r=   r>   r@   r?   r   r   r'   metadata_logmetadata_locationlast_updated_msr   rK   )r   r    r   r   metadata_log_entriesr"   )r	  r   r#   r    s$   
z!InspectTable.metadata_log_entriesc              
   C  s   dd l }||jd|jdddd|jd| dd|jd| d	d|jd
| ddg}dd t| j | jj	D }g }| jj	}|j
D ]$}||j}|tj|jd tjd|j|rb|jnd |j|v d qG|jj||dS )Nr   made_current_atr/   r0   Fr2   r%   r4   Tis_current_ancestorc                 S  s   h | ]}|j qS r"   )r%   r   r+   r"   r"   r#   	<setcomp>  s    z'InspectTable.history.<locals>.<setcomp>r8   r9   )r  r%   r4   r  r;   )r   r<   r=   r>   r?   r   r   r   r*   r'   snapshot_logr(   r%   rE   r   rF   rG   r   rH   rI   r   rK   )r   r    history_schemaancestors_idshistoryr'   snapshot_entryr+   r"   r"   r#   r    s*   	
	zInspectTable.historyr6   data_file_filterOptional[Set[DataFileContent]]c           
   
     s  dd l }g }| jj | jj}||D ]}|j}|r"|j|vr"q|jp&i  |j	p+i 	|j
p0i |jp5i |jp:i |jp?i  	f	dd| jj jD }|jfddt| jj |j jD }	|i d|jd|jd|jd|jd	|	d
|jd|jd|jd urt|jnd d|j	d urt|j	nd d|j
d urt|j
nd d|jd urt|jnd d|jd urt|jnd d|jd urt|jnd d|jd|jd|jd|jd|i q|jj||   dS )Nr   c                   rw   rx   ry   r~   r   r"   r#   r   7  s    




z9InspectTable._get_files_from_manifest.<locals>.<dictcomp>c                   r   r"   r   r   r   r"   r#   r   G  r   rd   re   rf   r   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   rs   rt   r;   )!r   r   r'   r<   ru   r   rc   rd   rj   rk   rl   rm   rn   ro   r   rg   r   r   r   rE   re   rf   r   rh   ri   r   rp   rq   rr   rs   r   rK   _get_files_schema)
r   r6   r  r    filesru   manifest_entryrc   rt   r   r"   )
rj   rW   rn   rm   rl   rg   r<   rX   ro   rk   r#   _get_files_from_manifest$  s   






	
z%InspectTable._get_files_from_manifestc                   s0  dd l  ddlm | jj }g }d fdd}| jj }|}| jj jD ]}| j	|
|j||jd	d
 q+  j	d  d	d
 j	d  d	d
 j	d     d	d
 j	d  d	d
 j	d|d	d
 j	d  d	d
 j	d  d	d
 j	d     dd
 j	d     dd
 j	d     dd
 j	d     dd
 j	d     dd
 j	d     dd
 j	d  dd
 j	d   dd
 j	d   dd
 j	d  dd
 j	d |dd
g}|S )Nr   rM   rO   r   r   rP   c                   rQ   rR   rY   r[   r]   r"   r#   r^   n  r_   z@InspectTable._get_files_schema.<locals>._readable_metrics_structFr2   rd   re   rf   r   rg   rh   ri   rj   Trk   rl   rm   rn   ro   rp   rq   rr   rs   rt   r   )r   r   rN   r   r'   r<   r   r   rE   r=   rz   r{   r}   r   r@   r   r   r?   rA   r   r   rZ   )r   r<   r   r^   r   r   r=   files_schemar"   r]   r#   r  f  sB   zInspectTable._get_files_schemac                   sh   dd l }|sjj s  S |}jj}t	 }t
| fdd||}||S )Nr   c                   s    |  S r   r  )r6   r  r   r"   r#   <lambda>  s    z%InspectTable._files.<locals>.<lambda>)r   r   r'   r*   r  empty_tabler,   ru   r   r   listr   r   concat_tables)r   r%   r  r    r+   ru   r   resultsr"   r"  r#   _files  s   

zInspectTable._filesc                 C  s
   |  |S r   )r(  r   r%   r"   r"   r#   r    s   
zInspectTable.filesc                 C  s   |  |tjhS r   )r(  r   r   r)  r"   r"   r#   
data_files  r   zInspectTable.data_filesc                 C  s   |  |tjtjhS r   )r(  r   r   r   r)  r"   r"   r#   delete_files  s   zInspectTable.delete_filesc                   sZ   dd l } j }|s|jjg   dS t }| fdddd |D }|	|S )Nr   r;   c                   
    j |  S r   )r   argsr   r"   r#   r#       
 z,InspectTable.all_manifests.<locals>.<lambda>c                 S  s   g | ]}|d fqS )Tr"   r  r"   r"   r#   r
    r  z.InspectTable.all_manifests.<locals>.<listcomp>)
r   r   rB   r   rK   r   r   r   r   r&  )r   r    rB   r   manifests_by_snapshotsr"   r   r#   all_manifests  s   

zInspectTable.all_manifestsc                   s   dd l }j }|s|jjg  dS t }|fdd|}dd |D }|fdd fdd	|D }|	|S )
Nr   r;   c                   s   |   jjS r   )r   r   ru   )r+   r   r"   r#   r#    s    z)InspectTable._all_files.<locals>.<lambda>c                 S  s    h | ]}|D ]}|j |fqqS r"   )r   )r   r6   r   r"   r"   r#   r    s     z*InspectTable._all_files.<locals>.<setcomp>c                   r,  r   r!  r-  r   r"   r#   r#    r/  c                   s   g | ]\}}| fqS r"   r"   )r   _r   )r  r"   r#   r
    s    z+InspectTable._all_files.<locals>.<listcomp>)
r   r   rB   r   rK   r  r   r   r   r&  )r   r  r    rB   r   manifest_listsunique_manifests
file_listsr"   r"  r#   
_all_files  s   

zInspectTable._all_filesc                 C  s   |   S r   )r6  r   r"   r"   r#   	all_files  s   zInspectTable.all_filesc                 C  s   |  tjhS r   )r6  r   r   r   r"   r"   r#   all_data_files  s   zInspectTable.all_data_filesc                 C  s   |  tjtjhS r   )r6  r   r   r   r   r"   r"   r#   all_delete_files  s   zInspectTable.all_delete_files)r   r   r   r   r   )r%   r&   r   r   )r   r-   )r%   r&   r   r-   )r   r   r   r   )r   r   )F)r+   r   r   r   r   r-   )r6   r   r  r  r   r-   )NN)r%   r&   r  r  r   r-   )r  r  r   r-   )__name__
__module____qualname____annotations__r$   r,   rB   r   r   r   r   r   r   r   r   r  r  r  r  r(  r  r*  r+  r1  r6  r7  r8  r9  r"   r"   r"   r#   r   $   s8   
 

& 

?
3

E

$ 
B5


r   )$
__future__r   r   r   typingr   r   r   r   r	   r
   r   r   pyiceberg.conversionsr   pyiceberg.manifestr   r   r   r   pyiceberg.partitioningr   r  r   r   pyiceberg.typesr   pyiceberg.utils.concurrentr   pyiceberg.utils.singletonr   r   r    pyiceberg.tabler   r   r"   r"   r"   r#   <module>   s   (