o
    ˳i                     @  s   d dl mZ d dlZd dlmZ d dlmZmZ d dlmZm	Z	 d dl
mZ d dlmZmZ d dlmZmZmZmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZ erhd dl Z!d dl"m#Z# e Z$G dd dZ%dS )    )annotationsN)Iterator)datetimetimezone)TYPE_CHECKINGAny)
from_bytes)
AlwaysTrueBooleanExpression)DataFileDataFileContentManifestContentManifestFilePartitionFieldSummary)PartitionSpec)Snapshotancestors_of)PrimitiveType)ExecutorFactory)_convert_to_hashable_type)Tablec                   @  s"  e Zd ZU ded< dNddZdOdPddZdQddZdOdRddZdQddZde	dfdSddZ
dTd#d$ZdUd&d'ZdUd(d)ZdVdWd,d-ZdQd.d/ZdQd0d1ZdQd2d3Z	dOdXd8d9ZdUd:d;ZdYdZd<d=ZdOdRd>d?ZdOdRd@dAZdOdRdBdCZdQdDdEZdOd[dFdGZdQdHdIZdQdJdKZdQdLdMZdS )\InspectTabler   tblreturnNonec              
   C  s8   || _ zdd l}W d S  ty } ztd|d }~ww )Nr   z5For metadata operations PyArrow needs to be installed)r   pyarrowModuleNotFoundError)selfr   pae r    K/home/ubuntu/.local/lib/python3.10/site-packages/pyiceberg/table/inspect.py__init__,   s   
zInspectTable.__init__Nsnapshot_id
int | Noner   c                 C  sH   |d ur| j j| }r|S td| | j j  }r |S td)NzCannot find snapshot with ID z5Cannot get a snapshot as the table does not have any.)r   metadatasnapshot_by_id
ValueErrorcurrent_snapshot)r   r#   snapshotr    r    r!   _get_snapshot4   s   zInspectTable._get_snapshotpa.Tablec                 C  s
  dd l }||jd|jdddd|jd| dd|jd| d	d|jd
| d	d|jd| dd|jd|| | d	dg}g }| jjj	D ].}|j
 }r^|jj}|j
j}nd }d }|tj|jd tjd|j|jt||j|d qN|jj||dS )Nr   committed_atmsunitFnullabler#   	parent_idT	operationmanifest_listsummary     @@tz)r,   r#   r2   r3   r4   r5   schema)r   r:   field	timestampint64stringmap_r   r%   	snapshotsr5   r3   valueadditional_propertiesappendr   fromtimestamptimestamp_msr   utcr#   parent_snapshot_idstrr4   r   from_pylist)r   r   snapshots_schemar@   r)   r5   r3   rB   r    r    r!   r@   @   s<   


zInspectTable.snapshotsc                   sP  dd l ddlm | jj g }d)fdd}| jj jD ]}|j	|j
||jd	d
 q!| jj }|}jd d	d
jd d	d
jd d	d
jd d	d
jdjd d	d
jd d	d
jd d	d
jd|d	d
jd d	d
jd d	d
jd  dd
jd  dd
jd  dd
jd  dd
jd  dd
jd  dd
jd dd
jd dd
jd dd
jd  dd
gd	d
jd!|dd
g}g }| |}	|	| jjD ]}
|
j| jjd	d"D ]}|jjpVi  |jjp]i |jjpdi |jjpki |jjpri |jjpyi 
 	
f	d#d$| jj jD }|jj fd%d$t!| jj" |
j# jD }||j$j%|j&|j'|j(i d|jj)d|jj*d|jj+d|d|jj,d|jj-dt.|jjdt.|jjpi dt.|jjpi dt.|jjpi d|jjd|jjd|jj/d|jj0d|jj1d |jj2d&|jj3|d' qNqCj4j5||d(S )*Nr   schema_to_pyarrow
bound_typer   r   pa.StructTypec                   v   | }   jd  dd jd  dd jd  dd jd  dd jd|dd jd|ddgS 	Ncolumn_sizeTr0   value_countnull_value_countnan_value_countlower_boundupper_boundstructr;   r=   rM   pa_bound_typer   rL   r    r!   _readable_metrics_structo      z6InspectTable.entries.<locals>._readable_metrics_structFr0   statusr#   sequence_numberfile_sequence_number	data_filecontent	file_pathfile_format	partitionrecord_countfile_size_in_bytescolumn_sizesTvalue_countsnull_value_countsnan_value_countslower_boundsupper_boundskey_metadatasplit_offsetsequality_idssort_order_idreadable_metrics)iodiscard_deletedc                      i | ]<} |j |j|j|j|j|j r+t|jnd |j r:t|jnd dqS N)rQ   rR   rS   rT   rU   rV   find_column_namefield_idgetr   
field_type.0r;   	rh   rU   rl   rk   rj   r:   rV   rm   ri   r    r!   
<dictcomp>   s    




z(InspectTable.entries.<locals>.<dictcomp>c                      i | ]
\}}|j  | qS r    namer}   posr;   re   r    r!   r          
spec_id)r^   r#   r_   r`   ra   rr   r9   rM   r   r   rN   )6r   pyiceberg.io.pyarrowrL   r   r%   r:   fieldsrC   r;   rx   ry   r{   specs_structint8r=   rX   r>   r?   int32binarylist_r*   	manifestsrs   fetch_manifest_entryra   rh   ri   rj   rk   rl   rm   re   	enumeratespecspartition_spec_idr^   rA   r#   r_   r`   rb   rc   rd   rf   rg   dictrn   ro   rp   rq   r   r   rI   )r   r#   readable_metrics_structr\   r;   partition_recordpa_record_structentries_schemaentriesr)   manifestentryrr   partition_record_dictr    )rh   rU   rl   rk   rj   r   re   r:   rL   rV   rm   ri   r!   r   f   s   "

	
;zInspectTable.entriesc                 C  s   dd l }||jd| dd|jd|| | dd|jd| dd|jd| dd|jd	| dd|jd
| ddg}g }| jjj	D ]}| jjj	
| }rk|||j |j|j|j|jd qL|jj||dS )Nr   r   Fr0   typer#   max_reference_age_in_msTmin_snapshots_to_keepmax_snapshot_age_in_ms)r   r   r#   r   r   r   r9   )r   r:   r;   r>   
dictionaryr   r=   r   r%   refsrz   rC   snapshot_ref_typeupperr#   max_ref_age_msr   max_snapshot_age_msr   rI   )r   r   
ref_schemaref_resultsrefsnapshot_refr    r    r!   r      s0   zInspectTable.refsT
row_filterstr | BooleanExpressioncase_sensitiveboolc                   s  dd l }ddlm} ddlm} ||jd| dd|jd| dd|jd| dd|jd	| dd|jd
| dd|jd| dd|jd| dd|jd|j	dddd|jd| ddg	}| 
|}dd || jjD }	| jjj|	d}
t|
jdk}|r||
}||jd|dd|jd| ddg}|||g}|| jj| jj|||jd}i }tj| D ]1}|jj  fddt| jj |jj jD }|jd ur| j|jnd }| ||j|| q|jj |! |dS )Nr   rK   )DataScanrf   Fr0   
file_counttotal_data_file_size_in_bytesposition_delete_record_countposition_delete_file_countequality_delete_record_countequality_delete_file_countlast_updated_atr-   r.   Tlast_updated_snapshot_idc                 S     h | ]}|j qS r    )r   )r}   r   r    r    r!   	<setcomp>!      z*InspectTable.partitions.<locals>.<setcomp>)spec_idsre   r   )table_metadatars   r   r   r#   c                   r   r    r   r   r   r    r!   r   <  s    z+InspectTable.partitions.<locals>.<dictcomp>r9   )"r   r   rL   pyiceberg.tabler   r:   r;   r=   r   r<   r*   r   r   rs   r%   r   lenr   unify_schemasr#   	itertoolschainfrom_iterablescan_plan_helperra   re   r   r   r   r&   *_update_partitions_map_from_manifest_entryr   rI   values)r   r#   r   r   r   rL   r   table_schemar)   r   r   has_partitionsr   partitions_schemascanpartitions_mapr   r   entry_snapshotr    r   r!   
partitions  s`   


zInspectTable.partitionsr   dict[tuple[str, Any], Any]filer   r   dict[str, Any]r)   Snapshot | Nonec                 C  s>  t |}||vr#||jddddddd|r|jnd |r|jnd d||< || }|d urB|d d u s8|d |jk rB|j|d< |j|d< |jtjkrd|d  |j7  < |d  d7  < |d  |j7  < d S |jtj	kr}|d	  |j7  < |d
  d7  < d S |jtj
kr|d  |j7  < |d  d7  < d S td|j d)Nr   )re   r   rf   r   r   r   r   r   r   r   r   r   r   rf   r      r   r   r   r   r   zUnknown DataFileContent ())r   r   rE   r#   rb   r   DATArf   rg   POSITION_DELETESEQUALITY_DELETESr'   )r   r   r   r   r)   partition_record_keypartition_rowr    r    r!   r   I  s<   


z7InspectTable._update_partitions_map_from_manifest_entry	pa.Schemac                 C  sB  dd l }||jd| dd|jd| dd|jd| dd|jd| ddg}||jd	| dd|jd
| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd| dd|jd|	|ddg}|S )Nr   contains_nullFr0   contains_nanTrU   rV   rb   pathlengthr   added_snapshot_idadded_data_files_countexisting_data_files_countdeleted_data_files_countadded_delete_files_countexisting_delete_files_countdeleted_delete_files_countpartition_summaries)
r   rX   r;   bool_r>   r:   r   r=   r   r   )r   r   partition_summary_schemamanifest_schemar    r    r!   _get_manifests_schemat  s0   	z"InspectTable._get_manifests_schemac                 C  s.   dd l }|  }||jd| dd}|S )Nr   reference_snapshot_idFr0   )r   r   rC   r;   r=   )r   r   all_manifests_schemar    r    r!   _get_all_manifests_schema  s   z&InspectTable._get_all_manifests_schemaFis_all_manifests_tablec                   s  dd l }d fdd	} jj }g }|ru| jjD ]X}|jtjk}|jtj	k}	|j|j
|j|j|j|r9|jnd|r?|jnd|rE|jnd|	rK|jnd|	rQ|jnd|	rW|jnd|jrd|||j |jng d
}
|ro|j|
d< ||
 q|jj||r  dS   dS )Nr   specr   r   list[PartitionFieldSummary]r   list[dict[str, Any]]c           	        s   g }t |D ]A\}}| j| }|  j j| j}|jr)|j|t	||jnd }|j
r9|j|t	||j
nd }||j|j||d q|S )N)r   r   rU   rV   )r   r   partition_typer   r:   r{   rU   	transformto_human_stringr   rV   rC   r   r   )	r   r   rowsifield_summaryr;   partition_field_typerU   rV   r   r    r!   _partition_summaries_to_rows  s4   
	zLInspectTable._generate_manifests_table.<locals>._partition_summaries_to_rows)rb   r   r   r   r   r   r   r   r   r   r   r   r   r9   )r   r   r   r   r   r   )r   r   r%   r   r   rs   rb   r   r   DELETESmanifest_pathmanifest_lengthr   r   added_files_countexisting_files_countdeleted_files_countr   r#   rC   r   rI   r   r   )r   r)   r   r   r   r   r   r   is_data_fileis_delete_filemanifest_rowr    r   r!   _generate_manifests_table  s@   #

z&InspectTable._generate_manifests_tablec                 C  s   |  | j S N)r  r   r(   r   r    r    r!   r        zInspectTable.manifestsc                   s   dd l }ddlm} ||jd|jdddd|jd| dd|jd	| d
d|jd| d
d|jd| d
dg}dfdd j	j
j|j	jj	j
jdg }|jj fdd|D |dS )Nr   )MetadataLogEntryr<   r-   r.   Fr0   r   latest_snapshot_idTlatest_schema_idlatest_sequence_numbermetadata_entryr  r   r   c                   sD    j | j}| j| j|r|jnd |r|jnd |r|jdS d dS )N)r<   r   r  r  r	  )r   snapshot_as_of_timestamprE   metadata_filer#   	schema_idr_   )r
  latest_snapshotr   r    r!   metadata_log_entry_to_row  s   zDInspectTable.metadata_log_entries.<locals>.metadata_log_entry_to_row)r  rE   c                   s   g | ]} |qS r    r    )r}   r   )r  r    r!   
<listcomp>      z5InspectTable.metadata_log_entries.<locals>.<listcomp>r9   )r
  r  r   r   )r   pyiceberg.table.snapshotsr  r:   r;   r<   r>   r=   r   r   r%   metadata_logmetadata_locationlast_updated_msr   rI   )r   r   r  r   metadata_log_entriesr    )r  r   r!   r    s$   
z!InspectTable.metadata_log_entriesc              
   C  s   dd l }||jd|jdddd|jd| dd|jd| d	d|jd
| ddg}dd t| j | jj	D }g }| jj	}|j
D ]$}||j}|tj|jd tjd|j|rb|jnd |j|v d qG|jj||dS )Nr   made_current_atr-   r.   Fr0   r#   r2   Tis_current_ancestorc                 S  r   r    )r#   r}   r)   r    r    r!   r     r   z'InspectTable.history.<locals>.<setcomp>r6   r7   )r  r#   r2   r  r9   )r   r:   r;   r<   r=   r   r   r   r(   r%   snapshot_logr&   r#   rC   r   rD   rE   r   rF   rG   r   rI   )r   r   history_schemaancestors_idshistoryr%   snapshot_entryr)   r    r    r!   r    s*   	
	zInspectTable.historyr4   r   data_file_filterset[DataFileContent] | Nonec           
   
     s  dd l }g }| jj | jj}||D ]}|j}|r"|j|vr"q|jp&i  |j	p+i 	|j
p0i |jp5i |jp:i |jp?i  	f	dd| jj jD }|jfddt| jj |j jD }	|i d|jd|jd|jd|jd	|	d
|jd|jd|jd urt|jnd d|j	d urt|j	nd d|j
d urt|j
nd d|jd urt|jnd d|jd urt|jnd d|jd urt|jnd d|jd|jd|jd|jd|i q|jj||   dS )Nr   c                   ru   rv   rw   r|   r~   r    r!   r   7  s    




z9InspectTable._get_files_from_manifest.<locals>.<dictcomp>c                   r   r    r   r   r   r    r!   r   G  r   rb   rc   rd   r   re   rf   rg   rh   ri   rj   rk   rl   rm   rn   ro   rp   rq   rr   r9   )!r   r   r%   r:   rs   r   ra   rb   rh   ri   rj   rk   rl   rm   r   re   r   r   r   rC   rc   rd   r   rf   rg   r   rn   ro   rp   rq   r   rI   _get_files_schema)
r   r4   r  r   filesrs   manifest_entryra   rr   r   r    )
rh   rU   rl   rk   rj   re   r:   rV   rm   ri   r!   _get_files_from_manifest$  s   






	
z%InspectTable._get_files_from_manifestc                   s0  dd l  ddlm | jj }g }d fdd}| jj }|}| jj jD ]}| j	|
|j||jd	d
 q+  j	d  d	d
 j	d  d	d
 j	d     d	d
 j	d  d	d
 j	d|d	d
 j	d  d	d
 j	d  d	d
 j	d     dd
 j	d     dd
 j	d     dd
 j	d     dd
 j	d     dd
 j	d     dd
 j	d  dd
 j	d   dd
 j	d   dd
 j	d  dd
 j	d |dd
g}|S )Nr   rK   rM   r   r   rN   c                   rO   rP   rW   rY   r[   r    r!   r\   n  r]   z@InspectTable._get_files_schema.<locals>._readable_metrics_structFr0   rb   rc   rd   r   re   rf   rg   rh   Tri   rj   rk   rl   rm   rn   ro   rp   rq   rr   r   )r   r   rL   r   r%   r:   r   r   rC   r;   rx   ry   r{   r   r>   r   r   r=   r?   r   r   rX   )r   r:   r   r\   r   r   r;   files_schemar    r[   r!   r!  f  sB   zInspectTable._get_files_schemac                   sh   dd l }|sjj s  S |}jj}t	 }t
| fdd||}||S )Nr   c                   s    |  S r  r$  )r4   r  r   r    r!   <lambda>  s    z%InspectTable._files.<locals>.<lambda>)r   r   r%   r(   r!  empty_tabler*   rs   r   get_or_createlistmapr   concat_tables)r   r#   r  r   r)   rs   executorresultsr    r'  r!   _files  s   

zInspectTable._filesc                 C  s
   |  |S r  )r0  r   r#   r    r    r!   r"    s   
zInspectTable.filesc                 C  s   |  |tjhS r  )r0  r   r   r1  r    r    r!   
data_files  r  zInspectTable.data_filesc                 C  s   |  |tjtjhS r  )r0  r   r   r   r1  r    r    r!   delete_files  s   zInspectTable.delete_filesc                   sZ   dd l } j }|s|jjg   dS t }| fdddd |D }|	|S )Nr   r9   c                   
    j |  S r  )r  argsr   r    r!   r(       
 z,InspectTable.all_manifests.<locals>.<lambda>c                 S  s   g | ]}|d fqS )Tr    r  r    r    r!   r    r  z.InspectTable.all_manifests.<locals>.<listcomp>)
r   r   r@   r   rI   r   r   r*  r,  r-  )r   r   r@   r.  manifests_by_snapshotsr    r   r!   all_manifests  s   

zInspectTable.all_manifestsc                   s   dd l }j }|s|jjg  dS t }|fdd|}dd |D }|fdd fdd	|D }|	|S )
Nr   r9   c                   s   |   jjS r  )r   r   rs   )r)   r   r    r!   r(    s    z)InspectTable._all_files.<locals>.<lambda>c                 S  s    h | ]}|D ]}|j |fqqS r    )r   )r}   r4   r   r    r    r!   r     s     z*InspectTable._all_files.<locals>.<setcomp>c                   r4  r  r&  r5  r   r    r!   r(    r7  c                   s   g | ]\}}| fqS r    r    )r}   _r   )r  r    r!   r    s    z+InspectTable._all_files.<locals>.<listcomp>)
r   r   r@   r   rI   r!  r   r*  r,  r-  )r   r  r   r@   r.  manifest_listsunique_manifests
file_listsr    r'  r!   
_all_files  s   

zInspectTable._all_filesc                 C  s   |   S r  )r>  r   r    r    r!   	all_files  s   zInspectTable.all_filesc                 C  s   |  tjhS r  )r>  r   r   r   r    r    r!   all_data_files  s   zInspectTable.all_data_filesc                 C  s   |  tjtjhS r  )r>  r   r   r   r   r    r    r!   all_delete_files  s   zInspectTable.all_delete_files)r   r   r   r   r  )r#   r$   r   r   )r   r+   )r#   r$   r   r+   )r#   r$   r   r   r   r   r   r+   )
r   r   r   r   r   r   r)   r   r   r   )r   r   )F)r)   r   r   r   r   r+   )r4   r   r  r   r   r+   )NN)r#   r$   r  r   r   r+   )r  r   r   r+   )__name__
__module____qualname____annotations__r"   r*   r@   r   r   ALWAYS_TRUEr   r   r   r   r  r   r  r  r$  r!  r0  r"  r2  r3  r9  r>  r?  r@  rA  r    r    r    r!   r   )   s>   
 

& 
 
B
+

E

$ 
B5


r   )&
__future__r   r   collections.abcr   r   r   typingr   r   pyiceberg.conversionsr   pyiceberg.expressionsr	   r
   pyiceberg.manifestr   r   r   r   r   pyiceberg.partitioningr   r  r   r   pyiceberg.typesr   pyiceberg.utils.concurrentr   pyiceberg.utils.singletonr   r   r   r   r   rF  r   r    r    r    r!   <module>   s$   