o
    uyi*                     @  s  d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZmZmZmZmZmZmZ ddlmZmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlm Z  ddl!m"Z"m#Z#m$Z$m%Z% ddl&m'Z' ddl(m)Z)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/m0Z0m1Z1 ddl2m3Z3m4Z4m5Z5m6Z6m7Z7 ddl8m9Z9 dZ:e;de<e:g Z=e>e=Z?dZ@e7e5dde3e?ddde5dde4de6 de6 dd dd!e5d"d#e3e@ddd!ZAd$ZBG d%d& d&e0ZCed'e1d(ZDeG d)d* d*eeD ZEG d+d, d,eeD ZFG d-d. d.eeD ZGdS )/z#Avro reader for reading Avro files.    )annotationsN)	dataclass)Enum)TracebackType)CallableDictGenericListOptionalTypeTypeVar)AVRO_CODEC_KEYCODEC_MAPPING_ICEBERG_TO_AVROKNOWN_CODECS)Codec)BinaryDecodernew_decoder)BinaryEncoder)Reader)construct_readerconstruct_writerresolve_readerresolve_writer)Writer)	InputFile
OutputFileOutputStream)Schema)
EMPTY_DICTRecordStructProtocol)	FixedTypeMapTypeNestedField
StringType
StructType)AvroSchemaConversion   s   Obj   magicd   )lengthT)namefield_id
field_typerequired   meta      )key_idkey_typevalue_id
value_typevalue_required)r-   r,   r.   r/   i,  synczavro.schemac                   @  sJ   e Zd ZedddZedddZeddd	ZdddZdddZdS )AvroFileHeaderreturnbytesc                 C  
   | j d S )Nr   _dataself rB   V/home/ubuntu/maya3_transcribe/venv/lib/python3.10/site-packages/pyiceberg/avro/file.pyr)   L      
zAvroFileHeader.magicDict[str, str]c                 C  r=   )Nr'   r>   r@   rB   rB   rC   r1   P   rD   zAvroFileHeader.metac                 C  r=   )N   r>   r@   rB   rB   rC   r9   T   rD   zAvroFileHeader.syncOptional[Type[Codec]]c                 C  s:   ddl m} | jt|j}|tvrtd| t| S zGet the file's compression codec algorithm from the file's metadata.

        In the case of a null codec, we return a None indicating that we
        don't need to compress/decompress.
        r   TablePropertieszUnsupported codec: )pyiceberg.tablerJ   r1   getr   WRITE_AVRO_COMPRESSION_DEFAULTr   
ValueError)rA   rJ   
codec_namerB   rB   rC   compression_codecX   s
   z AvroFileHeader.compression_codecr   c                 C  s2   t | jv r| jt  }t|}t |S td)Nz$No schema found in Avro file headers)_SCHEMA_KEYr1   jsonloadsr&   avro_to_icebergrN   )rA   avro_schema_stringavro_schemarB   rB   rC   
get_schemaf   s
   


zAvroFileHeader.get_schemaN)r;   r<   )r;   rE   r;   rG   )r;   r   )	__name__
__module____qualname__propertyr)   r1   r9   rP   rW   rB   rB   rB   rC   r:   K   s    
r:   D)boundc                   @  sP   e Zd ZU ded< ded< ded< dZded< dddZdddZdddZdS )Blockr   readerintblock_recordsr   block_decoderr   positionr;   Block[D]c                 C     | S )z'Return an iterator for the Block class.rB   r@   rB   rB   rC   __iter__y      zBlock.__iter__boolc                 C  s   | j | jk S N)rd   rb   r@   rB   rB   rC   has_next}   s   zBlock.has_nextr]   c                 C  s(   |   r|  jd7  _| j| jS t)z9Return the next item when iterating over the Block class.r'   )rk   rd   r`   readrc   StopIterationr@   rB   rB   rC   __next__   s   zBlock.__next__N)r;   re   )r;   ri   r;   r]   )rY   rZ   r[   __annotations__rd   rg   rk   rn   rB   rB   rB   rC   r_   r   s   
 

r_   c                   @  s   e Zd ZU dZded< ded< ded< ded	< d
ed< ded< ded< ded< ded< deefd.ddZd/ddZd0d"d#Zd/d$d%Z	d1d'd(Z
d2d*d+Zd3d,d-ZdS )4AvroFile)	
input_fileread_schema
read_types
read_enumsheaderschemar`   decoderblockr   rr   Optional[Schema]rs   (Dict[int, Callable[..., StructProtocol]]rt   Dict[int, Callable[..., Enum]]ru   r:   rv   r   rw   r   r`   r   rx   zOptional[Block[D]]ry   Nr;   Nonec                 C  s"   || _ || _|| _|| _d | _d S rj   )rr   rs   rt   ru   ry   )rA   rr   rs   rt   ru   rB   rB   rC   __init__   s
   
zAvroFile.__init__AvroFile[D]c                 C  sx   | j  }t| | _W d   n1 sw   Y  |  | _| j | _| j	s.| j| _	t
| j| j	| j| j| _| S )zGenerate a reader tree for the payload within an avro file.

        Return:
            A generator returning the AvroStructs.
        N)rr   openr   rl   rx   _read_headerrv   rW   rw   rs   r   rt   ru   r`   )rA   frB   rB   rC   	__enter__   s   
zAvroFile.__enter__exctypeOptional[Type[BaseException]]excinstOptional[BaseException]exctbOptional[TracebackType]c                 C  s   dS z=Perform cleanup when exiting the scope of a 'with' statement.NrB   rA   r   r   r   rB   rB   rC   __exit__   s    zAvroFile.__exit__c                 C  rf   )z*Return an iterator for the AvroFile class.rB   r@   rB   rB   rC   rg      rh   zAvroFile.__iter__ra   c                 C  s|   | j r| jt}|| jjkrtd| jjd|| j }| j }| j	  }r1|
|}t| j|t|d| _ |S )NzExpected sync bytes z
, but got )r`   rb   rc   )ry   rx   rl   	SYNC_SIZErv   r9   rN   read_int
read_bytesrP   
decompressr_   r`   r   )rA   sync_markerrb   block_bytescodecrB   rB   rC   _read_block   s   


zAvroFile._read_blockr]   c              
   C  sZ   | j r| j  rt| j S z|  }W n ty" } zt|d}~ww |dkr+|  S t)z<Return the next item when iterating over the AvroFile class.Nr   )ry   rk   nextr   EOFErrorrm   rn   )rA   	new_blockexcrB   rB   rC   rn      s   
zAvroFile.__next__c                 C  s   t tdti| jS )N)r   META_SCHEMAr:   rl   rx   r@   rB   rB   rC   r      s   zAvroFile._read_header)
rr   r   rs   rz   rt   r{   ru   r|   r;   r}   )r;   r   r   r   r   r   r   r   r;   r}   )r;   ra   ro   )r;   r:   )rY   rZ   r[   	__slots__rp   r   r~   r   r   rg   r   rn   r   rB   rB   rB   rC   rq      s*   
 




rq   c                   @  s   e Zd ZU ded< ded< ded< ded< d	ed
< ded< ded< defd,ddZd-ddZd.d!d"Zd/d#d$Zd0d&d'Z	d1d*d+Z
dS )2AvroOutputFiler   output_filer   output_streamr   file_schemastrschema_namer   encoderr<   
sync_bytesr   writerNrecord_schemarz   metadatarE   r;   r}   c                 C  sL   || _ || _|| _tt| _|d u rt| jdnt|| jd| _	|| _
d S )N)r   )r   r   )r   r   r   osurandomr   r   r   r   r   r   )rA   r   r   r   r   r   rB   rB   rC   r~      s   
zAvroOutputFile.__init__AvroOutputFile[D]c                 C  s(   | j jdd| _t| j| _|   | S )zx
        Open the file and writes the header.

        Returns:
            The file object to write records to
        T)	overwrite)r   creater   r   r   _write_headerr@   rB   rB   rC   r     s   zAvroOutputFile.__enter__r   r   r   r   r   r   c                 C  s   | j   dS r   )r   closer   rB   rB   rC   r     s   zAvroOutputFile.__exit__c                 C  s   ddl m} | jt|j}t| }r|}tt	 j
| j| jd}i | jt|t|i}tt|| j}tt| j| d S )Nr   rI   )r   )rK   rJ   r   rL   r   rM   r   rR   dumpsr&   iceberg_to_avror   r   rQ   r:   MAGICr   r   r   writer   )rA   rJ   rO   avro_codec_namejson_schemar1   rv   rB   rB   rC   r     s   zAvroOutputFile._write_headerrG   c                 C  sL   ddl m} | jt|j}t| }r|}|tvr"td| t| S rH   )	rK   rJ   r   rL   r   rM   r   r   rN   )rA   rJ   rO   r   rB   rB   rC   rP   $  s   z AvroOutputFile.compression_codecobjectsList[D]c           	      C  s   t  }t|d}|D ]	}| j|| q| }| jt| | 	  }r;|
|\}}| j| | j| n| jt| | j| | j| j d S )N)r   )ioBytesIOr   r   r   getvaluer   	write_intlenrP   compressr   )	rA   r   	in_memoryblock_content_encoderobjblock_contentr   contentcontent_lengthrB   rB   rC   write_block6  s   
zAvroOutputFile.write_block)r   r   r   r   r   r   r   rz   r   rE   r;   r}   )r;   r   r   )r;   r}   rX   )r   r   r;   r}   )rY   rZ   r[   rp   r   r~   r   r   r   rP   r   rB   rB   rB   rC   r      s    
 



r   )H__doc__
__future__r   r   rR   r   dataclassesr   enumr   typesr   typingr   r   r   r	   r
   r   r   pyiceberg.avro.codecsr   r   r   pyiceberg.avro.codecs.codecr   pyiceberg.avro.decoderr   r   pyiceberg.avro.encoderr   pyiceberg.avro.readerr   pyiceberg.avro.resolverr   r   r   r   pyiceberg.avro.writerr   pyiceberg.ior   r   r   pyiceberg.schemar   pyiceberg.typedefr   r   r    pyiceberg.typesr!   r"   r#   r$   r%   !pyiceberg.utils.schema_conversionr&   VERSIONr<   	bytearrayr   r   
MAGIC_SIZEr   r   rQ   r:   r]   r_   rq   r   rB   rB   rB   rC   <module>   sT   $
$_