o
    ioB                     @  sx  U d Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	 ddlm
Z ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ dMddZG dd deZ G dd de Z!G dd de Z"G dd de Z#G d d! d!e Z$G d"d# d#e Z%G d$d% d%e Z&G d&d' d'e$Z'G d(d) d)e$Z(G d*d+ d+e$Z)G d,d- d-e$Z*G d.d/ d/e$Z+G d0d1 d1e$Z,G d2d3 d3e Z-G d4d5 d5e Z.G d6d7 d7e Z/e	d8d9G d:d; d;e Z0G d<d= d=e Z1e	d8d>d?G d@dA dAe Z2e	d8d9G dBdC dCe Z3G dDdE dEe Z4e	d>d>d?G dFdG dGe Z5i Z6dHe7dI< e	d>d>d?G dJdK dKe Z8dLS )Na  
Classes for building the Reader tree.

Constructing a reader tree from the schema makes it easy
to decouple the reader implementation from the schema.

The reader tree can be changed in such a way that the
read schema is different, while respecting the read schema.
    )annotations)abstractmethod)CallableMapping)	dataclass)field)Decimal)Any)UUID)BinaryDecoder)StructProtocol)
StructType)bytes_to_decimaldecimal_required_bytes)LazyDict)	Singletondecoderr   
skip_entryCallable[[], None]returnNonec                 C  sX   |   }|dkr*|dk r|   }| | n
t|D ]}|  q|   }|dksdS dS )a  Skips over an array or map.

    Both the array and map are encoded similar, and we can reuse
    the logic of skipping in an efficient way.

    From the Avro spec:

    Maps (and arrays) are encoded as a series of blocks.
    Each block consists of a long count value, followed by that many key/value pairs in the case of a map,
    and followed by that many array items in the case of an array. A block with count zero indicates the
    end of the map. Each item is encoded per the map's value schema.

    If a block's count is negative, its absolute value is used, and the count is followed immediately by a
    long block size indicating the number of bytes in the block. This block size permits fast skipping
    through data, e.g., when projecting a record to a subset of its fields.

    Args:
        decoder:
            The decoder that reads the types from the underlying data.
        skip_entry:
            Function to skip over the underlying data, element in case of an array, and the
            key/value in the case of a map.
    r   N)read_intskiprange)r   r   block_count
block_size_ r   S/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/pyiceberg/avro/reader.py_skip_map_array/   s   r   c                   @  s2   e Zd ZedddZeddd	ZdddZdS )Readerr   r   r   r	   c                 C     d S Nr   selfr   r   r   r   readT      zReader.readr   c                 C  r!   r"   r   r#   r   r   r   r   W   r&   zReader.skipstrc                 C  s   | j j dS )z5Return the string representation of the Reader class.z())	__class____name__r$   r   r   r   __repr__Z      zReader.__repr__N)r   r   r   r	   r   r   r   r   r   r'   )r)   
__module____qualname__r   r%   r   r+   r   r   r   r   r    S   s    r    c                   @      e Zd ZdddZddd	Zd
S )
NoneReaderr   r   r   r   c                 C  r!   r"   r   r$   r   r   r   r   r%   `   r&   zNoneReader.readr   c                 C  r!   r"   r   r#   r   r   r   r   c   r&   zNoneReader.skipN)r   r   r   r   r-   r)   r/   r0   r%   r   r   r   r   r   r2   _       
r2   c                   @  s8   e Zd ZU dZded< dddZdd
dZdddZdS )DefaultReaderdefault_valuer	   r8   r   r   c                 C  s
   || _ d S r"   r7   )r$   r8   r   r   r   __init__k   s   
zDefaultReader.__init__r   r   c                 C     | j S r"   r7   r3   r   r   r   r%   n   s   zDefaultReader.readr   c                 C  r!   r"   r   r#   r   r   r   r   q   r&   zDefaultReader.skipN)r8   r	   r   r   )r   r   r   r	   r-   )r)   r/   r0   	__slots____annotations__r9   r%   r   r   r   r   r   r6   g   s   
 

r6   c                   @  r1   )BooleanReaderr   r   r   boolc                 C     |  S r"   )read_booleanr#   r   r   r   r%   v      zBooleanReader.readr   c                 C     |   d S r"   )skip_booleanr#   r   r   r   r   y      zBooleanReader.skipN)r   r   r   r>   r-   r4   r   r   r   r   r=   u   r5   r=   c                   @  $   e Zd ZdZdddZdd	d
ZdS )IntegerReaderzHLongs and ints are encoded the same way, and there is no long in Python.r   r   r   intc                 C  r?   r"   )r   r#   r   r   r   r%      rA   zIntegerReader.readr   c                 C  rB   r"   )skip_intr#   r   r   r   r      rD   zIntegerReader.skipN)r   r   r   rG   r-   r)   r/   r0   __doc__r%   r   r   r   r   r   rF   }   s    
rF   c                   @  r1   )FloatReaderr   r   r   floatc                 C  r?   r"   )
read_floatr#   r   r   r   r%      rA   zFloatReader.readr   c                 C  rB   r"   )
skip_floatr#   r   r   r   r      rD   zFloatReader.skipNr   r   r   rL   r-   r4   r   r   r   r   rK      r5   rK   c                   @  r1   )DoubleReaderr   r   r   rL   c                 C  r?   r"   )read_doubler#   r   r   r   r%      rA   zDoubleReader.readr   c                 C  rB   r"   )skip_doubler#   r   r   r   r      rD   zDoubleReader.skipNrO   r-   r4   r   r   r   r   rP      r5   rP   c                   @     e Zd ZdZdS )
DateReaderz_Reads a day granularity date from the stream.

    The number of days from 1 January 1970.
    Nr)   r/   r0   rJ   r   r   r   r   rT          rT   c                   @  rS   )
TimeReaderzReads a microsecond granularity timestamp from the stream.

    Long is decoded as an integer which represents
    the number of microseconds from the unix epoch, 1 January 1970.
    NrU   r   r   r   r   rW      rV   rW   c                   @  rS   )TimestampReaderzReads a microsecond granularity timestamp from the stream.

    Long is decoded as python integer which represents
    the number of microseconds from the unix epoch, 1 January 1970.
    NrU   r   r   r   r   rX      rV   rX   c                   @  rS   )TimestampNanoReaderzReads a nanosecond granularity timestamp from the stream.

    Long is decoded as python integer which represents
    the number of nanoseconds from the unix epoch, 1 January 1970.
    NrU   r   r   r   r   rY      rV   rY   c                   @  rS   )TimestamptzReaderzReads a microsecond granularity timestamptz from the stream.

    Long is decoded as python integer which represents
    the number of microseconds from the unix epoch, 1 January 1970.

    Adjusted to UTC.
    NrU   r   r   r   r   rZ      rV   rZ   c                   @  rS   )TimestamptzNanoReaderzReads a microsecond granularity timestamptz from the stream.

    Long is decoded as python integer which represents
    the number of nanoseconds from the unix epoch, 1 January 1970.

    Adjusted to UTC.
    NrU   r   r   r   r   r[      rV   r[   c                   @  r1   )StringReaderr   r   r   r'   c                 C  r?   r"   )	read_utf8r#   r   r   r   r%      rA   zStringReader.readr   c                 C  rB   r"   )	skip_utf8r#   r   r   r   r      rD   zStringReader.skipN)r   r   r   r'   r-   r4   r   r   r   r   r\      r5   r\   c                   @  r1   )
UUIDReaderr   r   r   r
   c                 C  s   t |ddS )N   )bytes)r
   r%   r#   r   r   r   r%      s   zUUIDReader.readr   c                 C  s   | d d S )Nr`   )r   r#   r   r   r   r         zUUIDReader.skipN)r   r   r   r
   r-   r4   r   r   r   r   r_      r5   r_   c                   @  s    e Zd Zd
ddZd
ddZd	S )UnknownReaderr   r   r   r   c                 C  r!   r"   r   r#   r   r   r   r%      r&   zUnknownReader.readc                 C  r!   r"   r   r#   r   r   r   r      r&   zUnknownReader.skipNr-   r4   r   r   r   r   rc      r5   rc   T)frozenc                   @  sD   e Zd ZU e Zded< dddZdd
dZdddZdddZ	dS )FixedReaderrG   _lenr   r   r   ra   c                 C  s   | t| S r"   )r%   lenr#   r   r   r   r%      rb   zFixedReader.readr   c                 C  s   | t|  d S r"   )r   rg   r#   r   r   r   r      s   zFixedReader.skipc                 C  r:   )z:Return the length of an instance of the FixedReader class.rf   r*   r   r   r   __len__      zFixedReader.__len__r'   c                 C  s   d| j  dS )z:Return the string representation of the FixedReader class.zFixedReader()rh   r*   r   r   r   r+      r,   zFixedReader.__repr__Nr   r   r   ra   r-   r   rG   r.   )
r)   r/   r0   dataclassfieldrf   r<   r%   r   ri   r+   r   r   r   r   re      s   
 


re   c                   @  rE   )BinaryReaderzRead a binary value.

    First reads an integer, to get the length of the binary value,
    then reads the binary field itself.
    r   r   r   ra   c                 C  r?   r"   )
read_bytesr#   r   r   r   r%      rA   zBinaryReader.readr   c                 C  rB   r"   
skip_bytesr#   r   r   r   r      rD   zBinaryReader.skipNrl   r-   rI   r   r   r   r   ro      s    
ro   F)rd   initc                   @  s^   e Zd ZU dZe Zded< e Zded< ded< dddZdddZ	dddZ
dddZdS )DecimalReaderzReads a value as a decimal.

    Decimal bytes are decoded as signed short, int or long depending on the
    size of bytes.
    rG   	precisionscale_lengthc                 C  s2   t | d| t | d| t | dt| d S )Nru   rv   rw   )object__setattr__r   )r$   ru   rv   r   r   r   r9     s   zDecimalReader.__init__r   r   r   r   c                 C  s   t || j| jS r"   )r   r%   rw   rv   r#   r   r   r   r%     s   zDecimalReader.readr   c                 C  rB   r"   rq   r#   r   r   r   r     rD   zDecimalReader.skipr'   c                 C  s   d| j  d| j dS )z<Return the string representation of the DecimalReader class.zDecimalReader(z, rk   )ru   rv   r*   r   r   r   r+     s   zDecimalReader.__repr__N)ru   rG   rv   rG   )r   r   r   r   r-   r.   )r)   r/   r0   rJ   rn   ru   r<   rv   r9   r%   r   r+   r   r   r   r   rt     s   
 


rt   c                   @  s0   e Zd ZU e Zded< dddZdd
dZdS )OptionReaderr    optionr   r   r   
Any | Nonec                 C     |  dkr| j|S d S Nr   )r   r{   r%   r#   r   r   r   r%   #  s   
zOptionReader.readr   c                 C  r}   r~   )r   r{   r   r#   r   r   r   r   1  s   zOptionReader.skipN)r   r   r   r|   r-   )r)   r/   r0   rn   r{   r<   r%   r   r   r   r   r   rz     s   
 
rz   c                   @  s   e Zd ZU dZded< ded< ded< eeedB eee	ge
f dB f d	f Zd ddZd!ddZd"ddZd#ddZd$ddZd%ddZdS )&StructReader)field_readerscreate_structstruct_field_reader_functions_hash_max_pos%tuple[tuple[int | None, Reader], ...]r   Callable[..., StructProtocol]r   r   r   N.r   r   c                 C  s   || _ || _|| _t|  tstd| j g }d}|D ]\}}|d ur4|||jf t||}q|d |j	f qt
|| _t| j| _d| | _d S )Nz"Incompatible with StructProtocol:    )r   r   r   
isinstancer   
ValueErrorappendr%   maxr   tupler   hashr   r   )r$   r   r   r   reading_callbacksmax_posposr   r   r   r   r9   D  s   
zStructReader.__init__r   r   r   c                 C  sD   | j d g| j  }| jD ]\}}|d ur||||< q|| q|S r"   )r   r   r   )r$   r   r   r   field_readerr   r   r   r%   _  s   
zStructReader.readc                 C  s   | j D ]	\}}|| qd S r"   )r   r   )r$   r   r   r   r   r   r   r   j  s   zStructReader.skipotherr	   r>   c                 C  s&   t |tr| j|jko| j|jkS dS )z?Return the equality of two instances of the StructReader class.F)r   r   r   r   )r$   r   r   r   r   __eq__n  s
   zStructReader.__eq__r'   c                 C  s*   dd dd | jD  dt| j dS )z;Return the string representation of the StructReader class.zStructReader((,c                 s  s    | ]}t |V  qd S r"   )repr).0r   r   r   r   	<genexpr>x  s    z(StructReader.__repr__.<locals>.<genexpr>z), rk   )joinr   r   r   r*   r   r   r   r+   v  s   *zStructReader.__repr__rG   c                 C  r:   )z9Return a hashed representation of the StructReader class.r   r*   r   r   r   __hash__z  rj   zStructReader.__hash__)r   r   r   r   r   r   r   r   )r   r   r   r   r-   )r   r	   r   r>   r.   rm   )r)   r/   r0   r;   r<   r   r'   rG   r   r   r	   field_reader_functionsr9   r%   r   r   r+   r   r   r   r   r   r   6  s   
 (




r   c                      sJ   e Zd ZU dZded< d fddZdddZdddZdddZ  Z	S )
ListReader)element_is_int_listr   r    r   r   r   c                   s.   t    || _t| j| _t| jt| _d S r"   )superr9   r   r   r   r   rF   r   )r$   r   r(   r   r   r9     s   
zListReader.__init__r   r   	list[Any]c                 C  sv   g }|  }|dkr9|dk r| }|  }| jr!||| nt|D ]}|| j| q%|  }|dks
|S r~   )r   r   extend	read_intsr   r   r   r%   )r$   r   
read_itemsr   r   r   r   r   r%     s   
zListReader.readc                   s   t   fdd d S )Nc                     s   j  S r"   )r   r   r   r   r$   r   r   <lambda>  s    z!ListReader.skip.<locals>.<lambda>r   r#   r   r   r   r     s   zListReader.skiprG   c                 C  r:   )z7Return a hashed representation of the ListReader class.r   r*   r   r   r   r     rj   zListReader.__hash__)r   r    r   r   )r   r   r   r   r-   rm   )
r)   r/   r0   r;   r<   r9   r%   r   r   __classcell__r   r   r   r   r     s   
 

r   zdict[Any, Any]
EMPTY_DICTc                      s\   e Zd ZU dZded< ded< d fddZdddZdddZdddZdddZ	  Z
S )	MapReader)keyvalue_is_int_int_is_int_bytes_key_reader_value_readerr   r    r   r   r   r   c                   sv   t    || _|| _t| jtr t| jt| _t| jt| _nd| _d| _| jj	| _
| jj	| _t| j| jf| _d S )NF)r   r9   r   r   r   rF   r   ro   r   r%   r   r   r   r   )r$   r   r   r   r   r   r9     s   


zMapReader.__init__r   r   Mapping[int, int]c                 C  sb   |  }|dkr
tS g }|dkr-|dk r| }|  |||d  |  }|dkst|S )a  Read a mapping from int to int from the decoder.

        Read a map of ints to ints from the decoder, since this is such a common
        data type, it is optimized to be faster than the generic map reader, by
        using a lazy dict.

        The time it takes to create the python dictionary is much larger than
        the time it takes to read the data from the decoder as an array, so the
        lazy dict defers creating the python dictionary until it is actually
        accessed.

        r      )r   r   rH   r   r   r   )r$   r   r   contents_arrayr   r   r   _read_int_int  s   zMapReader._read_int_intMapping[Any, Any]c                 C  s   i }| j s| jr3| j r| |S | }|dkr1|dk r#| }| }||| | }|dks|S | }|dkra|dk rF| }| }t|D ]}| |}| |||< qJ| }|dks;|S r~   )r   r   r   r   read_int_bytes_dictr   r   r   )r$   r   r   r   r   r   r   r   r   r%     s0   


zMapReader.readc                   s   d fdd}t  | d S )Nr   r   c                     s   j   j  d S r"   )r   r   r   r   r   r   r   r     s   zMapReader.skip.<locals>.skip)r   r   r   )r$   r   r   r   r   r   r     s   zMapReader.skiprG   c                 C  r:   )z6Return a hashed representation of the MapReader class.r   r*   r   r   r   r      rj   zMapReader.__hash__)r   r    r   r    r   r   )r   r   r   r   )r   r   r   r   r-   rm   )r)   r/   r0   r;   r<   r9   r   r%   r   r   r   r   r   r   r   r     s   
 

#
r   N)r   r   r   r   r   r   )9rJ   
__future__r   abcr   collections.abcr   r   dataclassesr   r   rn   decimalr   typingr	   uuidr
   pyiceberg.avro.decoderr   pyiceberg.typedefr   pyiceberg.typesr   pyiceberg.utils.decimalr   r   pyiceberg.utils.lazydictr   pyiceberg.utils.singletonr   r   r    r2   r6   r=   rF   rK   rP   rT   rW   rX   rY   rZ   r[   r\   r_   rc   re   ro   rt   rz   r   r   r   r<   r   r   r   r   r   <module>   sZ   

$




I"
