o
    SisB                     @   sN  d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	Z
ddlmZ ejdddkZ	 g d	Zd
efddZdd Zdd Zdd Zdd Zdd Zdd Zi ddd ddd ddd d d!d d"d#d d$d%d d&d'd d(d)d d*d+d d,d-d d.ed/ed0ed1d2d d3ed4ed5eeeeed6Zd7d8 Zd9d: Zd;d< Zi d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQdRdSdRdTdUdVdWdXdYdZdYd[d\ZG d]d^ d^Zefd_d`Zdadb Z dcdd Z!G dedf dfZ"dgdh Z#e#gZ$egZ%G didj dje&Z'G dkdl dlZ(e(e$e% Z)dS )mz(Automatically decode webdataset samples.    N)partial   )utilsWDS_PYTORCH_WEIGHTS_ONLY01)?blpbmpdibbufrcurpcxdcxddspsepsfitfitsfliflcftcftugbrgifgribh5hdfpngapngjp2j2kjpcjpfjpxj2cicnsicoimiimtiftiffjfifjpejpgjpegmpgmpegmsppcdpxrpbmpgmppmpnmpsdbwrgbrgbasgirastgaicbvdavstwebpwmfemfxbmxpmdatac                 C   s8   ddl }ddl}tjrtd|| }|j|tddS )at  Function: torch_loads

    Description:
    This function loads data using torch.loads. It first imports torch
    only if necessary. Then it decodes the input data using torch.load.

    Parameters:
    - data (bytes): The data to be decoded.

    Returns:
    It returns the decoded input data.

    Example:
        data = b'...'
        output = torch_loads(data)
    r   NzMtorch.loads is not allowed for security reasons when enforce_security is set.cpu)weights_onlymap_location)iotorchr   enforce_security
ValueErrorBytesIOloadpytorch_weights_only)rG   rK   rL   stream rS   I/home/ubuntu/.local/lib/python3.10/site-packages/webdataset/autodecode.pytorch_loadsh   s   
rU   c                 C   s   ddl m} || S )z?Load data from tenbin format. Imports tenbin only if necessary.r   )tenbin) rV   decode_buffer)rG   rV   rS   rS   rT   tenbin_loads   s   
rY   c                 C      ddl }|| S )zALoad data from msgpack format. Imports msgpack only if necessary.r   N)msgpackunpackb)rG   r[   rS   rS   rT   msgpack_loads      
r]   c                 C   s    ddl }t| }|jj|S )z;Load data from npy format. Imports numpy only if necessary.r   N)numpy.lib.formatrK   rO   libformat
read_array)rG   numpyrR   rS   rS   rT   	npy_loads   s   
rd   c                 C   s   t | }tt|S )z;Load data from npz format. Imports numpy only if necessary.)rK   rO   dictnprP   )rG   rR   rS   rS   rT   	npz_loads   s   
rg   c                 C   rZ   )z;Load data from cbor format. Imports cbor only if necessary.r   N)cborloads)rG   rh   rS   rS   rT   
cbor_loads   r^   rj   c                 C   s   t jrtdt| S )z?Load data from pickle format. Imports pickle only if necessary.zLUnpickling is not allowed for security reasons when enforce_security is set.)r   rM   rN   pickleri   rG   rS   rS   rT   unpickle_loads   s   
rm   txtc                 C   
   |  dS Nutf-8decoderl   rS   rS   rT   <lambda>      
 rt   textc                 C   ro   rp   rr   rl   rS   rS   rT   rt      ru   
transcriptc                 C   ro   rp   rr   rl   rS   rS   rT   rt      ru   clsc                 C      t | S Nintrl   rS   rS   rT   rt          cls2c                 C   ry   rz   r{   rl   rS   rS   rT   rt      r}   indexc                 C   ry   rz   r{   rl   rS   rS   rT   rt      r}   inxc                 C   ry   rz   r{   rl   rS   rS   rT   rt      r}   idc                 C   ry   rz   r{   rl   rS   rS   rT   rt      r}   jsonc                 C   
   t | S rz   r   ri   rl   rS   rS   rT   rt      ru   jsnc                 C   r   rz   r   rl   rS   rS   rT   rt      ru   pklrk   pydpthc                 C   ry   rz   )rU   rl   rS   rS   rT   rt      r}   tentbmp)msgnpynpzrh   c                 C   s&   t dd| }|tv rt| |S dS )a  Handle basic file decoding.

    This function is usually part of the post= decoders.
    This handles the following forms of decoding:

    - txt -> unicode string
    - cls cls2 class count index inx id -> int
    - json jsn -> JSON decoding
    - pyd pickle -> pickle decoding
    - pth -> torch.loads
    - ten tenbin -> fast tensor loading
    - mp messagepack msg -> messagepack decoding
    - npy -> Python NPY decoding

    :param key: file name extension
    :param data: binary data to be decoded
    .*[.]rW   N)resubdecoders)keyrG   	extensionrS   rS   rT   basichandlers   s   r   c                 C   sZ   |   d}|D ]!}|d}t|t|krq	|t| d |kr*||  S q	dS )zCall the function f with the given data if the key matches the extensions.

    :param key: actual key found in the sample
    :param data: binary data
    :param f: decoder function
    :param extensions: list of matching extensions
    .N)lowersplitlen)r   rG   f
extensionsr   targetrS   rS   rT   call_extension_handler   s   
r   c                 C   s   |    } tt|| dS )a  Return a decoder function for the list of extensions.

    Extensions can be a space separated list of extensions.
    Extensions can contain dots, in which case the corresponding number
    of extension components must be present in the key given to f.
    Comparisons are case insensitive.

    Examples:
    handle_extension("jpg jpeg", my_decode_jpg)  # invoked for any file.jpg
    handle_extension("seg.jpg", special_case_jpg)  # invoked only for file.seg.jpg
    )r   r   )r   r   r   r   )r   r   rS   rS   rT   handle_extension   s   r   l8)rc   uint8lrgb8)rc   r   r:   rgba8)rc   r   r;   r   )rc   floatr   r:   )rc   r   r:   r;   )rc   r   r;   torchl8)rL   r   r   	torchrgb8)rL   r   r:   
torchrgba8)rL   r   r;   torchl)rL   r   r   torchrgb)rL   r   r:   rL   	torchrgba)rL   r   r;   pill)pilNr   r   )r   Nr:   pilrgbpilrgba)r   Nr;   c                   @   s$   e Zd ZdZefddZdd ZdS )ImageHandlera  Decode image data using the given `imagespec`.

    The `imagespec` specifies whether the image is decoded
    to numpy/torch/pi, decoded to uint8/float, and decoded
    to l/rgb/rgba:

    - l8: numpy uint8 l
    - rgb8: numpy uint8 rgb
    - rgba8: numpy uint8 rgba
    - l: numpy float l
    - rgb: numpy float rgb
    - rgba: numpy float rgba
    - torchl8: torch uint8 l
    - torchrgb8: torch uint8 rgb
    - torchrgba8: torch uint8 rgba
    - torchl: torch float l
    - torchrgb: torch float rgb
    - torch: torch float rgb
    - torchrgba: torch float rgba
    - pill: pil None l
    - pil: pil None rgb
    - pilrgb: pil None rgb
    - pilrgba: pil None rgba

    c                 C   s4   |t t vrtd| | | _t|| _dS )zCreate an image handler.

        :param imagespec: short string indicating the type of decoding
        :param extensions: list of extensions the image handler is invoked for
        zUnknown imagespec: %sN)list
imagespecskeysrN   r   	imagespecsetr   )selfr   r   rS   rS   rT   __init__?  s   
zImageHandler.__init__c                 C   s  ddl }tdd|}| | jvrdS | j}t| \}}}t|}	|j	
|	}
|
  |
| }
W d   n1 s?w   Y  |dkro|dkrS|
d}
|
S |dkr^|
d	}
|
S |d
kri|
d}
|
S td| t|
}|dkr|tjd }|jdv sJ |j|dv sJ ||dkr|jdkrtj|ddddddf dd}nu|dkr|jdkrtj|ddddtjf ddd}nY|jd dkr|ddddddf }nD|d
kr#|jdkrtj|ddddtjf ddd}d|dddddf< n|jd dkr#tj|dt|jdd  gdd}|dv s,J ||dkr3|S |dkrUddl}|jdkrN||ddd S || S dS )zjPerform image decoding.

        :param key: file name extension
        :param data: binary data
        r   Nr   rW   r   r   Lr:   RGBr;   RGBAzUnknown mode: %sr   g     o@)      )r   r:   r;   r   r   )axis      )rc   rL   rc   rL   r   )	PIL.Imager   r   r   r   r   r   rK   rO   ImageopenrP   convertupperrN   rf   asarrayastypefloat32ndimshapemeanrepeatnewaxisconcatenateonesrL   
from_numpy	transposecopy)r   r   rG   PILr   r   atypeetypemoderR   imgresultrL   rS   rS   rT   __call__J  sh   




$
&
$&

zImageHandler.__call__N)__name__
__module____qualname____doc__IMAGE_EXTENSIONSr   r   rS   rS   rS   rT   r   $  s    r   c                 C   s
   t | |S )zCreate an image handler.

    This is just a lower case alias for ImageHander.

    :param imagespec: textual image spec
    :param extensions: list of extensions the handler should be applied for
    )r   )r   r   rS   rS   rT   imagehandler  s   
r   c              	   C   s   t dd| }|d vrdS ddl}t 4}tj|d| }t	|d}|
| W d   n1 s7w   Y  |jj|dd	W  d   S 1 sNw   Y  dS )
zxDecode video using the torchvideo library.

    :param key: file name extension
    :param data: data to be decoded
    r   rW   z'mp4 ogv mjpeg avi mov h264 mpg webm wmvNr   file.wbsec)pts_unit)r   r   r   torchvision.iotempfileTemporaryDirectoryospathjoinr   writerK   
read_video)r   rG   r   torchvisiondirnamefnamerR   rS   rS   rT   torch_video  s   
$r   c              	   C   s   t dd| }|dvrdS ddl}t 1}tj|d| }t|d}|	| W d   n1 s5w   Y  |
|W  d   S 1 sIw   Y  dS )zxDecode audio using the torchaudio library.

    :param key: file name extension
    :param data: data to be decoded
    r   rW   )flacmp3soxwavm4aoggwmaNr   r   r   )r   r   
torchaudior   r   r   r   r   r   r   rP   )r   rG   r   r   r   r   rR   rS   rS   rT   torch_audio  s   
$r   c                   @   s   e Zd ZdZdd ZdS )ContinuezSpecial class for continuing decoding.

    This is mostly used for decompression, as in:

        def decompressor(key, data):
            if key.endswith(".gz"):
                return Continue(key[:-3], decompress(data))
            return None
    c                 C   s   ||| _ | _dS )z<__init__.

        :param key:
        :param data:
        N)r   rG   )r   r   rG   rS   rS   rT   r     s   zContinue.__init__Nr   r   r   r   r   rS   rS   rS   rT   r     s    
r   c                 C   s<   ddl }| dsdS |t| }t| dd |S )zDecode .gz files.

    This decodes compressed files and the continues decoding.

    :param key: file name extension
    :param data: binary data
    r   Nz.gz)gzipendswithr   rK   rO   readr   )r   rG   r   decompressedrS   rS   rT   gzfilter  s
   
r   c                   @   s   e Zd ZdZdddZdS )DecodingErrorz$Exception class for decoding errors.Nc                 C   s   || _ || _|| _|| _d S rz   )urlr   ksample)r   r   r   r  r  rS   rS   rT   r     s   
zDecodingError.__init__)NNNNr   rS   rS   rS   rT   r     s    r   c                   @   s2   e Zd ZdZdddZdd Zdd	 Zd
d ZdS )DecoderzDecode samples using a list of handlers.

    For each key/data item, this iterates through the list of
    handlers until some handler returns something other than None.
    NFc                 C   s   t |tsJ d| dt |tr| }|du r|nt|| _|du r't}|du r-t}tdd |D s>J d| dtdd |D sOJ d| dtd	d |D s`J d| d|| | | _	|| _
dS )
a  Create a Decoder.

        :param handlers: main list of handlers
        :param pre: handlers called before the main list (.gz handler by default)
        :param post: handlers called after the main list (default handlers by default)
        :param only: a list of extensions; when give, only ignores files with those extensions
        :param partial: allow partial decoding (i.e., don't decode fields that aren't of type bytes)
        zhandlers = z must be a listNc                 s       | ]}t |V  qd S rz   callable.0hrS   rS   rT   	<genexpr>      z#Decoder.__init__.<locals>.<genexpr>zone of z not callablec                 s   r  rz   r  r  rS   rS   rT   r
    r  c                 s   r  rz   r  r  rS   rS   rT   r
     r  )
isinstancer   strr   r   onlydefault_pre_handlersdefault_post_handlersallhandlersr   )r   r  prepostr  r   rS   rS   rT   r     s   	
"""
zDecoder.__init__c                 C   sL   d| }| j D ]}|||}t|tr|j|j}}q|dur#|  S q|S )zuDecode a single field of a sample.

        :param key: file name extension
        :param data: binary data
        r   N)r  r  r   r   rG   )r   r   rG   r   r   rS   rS   rT   decode1$  s   


zDecoder.decode1c                 C   sX  i }t |tsJ |t| D ]\}}zs|dd dkrDt |tr>z|d}W n ty=   td| d|  Y nw |||< W q| jdurT|| jvrT|||< W q|dusZJ | j	rpt |trk| 
||||< n|||< nt |tsJ d| d| | 
||||< W q ty } z|d	d}|d
d}t||||d|d}~ww |S )zeDecode an entire sample.

        :param sample: the sample, a dictionary of key value pairs
        Nr   __rq   zCan't decode v of k = z as utf-8: v = zk,v = z, __key____url__)r   r   r  r  )r  re   r   itemsbytesrs   	Exceptionprintr  r   r  getr   )r   r  r   r  vexnr   r   rS   rS   rT   rs   4  s>   


zDecoder.decodec                 C   s$   t |tsJ t||f| |S )zDDecode an entire sample.

        :param sample: the sample
        )r  re   r   rs   )r   r  rS   rS   rT   r   W  s   
zDecoder.__call__)NNNF)r   r   r   r   r   r  rs   r   rS   rS   rS   rT   r    s    
#r  )*r   rK   r   r   rk   r   r   	functoolsr   rc   rf   rW   r   environr  rQ   r   r  rU   rY   r]   rd   rg   rj   rm   r   r   r   r   r   r   r   r   r   r   r   r  r  r  r   r  default_decoderrS   rS   rS   rT   <module>   s   G		
	
k
Z