o
    bi                     @   s   d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	 d dl
Zd dlmZ d dlmZ d dlmZmZ d dlmZ d dlmZ erFd dlZeeZdZd	ZG d
d deZG dd deZdS )    N)TYPE_CHECKINGIteratorListOptionalTupleUnion)DelegatingBlockBuilder)_check_import)BlockBlockMetadata)FileBasedDatasource)DefaultFileMetadataProvider   g      ?c                       s   e Zd ZdZdZg dZdZ		ddeee	e f de
eeef  de
e f fd	d
Zdddedee fddZdd Zde
e fddZdefddZ  ZS )ImageDatasourcez'A datasource that lets you read images.T)pngjpgjpegtiftiffbmpgif   Npathssizemodec                    s   t  j|fi | t| ddd |d ur%t|dkr%tdt| d|d ur=|d dk s5|d dk r=td	| d
|| _|| _|dd }t|t	r[| 
 | _|| j d S t| _d S )NPILPillow)modulepackage   zFExpected `size` to contain two integers for height and width, but got z integers instead.r   r   z6Expected `size` to contain positive integers, but got z	 instead.meta_provider)super__init__r	   len
ValueErrorr   r   get
isinstanceImageFileMetadataProvider_estimate_files_encoding_ratio_encoding_ratio_set_encoding_ratio%IMAGE_ENCODING_RATIO_ESTIMATE_DEFAULT)selfr   r   r   file_based_datasource_kwargsr    	__class__ b/home/ubuntu/.local/lib/python3.10/site-packages/ray/data/_internal/datasource/image_datasource.pyr"   %   s&    



zImageDatasource.__init__fzpyarrow.NativeFilepathreturnc              
   c   s    ddl m}m} | }z
|t|}W n |y, } z	td| d|d }~ww | jd urK|jt	t
| jkrK| j\}}	|j|	|f|jd}| jd ur\|j| jkr\|| j}t }
t|}d|i}|
| |
 }|V  d S )Nr   )ImageUnidentifiedImageErrorz&PIL couldn't load image file at path 'z'.)resampleimage)r   r5   r6   readallopenioBytesIOr$   r   tuplereversedresizeBILINEARr   convertr   npasarrayaddbuild)r,   r2   r3   r5   r6   datar8   eheightwidthbuilderarrayitemblockr0   r0   r1   _read_streamE   s(   



zImageDatasource._read_streamc                 C   s   dS )Nr   r0   )r,   r0   r0   r1   _rows_per_filea   s   zImageDatasource._rows_per_filec                 C   s,   d}|   D ]
}|d ur||7 }q|| j S )Nr   )_file_sizesr)   )r,   
total_size	file_sizer0   r0   r1   estimate_inmemory_data_sized   s   
z+ImageDatasource.estimate_inmemory_data_sizec                 C   s(  t  }ttdd t|  |  }t|}|dkr#t	d t
S | jdurn| jdurn| jdv r5d}n| jdv r=d	}n| jd
v rEd}nt	d| j d t
S | j\}}|| | }|| }tdd |D }	||	 }
nt
}
t  | }|dkrt	dt|d d td|
 d t|
tS )z5Return an estimate of the image files encoding ratio.c                 S   s   | d dkS )Nr   r   r0   )pr0   r0   r1   <lambda>r   s    z@ImageDatasource._estimate_files_encoding_ratio.<locals>.<lambda>r   zYAll input image files are empty. Use on-disk file size to estimate images in-memory size.N)1LPr   )RGBYCbCrLABHSV   )RGBACMYKIF   zFound unknown image mode: .c                 s   s    | ]}|d  V  qdS )r   Nr0   ).0rT   r0   r0   r1   	<genexpr>   s    zAImageDatasource._estimate_files_encoding_ratio.<locals>.<genexpr>   z!Image input size estimation took r   z	 seconds.z0Estimated image encoding ratio from sampling is )timeperf_counterlistfilterzip_pathsrP   r#   loggerwarningr+   r   r   sumrounddebugmax)IMAGE_ENCODING_RATIO_ESTIMATE_LOWER_BOUND)r,   
start_timenon_empty_path_and_size	num_files	dimensionrH   rI   single_image_sizetotal_estimated_sizetotal_file_sizeratiosampling_durationr0   r0   r1   r(   m   sD   





z.ImageDatasource._estimate_files_encoding_ratio)NN)__name__
__module____qualname____doc___WRITE_FILE_PER_ROW_FILE_EXTENSIONS_NUM_THREADS_PER_TASKr   strr   r   r   intr"   r   r
   rN   rO   rS   floatr(   __classcell__r0   r0   r.   r1   r      s0     
	r   c                       sL   e Zd ZdefddZdee dee deee  def fdd	Z	  Z
S )
r'   encoding_ratioc                 C   s
   || _ dS )zJSet image file encoding ratio, to provide accurate size in bytes metadata.N)r)   )r,   r   r0   r0   r1   r*      s   
z-ImageFileMetadataProvider._set_encoding_ratior   rows_per_file
file_sizesr4   c                   s2   t  j|||d}|jd urt|j| j |_|S )N)r   r   )r!   _get_block_metadata
size_bytesr   r)   )r,   r   r   r   metadatar.   r0   r1   r      s   
z-ImageFileMetadataProvider._get_block_metadata)r}   r~   r   r   r*   r   r   r   r   r   r   r0   r0   r.   r1   r'      s    
r'   )r;   loggingrg   typingr   r   r   r   r   r   numpyrB   +ray.data._internal.delegating_block_builderr   ray.data._internal.utilr	   ray.data.blockr
   r   )ray.data.datasource.file_based_datasourcer   &ray.data.datasource.file_meta_providerr   pyarrow	getLoggerr}   rm   r+   rs   r   r'   r0   r0   r0   r1   <module>   s$     
 