o
    $iz                     @   sX   d dl mZmZmZ d dlmZ d dlmZ eddG dd dZG dd	 d	eZ	d
S )    )AnyDictOptional)Block)	PublicAPIalpha)	stabilityc                   @   sX   e Zd ZdZdededededef
ddZd	eee	f dededed
edefddZ
dS )FilenameProvidera  Generates filenames when you write a :class:`~ray.data.Dataset`.

    Use this class to customize the filenames used when writing a Dataset.

    Some methods write each row to a separate file, while others write each block to a
    separate file. For example, :meth:`ray.data.Dataset.write_images` writes individual
    rows, and :func:`ray.data.Dataset.write_parquet` writes blocks of data. For more
    information about blocks, see :ref:`Data internals <datasets_scheduling>`.

    If you're writing each row to a separate file, implement
    :meth:`~FilenameProvider.get_filename_for_row`. Otherwise, implement
    :meth:`~FilenameProvider.get_filename_for_block`.

    Example:

        This snippet shows you how to encode labels in written files. For example, if
        `"cat"` is a label, you might write a file named `cat_000000_000000_000000.png`.

        .. testcode::

            import ray
            from ray.data.datasource import FilenameProvider

            class ImageFilenameProvider(FilenameProvider):

                def __init__(self, file_format: str):
                    self.file_format = file_format

                def get_filename_for_row(self, row, write_uuid, task_index, block_index, row_index):
                    return (
                        f"{row['label']}_{write_uuid}_{task_index:06}_{block_index:06}"
                        f"_{row_index:06}.{self.file_format}"
                    )

            ds = ray.data.read_parquet("s3://anonymous@ray-example-data/images.parquet")
            ds.write_images(
                "/tmp/results",
                column="image",
                filename_provider=ImageFilenameProvider("png")
            )
    block
write_uuid
task_indexblock_indexreturnc                 C      t )aV  Generate a filename for a block of data.

        .. note::
            Filenames must be unique and deterministic for a given write UUID, and
            task and block index.

            A block consists of multiple rows and corresponds to a single output file.
            Each task might produce a different number of blocks.

        Args:
            block: The block that will be written to a file.
            write_uuid: The UUID of the write operation.
            task_index: The index of the write task.
            block_index: The index of the block *within* the write task.
        NotImplementedError)selfr
   r   r   r    r   b/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/data/datasource/filename_provider.pyget_filename_for_block3   s   z'FilenameProvider.get_filename_for_blockrow	row_indexc                 C   r   )a  Generate a filename for a row.

        .. note::
            Filenames must be unique and deterministic for a given write UUID, and
            task, block, and row index.

            A block consists of multiple rows, and each row corresponds to a single
            output file. Each task might produce a different number of blocks, and each
            block might contain a different number of rows.

        .. tip::
            If you require a contiguous row index into the global dataset, use
            :meth:`~ray.data.Dataset.iter_rows`. This method is single-threaded and
            isn't recommended for large datasets.

        Args:
            row: The row that will be written to a file.
            write_uuid: The UUID of the write operation.
            task_index: The index of the write task.
            block_index: The index of the block *within* the write task.
            row_index: The index of the row *within* the block.
        r   )r   r   r   r   r   r   r   r   r   get_filename_for_rowG   s   z%FilenameProvider.get_filename_for_rowN)__name__
__module____qualname____doc__r   strintr   r   r   r   r   r   r   r   r	      s4    *

r	   c                   @   s   e Zd Z	ddee dee fddZdededed	ed
ef
ddZde	ee
f deded	eded
efddZded
efddZdS )_DefaultFilenameProviderNdataset_uuidfile_formatc                 C   s   || _ || _d S )N_dataset_uuid_file_format)r   r    r!   r   r   r   __init__i   s   
z!_DefaultFilenameProvider.__init__r
   r   r   r   r   c                 C   s"   | d|dd|d}|  |S N_06_generate_filename)r   r
   r   r   r   file_idr   r   r   r   o   s   
z/_DefaultFilenameProvider.get_filename_for_blockr   r   c                 C   s*   | d|dd|dd|d}|  |S r&   r)   )r   r   r   r   r   r   r+   r   r   r   r   u   s    
z-_DefaultFilenameProvider.get_filename_for_rowr+   c                 C   sD   d}| j d ur|| j  d7 }||7 }| jd ur |d| j 7 }|S )N r'   .r"   )r   r+   filenamer   r   r   r*      s   

z+_DefaultFilenameProvider._generate_filename)NN)r   r   r   r   r   r%   r   r   r   r   r   r   r*   r   r   r   r   r   h   s@    



r   N)
typingr   r   r   ray.data.blockr   ray.util.annotationsr   r	   r   r   r   r   r   <module>   s    `