o
    `۷i|                     @   s   d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
 ddlmZ ddlmZ eG dd deZeG d	d
 d
ejZeG dd deZeG dd deZG dd dZdS )z
Serialization handlers for preprocessor save/load functionality.

This module implements a factory pattern to abstract different serialization formats,
making it easier to add new formats and maintain existing ones.
    N)Enum)AnyDictOptionalUnion)cloudpickle)DeveloperAPIc                   @   s   e Zd ZdZdZdZdS )HandlerFormatNamez1Enum for consistent format naming in the factory.r   pickleN)__name__
__module____qualname____doc__CLOUDPICKLEPICKLE r   r   c/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/data/preprocessors/serialization_handlers.pyr	      s    r	   c                   @   s   e Zd ZdZejdedeee	f f deee
f fddZejdeee
f de	fdd	Zejdeee
f fd
dZdeee
f deee
f fddZdS )SerializationHandlerzDAbstract base class for handling preprocessor serialization formats.dataPreprocessorreturnc                 C      dS )zSerialize preprocessor data to the specific format.

        Args:
            data: Dictionary containing preprocessor metadata and stats

        Returns:
            Serialized data in format-specific representation
        Nr   selfr   r   r   r   	serialize   s   zSerializationHandler.serialize
serializedc                 C   r   )aU  Deserialize data from the specific format.

        Args:
            serialized: Serialized data in format-specific representation

        Returns:
            For structured formats (CloudPickle/JSON/MessagePack): Dictionary containing preprocessor metadata and stats
            For pickle format: The actual deserialized object
        Nr   r   r   r   r   r   deserialize,   s   z SerializationHandler.deserializec                 C   r   )z+Get the magic bytes/prefix for this format.Nr   r   r   r   r   get_magic_bytes9   s   z$SerializationHandler.get_magic_bytesc                 C   s4   |   }t|ttfr||r|t|d S |S )z(Remove magic bytes from serialized data.N)r   
isinstancestrbytes
startswithlen)r   r   magicr   r   r   strip_magic_bytes>   s   z&SerializationHandler.strip_magic_bytesN)r   r   r   r   abcabstractmethodr   r   r!   r   r"   r   r   r   r&   r   r   r   r   r      s    
&r   c                   @   s^   e Zd ZdZdZdedeeef f de	fddZ
de	deeef fd	d
Zde	fddZdS )CloudPickleSerializationHandlerz-Handler for CloudPickle serialization format.s   CPKL:r   r   r   c                 C   s   | j t| S )z2Serialize to CloudPickle format with magic prefix.)MAGIC_CLOUDPICKLEr   dumpsr   r   r   r   r   L   s   z)CloudPickleSerializationHandler.serializer   c                 C   sR   t |tstdt| || jstd|dd  | |}t|S )z$Deserialize from CloudPickle format.z4Expected bytes for CloudPickle deserialization, got z!Invalid CloudPickle magic bytes: N
   )	r    r"   
ValueErrortyper#   r*   r&   r   loads)r   r   cloudpickle_datar   r   r   r   R   s   


z+CloudPickleSerializationHandler.deserializec                 C   s   | j S )N)r*   r   r   r   r   r   _   s   z/CloudPickleSerializationHandler.get_magic_bytesN)r   r   r   r   r*   r   r   r!   r   r"   r   r   r   r   r   r   r   r)   F   s    
r)   c                   @   sR   e Zd ZdZdedeeef f defddZdedefdd	Z	defd
dZ
dS )PickleSerializationHandlerz/Handler for legacy Pickle serialization format.r   r   r   c                 C   s   t t|dS )z
        Serialize using pickle format (for backward compatibility).
        data is ignored, but kept for consistency

        ascii)base64	b64encoder
   r+   decoder   r   r   r   r   g   s   z$PickleSerializationHandler.serializer   c                 C   s   t t|S )z0Deserialize from pickle format (legacy support).)r
   r/   r3   	b64decoder   r   r   r   r   q   s   z&PickleSerializationHandler.deserializec                 C   r   )N r   r   r   r   r   r   x   s   z*PickleSerializationHandler.get_magic_bytesN)r   r   r   r   r   r   r!   r   r   r   r   r   r   r   r   r1   c   s    


r1   c                	   @   s   e Zd ZdZejeejeiZ	e
dedefddZe
		ddee deeeef  d	efd
dZe
deeef d	efddZdS )SerializationHandlerFactoryz>Factory class for creating appropriate serialization handlers.format_namehandler_classc                 C   s   || j |< dS )z%Register a new serialization handler.N)	_handlers)clsr9   r:   r   r   r   register_handler   s   z,SerializationHandlerFactory.register_handlerNformat_identifierr   r   c                 K   sH   |s|  |}|| jvrtd|j dt| j  | j| }| S )a  Get the appropriate serialization handler for a format or serialized data.

        Args:
            format_identifier: The format to use for serialization. If None, will detect from data.
            data: Serialized data to detect format from (used when format_identifier is None).
            **kwargs: Additional keyword arguments (currently unused).

        Returns:
            SerializationHandler instance for the format

        Raises:
            ValueError: If format is not supported or cannot be detected
        z"Unsupported serialization format: z. Supported formats: )detect_formatr;   r-   valuelistkeys)r<   r>   r   kwargsr:   r   r   r   get_handler   s   



z'SerializationHandlerFactory.get_handlerr   c                 C   sD   t |tr|tjrtjS t |trtjS t	d|dd  d)zDetect the serialization format from the magic bytes.

        Args:
            serialized: Serialized data

        Returns:
            Format name enum

        Raises:
            ValueError: If format cannot be detected
        z)Cannot detect serialization format from: N   z...)
r    r"   r#   r)   r*   r	   r   r!   r   r-   )r<   r   r   r   r   r?      s   
z)SerializationHandlerFactory.detect_format)NN)r   r   r   r   r	   r   r)   r   r1   r;   classmethodr.   r=   r   r   r!   r"   r   rD   r?   r   r   r   r   r8   |   s&      r8   )r   r'   r3   r
   enumr   typingr   r   r   r   ray.cloudpickler   ray.util.annotationsr   r	   ABCr   r)   r1   r8   r   r   r   r   <module>   s"    +