o
    ciq                     @   s   d dl mZmZmZmZmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ er4d dlZeddG d	d
 d
eZdS )    )TYPE_CHECKINGCallableDictListMappingOptionalUnionN)BatchFormat)_create_possibly_ragged_ndarray)Preprocessor)	PublicAPIalpha)	stabilityc                	   @   s   e Zd ZdZdZ		ddee deed gdf de	ee  d	e
fd
dZdefddZdeedf deedf fddZdefddZdS )TorchVisionPreprocessora	  Apply a `TorchVision transform <https://pytorch.org/vision/stable/transforms.html>`_
    to image columns.

    Examples:

        Torch models expect inputs of shape :math:`(B, C, H, W)` in the range
        :math:`[0.0, 1.0]`. To convert images to this format, add ``ToTensor`` to your
        preprocessing pipeline.

        .. testcode::

            from torchvision import transforms

            import ray
            from ray.data.preprocessors import TorchVisionPreprocessor

            transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Resize((224, 224)),
            ])
            preprocessor = TorchVisionPreprocessor(["image"], transform=transform)

            dataset = ray.data.read_images("s3://anonymous@air-example-data-2/imagenet-sample-images")
            dataset = preprocessor.transform(dataset)


        For better performance, set ``batched`` to ``True`` and replace ``ToTensor``
        with a batch-supporting ``Lambda``.

        .. testcode::

            import numpy as np
            import torch

            def to_tensor(batch: np.ndarray) -> torch.Tensor:
                tensor = torch.as_tensor(batch, dtype=torch.float)
                # (B, H, W, C) -> (B, C, H, W)
                tensor = tensor.permute(0, 3, 1, 2).contiguous()
                # [0., 255.] -> [0., 1.]
                tensor = tensor.div(255)
                return tensor

            transform = transforms.Compose([
                transforms.Lambda(to_tensor),
                transforms.Resize((224, 224))
            ])
            preprocessor = TorchVisionPreprocessor(["image"], transform=transform, batched=True)

            dataset = ray.data.read_images("s3://anonymous@air-example-data-2/imagenet-sample-images")
            dataset = preprocessor.transform(dataset)

    Args:
        columns: The columns to apply the TorchVision transform to.
        transform: The TorchVision transform you want to apply. This transform should
            accept a ``np.ndarray`` or ``torch.Tensor`` as input and return a
            ``torch.Tensor`` as output.
        output_columns: The output name for each input column. If not specified, this
            defaults to the same set of columns as the columns.
        batched: If ``True``, apply ``transform`` to batches of shape
            :math:`(B, H, W, C)`. Otherwise, apply ``transform`` to individual images.
    FNcolumns	transform)
np.ndarraytorch.Tensorr   output_columnsbatchedc                 C   sJ   |s|}t |t |krtd| d| d|| _|| _|| _|| _d S )NzAThe length of columns should match the length of output_columns: z vs .)len
ValueError_columns_output_columns_torchvision_transform_batched)selfr   r   r   r    r   P/home/ubuntu/.local/lib/python3.10/site-packages/ray/data/preprocessors/torch.py__init__P   s   
z TorchVisionPreprocessor.__init__returnc                 C   s&   | j j d| j d| j d| jdS )Nz	(columns=z, output_columns=z, transform=))	__class____name__r   r   r   )r   r   r   r   __repr__c   s   
z TorchVisionPreprocessor.__repr__
data_batchr   c                    s   dd l ddlm dtjdtjffdd dtjdtjf fdd	}t|trBtjj	D ]\}}||| ||< q3|S ||}|S )
Nr   )convert_ndarray_to_torch_tensorarrayr!   c                    sn   z | } |}W n ty    | }Y nw t|jr$| }t|tjs5tdt|j	 d|S )Nz}`TorchVisionPreprocessor` expected your transform to return a `torch.Tensor` or `np.ndarray`, but your transform returned a `z
` instead.)
r   	TypeError
isinstanceTensornumpynpndarrayr   typer$   )r(   tensoroutput)r'   r   torchr   r   apply_torchvision_transformr   s   zMTorchVisionPreprocessor._transform_numpy.<locals>.apply_torchvision_transformbatchc                    s$   j r | S t fdd| D S )Nc                    s   g | ]} |qS r   r   ).0r(   )r3   r   r   
<listcomp>   s    zUTorchVisionPreprocessor._transform_numpy.<locals>.transform_batch.<locals>.<listcomp>)r   r
   )r4   )r3   r   r   r   transform_batch   s
   zATorchVisionPreprocessor._transform_numpy.<locals>.transform_batch)
r2   ray.air._internal.torch_utilsr'   r-   r.   r*   r   zipr   r   )r   r&   r7   	input_col
output_colr   )r3   r'   r   r2   r   _transform_numpyk   s   
z(TorchVisionPreprocessor._transform_numpyc                 C   s   t jS )N)r	   NUMPY)clsr   r   r   preferred_batch_format   s   z.TorchVisionPreprocessor.preferred_batch_format)NF)r$   
__module____qualname____doc___is_fittabler   strr   r   r   boolr    r%   r   r<   r	   r?   r   r   r   r   r      s*    >




)r   )typingr   r   r   r   r   r   r   r,   r-   "ray.air.util.data_batch_conversionr	   $ray.air.util.tensor_extensions.utilsr
   ray.data.preprocessorr   ray.util.annotationsr   r2   r   r   r   r   r   <module>   s   $ 