o
    $i,                     @   s   d dl mZmZmZmZmZmZmZ d dlZ	d dl
mZ d dlmZ d dlmZ d dlmZ er4d dlZeddG d	d
 d
eZdS )    )TYPE_CHECKINGCallableDictListMappingOptionalUnionN)BatchFormat)_create_possibly_ragged_ndarray)Preprocessor)	PublicAPIalpha)	stabilityc                	       s   e Zd ZdZdZ		ddee deed gdf de	ee  d	e
f fd
dZdefddZdeedf deedf fddZdee fddZdee fddZdefddZ  ZS )TorchVisionPreprocessora	  Apply a `TorchVision transform <https://pytorch.org/vision/stable/transforms.html>`_
    to image columns.

    Examples:

        Torch models expect inputs of shape :math:`(B, C, H, W)` in the range
        :math:`[0.0, 1.0]`. To convert images to this format, add ``ToTensor`` to your
        preprocessing pipeline.

        .. testcode::

            from torchvision import transforms

            import ray
            from ray.data.preprocessors import TorchVisionPreprocessor

            transform = transforms.Compose([
                transforms.ToTensor(),
                transforms.Resize((224, 224)),
            ])
            preprocessor = TorchVisionPreprocessor(["image"], transform=transform)

            dataset = ray.data.read_images("s3://anonymous@air-example-data-2/imagenet-sample-images")
            dataset = preprocessor.transform(dataset)


        For better performance, set ``batched`` to ``True`` and replace ``ToTensor``
        with a batch-supporting ``Lambda``.

        .. testcode::

            import numpy as np
            import torch

            def to_tensor(batch: np.ndarray) -> torch.Tensor:
                tensor = torch.as_tensor(batch, dtype=torch.float)
                # (B, H, W, C) -> (B, C, H, W)
                tensor = tensor.permute(0, 3, 1, 2).contiguous()
                # [0., 255.] -> [0., 1.]
                tensor = tensor.div(255)
                return tensor

            transform = transforms.Compose([
                transforms.Lambda(to_tensor),
                transforms.Resize((224, 224))
            ])
            preprocessor = TorchVisionPreprocessor(["image"], transform=transform, batched=True)

            dataset = ray.data.read_images("s3://anonymous@air-example-data-2/imagenet-sample-images")
            dataset = preprocessor.transform(dataset)

    Args:
        columns: The columns to apply the TorchVision transform to.
        transform: The TorchVision transform you want to apply. This transform should
            accept a ``np.ndarray`` or ``torch.Tensor`` as input and return a
            ``torch.Tensor`` as output.
        output_columns: The output name for each input column. If not specified, this
            defaults to the same set of columns as the columns.
        batched: If ``True``, apply ``transform`` to batches of shape
            :math:`(B, H, W, C)`. Otherwise, apply ``transform`` to individual images.
    FNcolumns	transform)
np.ndarraytorch.Tensorr   output_columnsbatchedc                    sT   t    |s	|}t|t|krtd| d| d|| _|| _|| _|| _d S )NzAThe length of columns should match the length of output_columns: z vs .)super__init__len
ValueError_columns_output_columns_torchvision_transform_batched)selfr   r   r   r   	__class__ Y/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/ray/data/preprocessors/torch.pyr   P   s   

z TorchVisionPreprocessor.__init__returnc                 C   s&   | j j d| j d| j d| jdS )Nz	(columns=z, output_columns=z, transform=))r!   __name__r   r   r   r   r"   r"   r#   __repr__d   s   
z TorchVisionPreprocessor.__repr__
data_batchr   c                    s   dd l ddlm dtjdtjffdd dtjdtjf fdd	}t|trBtjj	D ]\}}||| ||< q3|S ||}|S )
Nr   )convert_ndarray_to_torch_tensorarrayr$   c                    sn   z | } |}W n ty    | }Y nw t|jr$| }t|tjs5tdt|j	 d|S )Nz}`TorchVisionPreprocessor` expected your transform to return a `torch.Tensor` or `np.ndarray`, but your transform returned a `z
` instead.)
r   	TypeError
isinstanceTensornumpynpndarrayr   typer&   )r+   tensoroutput)r*   r   torchr"   r#   apply_torchvision_transforms   s   zMTorchVisionPreprocessor._transform_numpy.<locals>.apply_torchvision_transformbatchc                    s$   j r | S t fdd| D S )Nc                    s   g | ]} |qS r"   r"   ).0r+   )r6   r"   r#   
<listcomp>   s    zUTorchVisionPreprocessor._transform_numpy.<locals>.transform_batch.<locals>.<listcomp>)r   r
   )r7   )r6   r   r"   r#   transform_batch   s
   zATorchVisionPreprocessor._transform_numpy.<locals>.transform_batch)
r5   ray.air._internal.torch_utilsr*   r0   r1   r-   r   zipr   r   )r   r)   r:   	input_col
output_colr"   )r6   r*   r   r5   r#   _transform_numpyl   s   
z(TorchVisionPreprocessor._transform_numpyc                 C      | j S N)r   r'   r"   r"   r#   get_input_columns      z)TorchVisionPreprocessor.get_input_columnsc                 C   r@   rA   )r   r'   r"   r"   r#   get_output_columns   rC   z*TorchVisionPreprocessor.get_output_columnsc                 C   s   t jS rA   )r	   NUMPY)clsr"   r"   r#   preferred_batch_format   rC   z.TorchVisionPreprocessor.preferred_batch_format)NF)r&   
__module____qualname____doc___is_fittabler   strr   r   r   boolr   r(   r   r?   rB   rD   r	   rG   __classcell__r"   r"   r    r#   r      s.    >



)r   )typingr   r   r   r   r   r   r   r/   r0   "ray.air.util.data_batch_conversionr	   $ray.air.util.tensor_extensions.utilsr
   ray.data.preprocessorr   ray.util.annotationsr   r5   r   r"   r"   r"   r#   <module>   s   $ 