o
    ۷i                      @   sZ   d dl Z d dlZd dlZd dlZd dlm  mZ ddl	m
Z
mZmZ G dd de
ZdS )    N   )VaeImageProcessoris_valid_imageis_valid_image_imagelistc                   @   s   e Zd ZdZddedB dedB dejfddZ	dd	ejd
ede	j
ejB eejj B fddZededededeeef fddZedejdededejfddZdS )VideoProcessorzSimple video processor.Nheightwidthreturnc                    s,  t |tr!t |d tjr!|d jdkr!tdt tj|dd}t |trBt |d t	j
rB|d jdkrBtdt t	j|dd}t |tjt	j
frU|jdkrUt|}n%t |tr`t|d sdt|rh|g}nt |trvt|d rv|}ntdt	j fdd|D dd	}|dd
ddd}|S )a  
        Preprocesses input video(s). Keyword arguments will be forwarded to `VaeImageProcessor.preprocess`.

        Args:
            video (`list[PIL.Image]`, `list[list[PIL.Image]]`, `torch.Tensor`, `np.array`, `list[torch.Tensor]`, `list[np.array]`):
                The input video. It can be one of the following:
                * list of the PIL images.
                * list of list of PIL images.
                * 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height, width)`).
                * 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`).
                * list of 4D Torch tensors (expected shape for each tensor `(num_frames, num_channels, height,
                  width)`).
                * list of 4D NumPy arrays (expected shape for each array `(num_frames, height, width, num_channels)`).
                * 5D NumPy arrays: expected shape for each array `(batch_size, num_frames, height, width,
                  num_channels)`.
                * 5D Torch tensors: expected shape for each array `(batch_size, num_frames, num_channels, height,
                  width)`.
            height (`int`, *optional*, defaults to `None`):
                The height in preprocessed frames of the video. If `None`, will use the `get_default_height_width()` to
                get default height.
            width (`int`, *optional*`, defaults to `None`):
                The width in preprocessed frames of the video. If `None`, will use get_default_height_width()` to get
                the default width.

        Returns:
            `torch.Tensor` of shape `(batch_size, num_channels, num_frames, height, width)`:
                A 5D tensor holding the batched channels-first video(s).
        r      zPassing `video` as a list of 5d np.ndarray is deprecated.Please concatenate the list along the batch dimension and pass it as a single 5d np.ndarray)axiszPassing `video` as a list of 5d torch.Tensor is deprecated.Please concatenate the list along the batch dimension and pass it as a single 5d torch.TensorzeInput is in incorrect format. Currently, we only support numpy.ndarray, torch.Tensor, PIL.Image.Imagec                    s$   g | ]}j |f d qS ))r   r   )
preprocess).0imgr   kwargsselfr    O/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/video_processor.py
<listcomp>V   s   $ z3VideoProcessor.preprocess_video.<locals>.<listcomp>)dim   r         )
isinstancelistnpndarrayndimwarningswarnFutureWarningconcatenatetorchTensorcatr   r   
ValueErrorstackpermute)r   videor   r   r   r   r   r   preprocess_video   s0   ((
"zVideoProcessor.preprocess_videor   r(   output_typec           	      K   s   |j d }g }t|D ]}|| dddd}| j||fi |}|| q|dkr2t|}|S |dkr=t|}|S |dksHt| d|S )	aI  
        Converts a video tensor to a list of frames for export. Keyword arguments will be forwarded to
        `VaeImageProcessor.postprocess`.

        Args:
            video (`torch.Tensor`): The video as a tensor.
            output_type (`str`, defaults to `"np"`): Output type of the postprocessed `video` tensor.
        r   r   r   r   r   ptpilz9 does not exist. Please choose one of ['np', 'pt', 'pil'])	shaperanger'   postprocessappendr   r&   r"   r%   )	r   r(   r*   r   
batch_sizeoutputs	batch_idx	batch_vidbatch_outputr   r   r   postprocess_video]   s   


z VideoProcessor.postprocess_videoratiosc                    sD   t | |  t|  fddd}|| }t|d t|d fS )a  
        Returns the binned height and width based on the aspect ratio.

        Args:
            height (`int`): The height of the image.
            width (`int`): The width of the image.
            ratios (`dict`): A dictionary where keys are aspect ratios and values are tuples of (height, width).

        Returns:
            `tuple[int, int]`: The closest binned height and width.
        c                    s   t t|   S )N)absfloat)ratioarr   r   <lambda>   s    z:VideoProcessor.classify_height_width_bin.<locals>.<lambda>)keyr   r   )r9   minkeysint)r   r   r7   closest_ratio
default_hwr   r;   r   classify_height_width_binx   s   z(VideoProcessor.classify_height_width_binsamples	new_width
new_heightc                 C   s  | j d | j d }}||ks||krt|| || }t|| }t|| }| j \}}	}
}}| ddddd||
 |	||} tj| ||fddd} || d }|| }|| d }|| }| d	d	d	d	||||f } | ||
|	||ddddd} | S )
a4  
        Resizes and crops a tensor of videos to the specified dimensions.

        Args:
            samples (`torch.Tensor`):
                A tensor of shape (N, C, T, H, W) where N is the batch size, C is the number of channels, T is the
                number of frames, H is the height, and W is the width.
            new_width (`int`): The desired width of the output videos.
            new_height (`int`): The desired height of the output videos.

        Returns:
            `torch.Tensor`: A tensor containing the resized and cropped videos.
        r   r   r   r   r   bilinearF)sizemodealign_cornersN)r-   maxrA   r'   reshapeFinterpolate)rE   rF   rG   orig_height
orig_widthr:   resized_widthresized_heightncthwstart_xend_xstart_yend_yr   r   r   resize_and_crop_tensor   s"   "  z%VideoProcessor.resize_and_crop_tensor)NN)r   )__name__
__module____qualname____doc__rA   r"   r#   r)   strr   r   r   PILImager6   staticmethoddicttuplerD   r]   r   r   r   r   r      s    "B
$$r   )r   numpyr   rc   r"   torch.nn.functionalnn
functionalrN   image_processorr   r   r   r   r   r   r   r   <module>   s   