o
    Gi                      @   sP   d dl Zd dlZd dlZddlmZ ddlmZ ddl	m
Z
 G dd deZdS )    N   )register_to_config)VaeImageProcessor)PIL_INTERPOLATIONc                       s   e Zd ZdZe												d d
edededeeef dededededededee	B ee	df B dB f fddZ
dejjdededejjfddZ		d!dejjejB ejB dedB dedB deeef fddZ  ZS )"WanAnimateImageProcessorat  
    Image processor to preprocess the reference (character) image for the Wan Animate model.

    Args:
        do_resize (`bool`, *optional*, defaults to `True`):
            Whether to downscale the image's (height, width) dimensions to multiples of `vae_scale_factor`. Can accept
            `height` and `width` arguments from [`image_processor.VaeImageProcessor.preprocess`] method.
        vae_scale_factor (`int`, *optional*, defaults to `8`):
            VAE (spatial) scale factor. If `do_resize` is `True`, the image is automatically resized to multiples of
            this factor.
        vae_latent_channels (`int`, *optional*, defaults to `16`):
            VAE latent channels.
        spatial_patch_size (`tuple[int, int]`, *optional*, defaults to `(2, 2)`):
            The spatial patch size used by the diffusion transformer. For Wan models, this is typically (2, 2).
        resample (`str`, *optional*, defaults to `lanczos`):
            Resampling filter to use when resizing the image.
        do_normalize (`bool`, *optional*, defaults to `True`):
            Whether to normalize the image to [-1,1].
        do_binarize (`bool`, *optional*, defaults to `False`):
            Whether to binarize the image to 0/1.
        do_convert_rgb (`bool`, *optional*, defaults to be `False`):
            Whether to convert the images to RGB format.
        do_convert_grayscale (`bool`, *optional*, defaults to be `False`):
            Whether to convert the images to grayscale format.
        fill_color (`str` or `float` or `tuple[float, ...]`, *optional*, defaults to `None`):
            An optional fill color when `resize_mode` is set to `"fill"`. This will fill the empty space with that
            color instead of filling with data from the image. Any valid `color` argument to `PIL.Image.new` is valid;
            if `None`, will default to filling with data from `image`.
    T         r
   lanczosNFr   	do_resizevae_scale_factorvae_latent_channelsspatial_patch_sizeresamplereducing_gapdo_normalizedo_binarizedo_convert_rgbdo_convert_grayscale
fill_color.c                    s$   t    |	r|
rtddd S d S )Nz`do_convert_rgb` and `do_convert_grayscale` can not both be set to `True`, if you intended to convert the image into RGB format, please set `do_convert_grayscale = False`.z` if you intended to convert the image into grayscale format, please set `do_convert_rgb = False`)super__init__
ValueError)selfr   r   r   r   r   r   r   r   r   r   r   	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/pipelines/wan/image_processor.pyr   8   s   
z!WanAnimateImageProcessor.__init__imagewidthheightreturnc                 C   s  || }|j |j }| jjdu }| jjpd}||k r|n|j | |j }||kr*|n|j| |j  }	|j||	ft| jj d}
tjj	d||f|d}|j
|
|d |d  |d |	d  fd |r||k r|d |	d  }|dkr|j
|
j||fdd|dfddd |j
|
j||fd|
j||
jfdd||	 fd |S ||kr|d |d  }|dkr|j
|
j||fddd|fddd |j
|
j||f|
j d|
j |fd|| dfd |S )	a*  
        Resize the image to fit within the specified width and height, maintaining the aspect ratio, and then center
        the image within the dimensions, filling empty with data from image.

        Args:
            image (`PIL.Image.Image`):
                The image to resize and fill.
            width (`int`):
                The width to resize the image to.
            height (`int`):
                The height to resize the image to.

        Returns:
            `PIL.Image.Image`:
                The resized and filled image.
        Nr   )r   RGB)colorr
   )box)r   r   )r    r!   configr   resizer   r   PILImagenewpaste)r   r   r    r!   ratio	src_ratiofill_with_image_datar   src_wsrc_hresizedresfill_height
fill_widthr   r   r   _resize_and_fillO   s8   *$
$
z)WanAnimateImageProcessor._resize_and_fillc                 C   s   |du r t |tjjr|j}nt |tjr|jd }n|jd }|du r@t |tjjr/|j}nt |tjr;|jd }n|jd }|| }|| }| jj	| jj
d  }| jj	| jj
d  }tt|| | | }tt|| | | }||fS )a=  
        Returns the height and width of the image, downscaled to the next integer multiple of `vae_scale_factor`.

        Args:
            image (`PIL.Image.Image | np.ndarray | torch.Tensor`):
                The image input, which can be a PIL image, NumPy array, or PyTorch tensor. If it is a NumPy array, it
                should have shape `[batch, height, width]` or `[batch, height, width, channels]`. If it is a PyTorch
                tensor, it should have shape `[batch, channels, height, width]`.
            height (`int | None`, *optional*, defaults to `None`):
                The height of the preprocessed image. If `None`, the height of the `image` input will be used.
            width (`int | None`, *optional*, defaults to `None`):
                The width of the preprocessed image. If `None`, the width of the `image` input will be used.

        Returns:
            `tuple[int, int]`:
                A tuple containing the height and width, both resized to the nearest integer multiple of
                `vae_scale_factor * spatial_patch_size`.
        Nr
      r   r   )
isinstancer(   r)   r!   torchTensorshaper    r&   r   r   roundnpsqrt)r   r   r!   r    max_areaaspect_ratiomod_value_hmod_value_wr   r   r   get_default_height_width   s&   

z1WanAnimateImageProcessor.get_default_height_width)Tr   r   r	   r   NTFFFr   )NN)__name__
__module____qualname____doc__r   boolinttuplestrfloatr   r(   r)   r5   r<   ndarrayr8   r9   rB   __classcell__r   r   r   r   r      sr    
	

:
r   )numpyr<   	PIL.Imager(   r8   configuration_utilsr   image_processorr   utilsr   r   r   r   r   r   <module>   s   