o
    Gi7                     @   s   d dl mZ d dlZddlmZmZ ddlmZ ddlm	Z	m
Z
mZmZ ddlmZ dd	lmZmZ e
 rAd dlm  mZ d
ZndZeeZdZdddZG dd deZdS )    )CallableN   )UNet2DConditionModelVQModel)DDPMScheduler)	deprecateis_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipelineImagePipelineOutputTFae  
    Examples:
        ```py
        >>> from diffusers import KandinskyV22Pipeline, KandinskyV22PriorPipeline
        >>> import torch

        >>> pipe_prior = KandinskyV22PriorPipeline.from_pretrained("kandinsky-community/kandinsky-2-2-prior")
        >>> pipe_prior.to("cuda")
        >>> prompt = "red cat, 4k photo"
        >>> out = pipe_prior(prompt)
        >>> image_emb = out.image_embeds
        >>> zero_image_emb = out.negative_image_embeds
        >>> pipe = KandinskyV22Pipeline.from_pretrained("kandinsky-community/kandinsky-2-2-decoder")
        >>> pipe.to("cuda")
        >>> image = pipe(
        ...     image_embeds=image_emb,
        ...     negative_image_embeds=zero_image_emb,
        ...     height=768,
        ...     width=768,
        ...     num_inference_steps=50,
        ... ).images
        >>> image[0].save("cat.png")
        ```
   c                 C   sX   | |d  }| |d  dkr|d7 }||d  }||d  dkr$|d7 }|| || fS )Nr   r       )heightwidthscale_factor
new_height	new_widthr   r   j/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/pipelines/kandinsky2_2/pipeline_kandinsky2_2.pydownscale_height_and_width>   s   r   c                       s  e Zd ZdZdZg dZdededef fddZ	d	d
 Z
edd Zedd Zedd Ze eedddddddddddgfdejeej B dejeej B dededededed ejeej B dB dejdB d!edB d"ed#eeegdf dB d$ee fd%d&Z  ZS )'KandinskyV22Pipelinea  
    Pipeline for text-to-image generation using Kandinsky

    This model inherits from [`DiffusionPipeline`]. Check the superclass documentation for the generic methods the
    library implements for all the pipelines (such as downloading or saving, running on a particular device, etc.)

    Args:
        scheduler (`DDIMScheduler` | `DDPMScheduler`):
            A scheduler to be used in combination with `unet` to generate image latents.
        unet ([`UNet2DConditionModel`]):
            Conditional U-Net architecture to denoise the image embedding.
        movq ([`VQModel`]):
            MoVQ Decoder to generate the image from the latents.
    z
unet->movq)latentsimage_embedsnegative_image_embedsunet	schedulermovqc                    s6   t    | j|||d dt| jjjd  | _d S )N)r   r   r   r   r   )super__init__register_moduleslenr   configblock_out_channelsmovq_scale_factor)selfr   r   r   	__class__r   r   r!   [   s   
zKandinskyV22Pipeline.__init__c                 C   sR   |d u rt ||||d}n|j|krtd|j d| ||}||j }|S )N)	generatordevicedtypezUnexpected latents shape, got z, expected )r   shape
ValueErrortoinit_noise_sigma)r'   r-   r,   r+   r*   r   r   r   r   r   prepare_latentsk   s   


z$KandinskyV22Pipeline.prepare_latentsc                 C      | j S N_guidance_scaler'   r   r   r   guidance_scalev      z#KandinskyV22Pipeline.guidance_scalec                 C   s
   | j dkS )Nr   r4   r6   r   r   r   do_classifier_free_guidancez   s   
z0KandinskyV22Pipeline.do_classifier_free_guidancec                 C   r2   r3   )_num_timestepsr6   r   r   r   num_timesteps~   r8   z"KandinskyV22Pipeline.num_timestepsi   d   g      @r   NpilTr   r   r   r   r   num_inference_stepsr7   num_images_per_promptr*   output_typereturn_dictcallback_on_step_end"callback_on_step_end_tensor_inputsc           $         s  | dd}| dd}|durtddd |dur tddd |durAt fdd|D sAtd	 j d
 fdd|D   j}| _t|trSt	j
|dd}|jd | }t|trft	j
|dd} jr|j|dd}|j|dd}t	j
||gddj jj|d} jj||d  jj} jjj}t|| j\}} ||||f|j|||	 j}	t| _t |D ]\}} jrt	
|	gd n|	}d|i} j||d|ddd } jr|j|	jd dd\}}|d\}}|d\}}| j||   }t	j
||gdd}t  jjdr jjj!dv s*|j|	jd dd\}} jj"|||	|dd }	|durci }|D ]
} t# |  || < q?| |||}!|! d|	}	|! d|}|! d|}|dur~|| dkr~|t$ jdd }"||"||	 t%rt&'  q|
dvrtd|
 |
dksˈ j(j)|	dd d! }#|
d"v r|#d# d# }#|#*dd}#|#+ ,ddd$d- . }#|
d%krʈ /|#}#n|	}# 0  |s|#fS t1|#d&S )'a  
        Function invoked when calling the pipeline for generation.

        Args:
            image_embeds (`torch.Tensor` or `list[torch.Tensor]`):
                The clip image embeddings for text prompt, that will be used to condition the image generation.
            negative_image_embeds (`torch.Tensor` or `list[torch.Tensor]`):
                The clip image embeddings for negative text prompt, will be used to condition the image generation.
            height (`int`, *optional*, defaults to 512):
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to 512):
                The width in pixels of the generated image.
            num_inference_steps (`int`, *optional*, defaults to 100):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            guidance_scale (`float`, *optional*, defaults to 4.0):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
                the text `prompt`, usually at the expense of lower image quality.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will be generated by sampling using the supplied random `generator`.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between: `"pil"` (`PIL.Image.Image`), `"np"`
                (`np.array`) or `"pt"` (`torch.Tensor`).
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.ImagePipelineOutput`] instead of a plain tuple.
            callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`list`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.

        Examples:

        Returns:
            [`~pipelines.ImagePipelineOutput`] or `tuple`
        callbackNcallback_stepsz1.0.0zhPassing `callback` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`znPassing `callback_steps` as an input argument to `__call__` is deprecated, consider use `callback_on_step_end`c                 3   s    | ]}| j v V  qd S r3   _callback_tensor_inputs.0kr6   r   r   	<genexpr>   s    

z0KandinskyV22Pipeline.__call__.<locals>.<genexpr>z2`callback_on_step_end_tensor_inputs` has to be in z, but found c                    s   g | ]	}| j vr|qS r   rF   rH   r6   r   r   
<listcomp>   s    z1KandinskyV22Pipeline.__call__.<locals>.<listcomp>r   )dim)r,   r+   )r+   r   r   F)sampletimestepencoder_hidden_statesadded_cond_kwargsrA   r   variance_type)learnedlearned_range)r*   r   r   order)ptnpr=   latentzIOnly the output types `pt`, `pil` and `np` are supported not output_type=rX   T)force_not_quantizerN   )rW   r=   g      ?r   r=   )images)2popr   allr.   rG   _execution_devicer5   
isinstancelisttorchcatr-   r9   repeat_interleaver/   r   r,   r   set_timesteps	timestepsr$   in_channelsr   r&   r1   r#   r:   	enumerateprogress_barsplitchunkr7   hasattrrR   steplocalsgetattrXLA_AVAILABLExm	mark_stepr   decodeclampcpupermutefloatnumpynumpy_to_pilmaybe_free_model_hooksr   )$r'   r   r   r   r   r>   r7   r?   r*   r   r@   rA   rB   rC   kwargsrD   rE   r+   
batch_sizerd   num_channels_latentsitlatent_model_inputrQ   
noise_predvariance_prednoise_pred_uncondnoise_pred_text_variance_pred_textcallback_kwargsrJ   callback_outputsstep_idximager   r6   r   __call__   s   F




	






zKandinskyV22Pipeline.__call__)__name__
__module____qualname____doc__model_cpu_offload_seqrG   r   r   r   r!   r1   propertyr7   r9   r;   r`   no_gradr
   EXAMPLE_DOC_STRINGTensorr_   intru   	Generatorstrboolr   r   __classcell__r   r   r(   r   r   H   st    


	
r   )r   )typingr   r`   modelsr   r   
schedulersr   utilsr   r   r	   r
   utils.torch_utilsr   pipeline_utilsr   r   torch_xla.core.xla_modelcore	xla_modelro   rn   
get_loggerr   loggerr   r   r   r   r   r   r   <module>   s   


