o
    Gi0>                     @   s   d dl mZmZ d dlZd dlmZmZ ddlmZ ddl	m
Z
mZ ddlmZ ddlmZmZ d	d
lmZmZmZ e rId dlm  mZ dZndZdZG dd deeZdS )    )AnyCallableN)CLIPTextModelWithProjectionCLIPTokenizer   )VaeImageProcessor)UVit2DModelVQModel)AmusedScheduler)is_torch_xla_availablereplace_example_docstring   )DeprecatedPipelineMixinDiffusionPipelineImagePipelineOutputTFa{  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import AmusedPipeline

        >>> pipe = AmusedPipeline.from_pretrained("amused/amused-512", variant="fp16", torch_dtype=torch.float16)
        >>> pipe = pipe.to("cuda")

        >>> prompt = "a photo of an astronaut riding a horse on mars"
        >>> image = pipe(prompt).images[0]
        ```
c                .       s  e Zd ZU dZeed< eed< eed< eed< e	ed< e
ed< dZdededede	de
f
 fd	d
Ze ee																					d*dee eB dB dedB dedB dededeee B dB dedB dejdB dejdB dejdB dejdB dejdB d ejdB d!ed"eeeejgdf dB d#ed$eeef dB d%ed&eeef d'eeeef B ee B f(d(d)Z  ZS )+AmusedPipelinez0.33.1image_processorvqvae	tokenizertext_encodertransformer	schedulerz text_encoder->transformer->vqvaec                    sZ   t    | j|||||d t| dd r dt| jjjd  nd| _t	| jdd| _
d S )N)r   r   r   r   r   r   r         F)vae_scale_factordo_normalize)super__init__register_modulesgetattrlenr   configblock_out_channelsr   r   r   )selfr   r   r   r   r   	__class__ ^/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/pipelines/amused/pipeline_amused.pyr   =   s   
$zAmusedPipeline.__init__N         $@r   pilT   r   r   r   r   promptheightwidthnum_inference_stepsguidance_scalenegative_promptnum_images_per_prompt	generatorlatentsprompt_embedsencoder_hidden_statesnegative_prompt_embedsnegative_encoder_hidden_statesreturn_dictcallbackcallback_stepscross_attention_kwargs"micro_conditioning_aesthetic_scoremicro_conditioning_crop_coordtemperaturec           &   	   C   s  |
dur|du s|
du r|durt d|dur|du s$|du r(|dur(t d|du r0|
du s8|dur<|
dur<t dt|trD|g}|durMt|}n|
jd }|| }|du rb| jjj| j }|du rn| jjj| j }|
du r| j	|ddd| j	j
d	j| j}| j|ddd
}|j}
|jd }|
|d}
||dd}|dkr|du r|du rdgt| }t|tr|g}| j	|ddd| j	j
d	j| j}| j|ddd
}|j}|jd }||d}||dd}t||
g}
t||g}tj|||d |d |g| j|jd}|d}||dkrd| n|d}||| j || j f}|	du r<tj|| jjjtj| jd}	| j||| j t| jj|| jj  }| j|d}t | jjD ]\}}|dkrpt!|	gd }n|	}| j|||
||d} |dkr| "d\}!}"|!||"|!   } | jj#| ||	|dj$}	|t| jjd ks|d |kr|d | jj dkr|%  |dur|| dkr|t&| jdd }#||#||	 t'rt()  q^W d   n	1 sw   Y  |dkr|	}$n?| j*jtj+ko| j*jj,}%|%r| j*-  | j*j.|	d||| j || j | j*jj/fdj01dd}$| j23|$|}$|%r7| j*4  | 5  |sA|$fS t6|$S )ay  
        The call function to the pipeline for generation.

        Args:
            prompt (`str` or `list[str]`, *optional*):
                The prompt or prompts to guide image generation. If not defined, you need to pass `prompt_embeds`.
            height (`int`, *optional*, defaults to `self.transformer.config.sample_size * self.vae_scale_factor`):
                The height in pixels of the generated image.
            width (`int`, *optional*, defaults to `self.unet.config.sample_size * self.vae_scale_factor`):
                The width in pixels of the generated image.
            num_inference_steps (`int`, *optional*, defaults to 16):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            guidance_scale (`float`, *optional*, defaults to 10.0):
                A higher guidance scale value encourages the model to generate images closely linked to the text
                `prompt` at the expense of lower image quality. Guidance scale is enabled when `guidance_scale > 1`.
            negative_prompt (`str` or `list[str]`, *optional*):
                The prompt or prompts to guide what to not include in image generation. If not defined, you need to
                pass `negative_prompt_embeds` instead. Ignored when not using guidance (`guidance_scale < 1`).
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            generator (`torch.Generator`, *optional*):
                A [`torch.Generator`](https://pytorch.org/docs/stable/generated/torch.Generator.html) to make
                generation deterministic.
            latents (`torch.IntTensor`, *optional*):
                Pre-generated tokens representing latent vectors in `self.vqvae`, to be used as inputs for image
                generation. If not provided, the starting latents will be completely masked.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs (prompt weighting). If not
                provided, text embeddings are generated from the `prompt` input argument. A single vector from the
                pooled and projected final hidden states.
            encoder_hidden_states (`torch.Tensor`, *optional*):
                Pre-generated penultimate hidden states from the text encoder providing additional text conditioning.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs (prompt weighting). If
                not provided, `negative_prompt_embeds` are generated from the `negative_prompt` input argument.
            negative_encoder_hidden_states (`torch.Tensor`, *optional*):
                Analogous to `encoder_hidden_states` for the positive prompt.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generated image. Choose between `PIL.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion.StableDiffusionPipelineOutput`] instead of a
                plain tuple.
            callback (`Callable`, *optional*):
                A function that calls every `callback_steps` steps during inference. The function is called with the
                following arguments: `callback(step: int, timestep: int, latents: torch.Tensor)`.
            callback_steps (`int`, *optional*, defaults to 1):
                The frequency at which the `callback` function is called. If not specified, the callback is called at
                every step.
            cross_attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the [`AttentionProcessor`] as defined in
                [`self.processor`](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            micro_conditioning_aesthetic_score (`int`, *optional*, defaults to 6):
                The targeted aesthetic score according to the laion aesthetic classifier. See
                https://laion.ai/blog/laion-aesthetics/ and the micro-conditioning section of
                https://huggingface.co/papers/2307.01952.
            micro_conditioning_crop_coord (`tuple[int]`, *optional*, defaults to (0, 0)):
                The targeted height, width crop coordinates. See the micro-conditioning section of
                https://huggingface.co/papers/2307.01952.
            temperature (`int | tuple[int, int, list[int]]`, *optional*, defaults to (2, 0)):
                Configures the temperature scheduler on `self.scheduler` see `AmusedScheduler#set_timesteps`.

        Examples:

        Returns:
            [`~pipelines.pipeline_utils.ImagePipelineOutput`] or `tuple`:
                If `return_dict` is `True`, [`~pipelines.pipeline_utils.ImagePipelineOutput`] is returned, otherwise a
                `tuple` is returned where the first element is a list with the generated images.
        NzGpass either both `prompt_embeds` and `encoder_hidden_states` or neitherzXpass either both `negatve_prompt_embeds` and `negative_encoder_hidden_states` or neitherz,pass only one of `prompt` or `prompt_embeds`r   pt
max_lengthT)return_tensorspadding
truncationrC   )r;   output_hidden_statesr   g      ? )devicedtyper   )rK   rJ   )total)micro_condspooled_text_embr8   r>   )model_outputtimestepsampler5   orderlatent)force_not_quantizeshape)7
ValueError
isinstancestrr    rV   r   r!   sample_sizer   r   model_max_length	input_idsto_execution_devicer   text_embedshidden_statesrepeattorchconcattensorrK   	unsqueezeexpandfullr   mask_token_idlongset_timesteps	timestepsrS   progress_bar	enumeratecatchunkstepprev_sampleupdater   XLA_AVAILABLExm	mark_stepr   float16force_upcastfloatdecodelatent_channelsrR   clipr   postprocesshalfmaybe_free_model_hooksr   )&r#   r.   r/   r0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   output_typer;   r<   r=   r>   r?   r@   rA   
batch_sizer\   outputsrN   rV   num_warmup_stepsrl   irQ   model_inputrP   uncond_logitscond_logitsstep_idxoutputneeds_upcastingr&   r&   r'   __call__S   s
  _ 











$
%

	

zAmusedPipeline.__call__)NNNr(   r)   Nr   NNNNNNr*   TNr   Nr+   r,   r-   )__name__
__module____qualname___last_supported_versionr   __annotations__r	   r   r   r   r
   model_cpu_offload_seqr   rb   no_gradr   EXAMPLE_DOC_STRINGlistrY   intrx   	Generator	IntTensorTensorboolr   dictr   tupler   __classcell__r&   r&   r$   r'   r   2   s   
 	

r   )typingr   r   rb   transformersr   r   r   r   modelsr   r	   
schedulersr
   utilsr   r   pipeline_utilsr   r   r   torch_xla.core.xla_modelcore	xla_modelrt   rs   r   r   r&   r&   r&   r'   <module>   s   