o
    GÆÏiò  ã                
   @   s<  d dl Z d dlmZmZ d dlZd dlmZmZ ddlm	Z	m
Z
 ddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZmZmZmZmZ ddlmZ ddlmZm Z  eƒ rid dl!m"  m#Z$ dZ%ndZ%e &e'¡Z(dZ)				dde*dB de+ej,B dB de-e* dB de-e. dB fdd„Z/G dd„ deeƒZ0dS )é    N)ÚAnyÚCallable)ÚT5TokenizerÚUMT5EncoderModelé   )ÚMultiPipelineCallbacksÚPipelineCallback)ÚVaeImageProcessor)ÚAuraFlowLoraLoaderMixin)ÚAuraFlowTransformer2DModelÚAutoencoderKL)ÚFlowMatchEulerDiscreteScheduler)ÚUSE_PEFT_BACKENDÚ	deprecateÚis_torch_xla_availableÚloggingÚreplace_example_docstringÚscale_lora_layersÚunscale_lora_layers)Úrandn_tensoré   )ÚDiffusionPipelineÚImagePipelineOutputTFa  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import AuraFlowPipeline

        >>> pipe = AuraFlowPipeline.from_pretrained("fal/AuraFlow", torch_dtype=torch.float16)
        >>> pipe = pipe.to("cuda")
        >>> prompt = "A cat holding a sign that says hello world"
        >>> image = pipe(prompt).images[0]
        >>> image.save("aura_flow.png")
        ```
Únum_inference_stepsÚdeviceÚ	timestepsÚsigmasc                 K   s  |dur|durt dƒ‚|dur>dtt | j¡j ¡ ƒv }|s(t d| j› dƒ‚| jd||dœ|¤Ž | j}t	|ƒ}||fS |durpdtt | j¡j ¡ ƒv }|sZt d| j› dƒ‚| jd||d	œ|¤Ž | j}t	|ƒ}||fS | j|fd
|i|¤Ž | j}||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`list[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`list[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    NzYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r   r   r   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r   r   r   © )
Ú
ValueErrorÚsetÚinspectÚ	signatureÚset_timestepsÚ
parametersÚkeysÚ	__class__r   Úlen)Ú	schedulerr   r   r   r   ÚkwargsÚaccepts_timestepsÚaccept_sigmasr   r   úd/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/pipelines/aura_flow/pipeline_aura_flow.pyÚretrieve_timesteps@   s2   ÿóÿþr,   c                .       s(  e Zd ZdZg ZdZddgZdedede	de
d	ef
‡ fd
d„Z					d:dd„Z										d;deee B deee B dededejdB dejdB dejdB dejdB dejdB dededB fdd„Z	d<dd„Zd d!„ Zed"d#„ ƒZed$d%„ ƒZed&d'„ ƒZe ¡ eeƒddd(dd)dd*d*dddddddd+ddddgfdeee B deee B d,ed-ee d.ededB d/edB d0edB d1ej eej  B dB dejdB dejdB dejdB dejdB dejdB ded2edB d3ed4e!ee"f dB d5e#eegdf e$B e%B dB d6ee d7e&e'B f*d8d9„ƒƒZ(‡  Z)S )=ÚAuraFlowPipelinea¾  
    Args:
        tokenizer (`T5TokenizerFast`):
            Tokenizer of class
            [T5Tokenizer](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5Tokenizer).
        text_encoder ([`T5EncoderModel`]):
            Frozen text-encoder. AuraFlow uses
            [T5](https://huggingface.co/docs/transformers/model_doc/t5#transformers.T5EncoderModel), specifically the
            [EleutherAI/pile-t5-xl](https://huggingface.co/EleutherAI/pile-t5-xl) variant.
        vae ([`AutoencoderKL`]):
            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
        transformer ([`AuraFlowTransformer2DModel`]):
            Conditional Transformer (MMDiT and DiT) architecture to denoise the encoded image latents.
        scheduler ([`FlowMatchEulerDiscreteScheduler`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
    ztext_encoder->transformer->vaeÚlatentsÚprompt_embedsÚ	tokenizerÚtext_encoderÚvaeÚtransformerr'   c                    sX   t ƒ  ¡  | j|||||d t| dd ƒr dt| jjjƒd  nd| _t	| jd| _
d S )N)r0   r1   r2   r3   r'   r2   r   é   é   )Úvae_scale_factor)ÚsuperÚ__init__Úregister_modulesÚgetattrr&   r2   ÚconfigÚblock_out_channelsr6   r	   Úimage_processor)Úselfr0   r1   r2   r3   r'   ©r%   r   r+   r8   ”   s   

ÿ(zAuraFlowPipeline.__init__Nc
           
         sØ  |ˆ j d  dks|ˆ j d  dkr#tdˆ j d › d|› d|› dƒ‚|	d urDt‡ fdd„|	D ƒƒsDtd	ˆ j› d
‡ fdd„|	D ƒ› ƒ‚|d urW|d urWtd|› d|› dƒ‚|d u rc|d u rctdƒ‚|d urzt|tƒszt|tƒsztdt|ƒ› ƒ‚|d ur|d urtd|› d|› dƒ‚|d ur |d ur td|› d|› dƒ‚|d ur¬|d u r¬tdƒ‚|d ur¸|d u r¸tdƒ‚|d uræ|d urè|j|jkrÓtd|j› d|j› dƒ‚|j|jkrêtd|j› d|j› dƒ‚d S d S d S )Nr   r   z-`height` and `width` have to be divisible by z	 but are z and Ú.c                 3   s    | ]}|ˆ j v V  qd S ©N©Ú_callback_tensor_inputs©Ú.0Úk©r>   r   r+   Ú	<genexpr>¶   s   € 

ÿz0AuraFlowPipeline.check_inputs.<locals>.<genexpr>z2`callback_on_step_end_tensor_inputs` has to be in z, but found c                    s   g | ]	}|ˆ j vr|‘qS r   rB   rD   rG   r   r+   Ú
<listcomp>º   s    z1AuraFlowPipeline.check_inputs.<locals>.<listcomp>zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is z and `negative_prompt_embeds`: z'Cannot forward both `negative_prompt`: zEMust provide `prompt_attention_mask` when specifying `prompt_embeds`.zWMust provide `negative_prompt_attention_mask` when specifying `negative_prompt_embeds`.zu`prompt_embeds` and `negative_prompt_embeds` must have the same shape when passed directly, but got: `prompt_embeds` z != `negative_prompt_embeds` z`prompt_attention_mask` and `negative_prompt_attention_mask` must have the same shape when passed directly, but got: `prompt_attention_mask` z% != `negative_prompt_attention_mask` )	r6   r   ÚallrC   Ú
isinstanceÚstrÚlistÚtypeÚshape)
r>   ÚpromptÚheightÚwidthÚnegative_promptr/   Únegative_prompt_embedsÚprompt_attention_maskÚnegative_prompt_attention_maskÚ"callback_on_step_end_tensor_inputsr   rG   r+   Úcheck_inputs¥   sn   $ÿÿÿÿÿÿÿÿÿÿþÿÿþÿøzAuraFlowPipeline.check_inputsTr4   é   rP   rS   Údo_classifier_free_guidanceÚnum_images_per_promptr   rT   rU   rV   Úmax_sequence_lengthÚ
lora_scalec                    s  |durt | tƒr|| _| jdurtrt| j|ƒ ˆ du r | j‰ |dur,t |tƒr,d}n|dur:t |tƒr:t	|ƒ}n|j
d }|
}|du r«| j|d|ddd}|d }| j|d	dd
j}|j
d |j
d kr‡t ||¡s‡| j |dd…|d d…f ¡}t d|› d|› ¡ ‡ fdd„| ¡ D ƒ}| jdi |¤Žd }|d  d¡ |j
¡}|| }| jdurµ| jj}n| jdur¿| jj}nd}|j|ˆ d}|j
\}}}| d|d¡}| || |d¡}| |d¡}| |d¡}|r6|du r6|põd}t |tƒr|g| n|}|j
d }| j|d|ddd}‡ fdd„| ¡ D ƒ}| jdi |¤Žd }|d  d¡ |j
¡}	||	 }|rb|j
d }|j|ˆ d}| d|d¡}| || |d¡}|	 |d¡}	|	 |d¡}	nd}d}	| jdur{t | tƒr{tr{t| j|ƒ ||||	fS )a
  
        Encodes the prompt into text encoder hidden states.

        Args:
            prompt (`str` or `list[str]`, *optional*):
                prompt to be encoded
            negative_prompt (`str` or `list[str]`, *optional*):
                The prompt not to guide the image generation. If not defined, one has to pass `negative_prompt_embeds`
                instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is less than `1`).
            do_classifier_free_guidance (`bool`, *optional*, defaults to `True`):
                whether to use classifier free guidance or not
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                number of images that should be generated per prompt
            device: (`torch.device`, *optional*):
                torch device to place the resulting embeddings on
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            prompt_attention_mask (`torch.Tensor`, *optional*):
                Pre-generated attention mask for text embeddings.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings.
            negative_prompt_attention_mask (`torch.Tensor`, *optional*):
                Pre-generated attention mask for negative text embeddings.
            max_sequence_length (`int`, defaults to 256): Maximum sequence length to use for the prompt.
            lora_scale (`float`, *optional*):
                A lora scale that will be applied to all LoRA layers of the text encoder if LoRA layers are loaded.
        Nr4   r   TÚ
max_lengthÚpt)Ú
truncationr^   ÚpaddingÚreturn_tensorsÚ	input_idsÚlongest)ra   rb   éÿÿÿÿzZThe following part of your input was truncated because T5 can only handle sequences up to z	 tokens: c                    ó   i | ]
\}}||  ˆ ¡“qS r   ©Úto©rE   rF   Úv©r   r   r+   Ú
<dictcomp>9  ó    z2AuraFlowPipeline.encode_prompt.<locals>.<dictcomp>Úattention_mask)Údtyper   Ú c                    rf   r   rg   ri   rk   r   r+   rl   Z  rm   r   )rK   r
   Ú_lora_scaler1   r   r   Ú_execution_devicerL   rM   r&   rO   r0   rc   ÚtorchÚequalÚbatch_decodeÚloggerÚwarningÚitemsÚ	unsqueezeÚexpandro   r3   rh   ÚrepeatÚviewÚreshaper   )r>   rP   rS   rZ   r[   r   r/   rT   rU   rV   r\   r]   Ú
batch_sizer^   Útext_inputsÚtext_input_idsÚuntruncated_idsÚremoved_textro   Úbs_embedÚseq_lenÚ_Úuncond_tokensÚuncond_inputr   rk   r+   Úencode_promptè   sš   ,

ûÿ ÿÿÿ



ûÿ
zAuraFlowPipeline.encode_promptc	           
      C   sz   |d ur|j ||dS ||t|ƒ| j t|ƒ| j f}	t|tƒr3t|ƒ|kr3tdt|ƒ› d|› dƒ‚t|	|||d}|S )N)r   ro   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)Ú	generatorr   ro   )rh   Úintr6   rK   rM   r&   r   r   )
r>   r~   Únum_channels_latentsrQ   rR   ro   r   r‰   r.   rO   r   r   r+   Úprepare_latentsx  s   üÿÿz AuraFlowPipeline.prepare_latentsc                 C   s    t dddƒ | jjtjd d S )NÚ
upcast_vaez1.0.0z­`upcast_vae` is deprecated. Please use `pipe.vae.to(torch.float32)`. For more details, please refer to: https://github.com/huggingface/diffusers/pull/12619#issue-3606633695.©ro   )r   r2   rh   rs   Úfloat32rG   r   r   r+   r   ˜  s   ýzAuraFlowPipeline.upcast_vaec                 C   ó   | j S rA   )Ú_guidance_scalerG   r   r   r+   Úguidance_scale   ó   zAuraFlowPipeline.guidance_scalec                 C   r   rA   )Ú_attention_kwargsrG   r   r   r+   Úattention_kwargs¤  r“   z!AuraFlowPipeline.attention_kwargsc                 C   r   rA   )Ú_num_timestepsrG   r   r   r+   Únum_timesteps¨  r“   zAuraFlowPipeline.num_timestepsé2   g      @i   Úpilr   r   r’   rQ   rR   r‰   Úoutput_typeÚreturn_dictr•   Úcallback_on_step_endrW   Úreturnc           *      C   s|  |p	| j jj| j }|p| j jj| j }| j|||||||||d	 || _|| _|dur4t|tƒr4d}n|durBt|t	ƒrBt
|ƒ}n|jd }| j}| jdurV| j dd¡nd}|dk}| j|||||||||||d\}}}}|r{tj||gdd}tr€d	}n|}t| j|||d
\}}| j jj}|  || ||||j||	|
¡}
tt
|ƒ|| jj  dƒ}t
|ƒ| _| j|d¬}t|ƒD ]Ÿ\}}|rÌt |
gd ¡n|
} t |d g¡ | jd ¡}!|!j|
j |
jd}!| j | ||!d| jdd }"|r|" !d¡\}#}$|#||$|#   }"| jj"|"||
ddd }
|dur7i }%|D ]
}&t#ƒ |& |%|&< q|| |||%ƒ}'|' $d|
¡}
|' $d|¡}|t
|ƒd ksR|d |krV|d | jj dkrV| %¡  tr]t& '¡  q¾W d  ƒ n	1 siw   Y  |dkrv|
}(n9| j(jtj)ko‚| j(jj*})|)r˜|  +¡  |
 t,t-| j(j. /¡ ƒƒj¡}
| j(j0|
| j(jj1 ddd }(| j2j3|(|d}(|  4¡  |s¹|(fS t5|(dS )a{  
        Function invoked when calling the pipeline for generation.

        Args:
            prompt (`str` or `list[str]`, *optional*):
                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                instead.
            negative_prompt (`str` or `list[str]`, *optional*):
                The prompt or prompts not to guide the image generation. If not defined, one has to pass
                `negative_prompt_embeds` instead. Ignored when not using guidance (i.e., ignored if `guidance_scale` is
                less than `1`).
            height (`int`, *optional*, defaults to self.transformer.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated image. This is set to 1024 by default for best results.
            width (`int`, *optional*, defaults to self.transformer.config.sample_size * self.vae_scale_factor):
                The width in pixels of the generated image. This is set to 1024 by default for best results.
            num_inference_steps (`int`, *optional*, defaults to 50):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            sigmas (`list[float]`, *optional*):
                Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
                `num_inference_steps` and `timesteps` must be `None`.
            guidance_scale (`float`, *optional*, defaults to 5.0):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
                the text `prompt`, usually at the expense of lower image quality.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            generator (`torch.Generator` or `list[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.FloatTensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            prompt_attention_mask (`torch.Tensor`, *optional*):
                Pre-generated attention mask for text embeddings.
            negative_prompt_embeds (`torch.FloatTensor`, *optional*):
                Pre-generated negative text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt
                weighting. If not provided, negative_prompt_embeds will be generated from `negative_prompt` input
                argument.
            negative_prompt_attention_mask (`torch.Tensor`, *optional*):
                Pre-generated attention mask for negative text embeddings.
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.stable_diffusion_xl.StableDiffusionXLPipelineOutput`] instead
                of a plain tuple.
            attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                `self.processor` in
                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`list`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.
            max_sequence_length (`int` defaults to 256): Maximum sequence length to use with the `prompt`.

        Examples:

        Returns: [`~pipelines.ImagePipelineOutput`] or `tuple`:
            If `return_dict` is `True`, [`~pipelines.ImagePipelineOutput`] is returned, otherwise a `tuple` is returned
            where the first element is a list with the generated images.
        )rW   Nr4   r   Úscaleg      ð?)rP   rS   rZ   r[   r   r/   rT   rU   rV   r\   r]   )ÚdimÚcpu)r   )Útotalr   iè  rŽ   F)Úencoder_hidden_statesÚtimestepr›   r•   )r›   r.   r/   Úlatent)rš   )Úimages)6r3   r;   Úsample_sizer6   rX   r‘   r”   rK   rL   rM   r&   rO   rr   r•   Úgetrˆ   rs   ÚcatÚXLA_AVAILABLEr,   r'   Úin_channelsrŒ   ro   ÚmaxÚorderr–   Úprogress_barÚ	enumerateÚtensorrz   rh   r   ÚchunkÚstepÚlocalsÚpopÚupdateÚxmÚ	mark_stepr2   Úfloat16Úforce_upcastr   ÚnextÚiterÚpost_quant_convr#   ÚdecodeÚscaling_factorr=   ÚpostprocessÚmaybe_free_model_hooksr   )*r>   rP   rS   r   r   r’   r[   rQ   rR   r‰   r.   r/   rU   rT   rV   r\   rš   r›   r•   rœ   rW   r~   r   r]   rZ   Útimestep_devicer   Úlatent_channelsÚnum_warmup_stepsr­   ÚiÚtÚlatent_model_inputr£   Ú
noise_predÚnoise_pred_uncondÚnoise_pred_textÚcallback_kwargsrF   Úcallback_outputsÚimageÚneeds_upcastingr   r   r+   Ú__call__¬  sÔ   d÷

õû

ÿ
ø
ûú	
6€Øÿ
+
zAuraFlowPipeline.__call__)NNNNN)
NTr4   NNNNNrY   NrA   )*Ú__name__Ú
__module__Ú__qualname__Ú__doc__Ú_optional_componentsÚmodel_cpu_offload_seqrC   r   r   r   r   r   r8   rX   rL   rM   ÚboolrŠ   rs   r   ÚTensorÚfloatrˆ   rŒ   r   Úpropertyr’   r•   r—   Úno_gradr   ÚEXAMPLE_DOC_STRINGÚ	GeneratorÚdictr   r   r   r   r   ÚtuplerÍ   Ú__classcell__r   r   r?   r+   r-   {   s   þþýüûú
öFô
þ
ýüûúùø	÷
öõ
ô 
÷ 


ë
þ
ýüûúùø	÷
öõôóòñðïîíìëêr-   )NNNN)1r    Útypingr   r   rs   Útransformersr   r   Ú	callbacksr   r   r=   r	   Úloadersr
   Úmodelsr   r   Ú
schedulersr   Úutilsr   r   r   r   r   r   r   Úutils.torch_utilsr   Úpipeline_utilsr   r   Útorch_xla.core.xla_modelÚcoreÚ	xla_modelrµ   r©   Ú
get_loggerrÎ   rv   rÙ   rŠ   rL   r   rM   rÖ   r,   r-   r   r   r   r+   Ú<module>   s@   $	
ûþý
ü

û;