o
    Gi3                  
   @   sl  d dl Z d dlmZmZ d dlZd dlZd dlZd dlm	Z	m
Z
 ddlmZ ddlmZmZ ddlmZ ddlmZmZmZ dd	lmZ d
dlmZ ddlmZ ddlmZ e red dlm  m Z! dZ"ndZ"e#e$Z%dZ&de'de'de(fddZ)				d%de'dB de*ej+B dB de,e' dB de,e( dB fddZ-	d&dej.dej/dB d e*fd!d"Z0G d#d$ d$eeZ1dS )'    N)AnyCallable)Qwen2TokenizerFastQwen3ForCausalLM   )Flux2LoraLoaderMixin)AutoencoderKLFlux2Flux2Transformer2DModel)FlowMatchEulerDiscreteScheduler)is_torch_xla_availableloggingreplace_example_docstring)randn_tensor   )DiffusionPipeline   )Flux2ImageProcessor)Flux2PipelineOutputTFa  
    Examples:
        ```py
        >>> import torch
        >>> from diffusers import Flux2KleinPipeline

        >>> pipe = Flux2KleinPipeline.from_pretrained(
        ...     "black-forest-labs/FLUX.2-klein-base-9B", torch_dtype=torch.bfloat16
        ... )
        >>> pipe.to("cuda")
        >>> prompt = "A cat holding a sign that says hello world"
        >>> # Depending on the variant being used, the pipeline call will slightly vary.
        >>> # Refer to the pipeline documentation for more details.
        >>> image = pipe(prompt, num_inference_steps=50, guidance_scale=4.0).images[0]
        >>> image.save("flux2_output.png")
        ```
image_seq_len	num_stepsreturnc                 C   sp   d\}}d\}}| dkr||  | }t |S ||  | }||  | }|| d }	|d|	  }
|	| |
 }t |S )N)gT	?gŒ_?)g w:/&?gDw:?i  g     g@g      i@)float)r   r   a1b1a2b2mum_200m_10ab r!   b/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/pipelines/flux2/pipeline_flux2_klein.pycompute_empirical_mu?   s   r#   num_inference_stepsdevice	timestepssigmasc                 K   s  |dur|durt d|dur>dtt| jj v }|s(t d| j d| jd||d| | j}t	|}||fS |durpdtt| jj v }|sZt d| j d| jd||d	| | j}t	|}||fS | j|fd
|i| | j}||fS )a  
    Calls the scheduler's `set_timesteps` method and retrieves timesteps from the scheduler after the call. Handles
    custom timesteps. Any kwargs will be supplied to `scheduler.set_timesteps`.

    Args:
        scheduler (`SchedulerMixin`):
            The scheduler to get timesteps from.
        num_inference_steps (`int`):
            The number of diffusion steps used when generating samples with a pre-trained model. If used, `timesteps`
            must be `None`.
        device (`str` or `torch.device`, *optional*):
            The device to which the timesteps should be moved to. If `None`, the timesteps are not moved.
        timesteps (`list[int]`, *optional*):
            Custom timesteps used to override the timestep spacing strategy of the scheduler. If `timesteps` is passed,
            `num_inference_steps` and `sigmas` must be `None`.
        sigmas (`list[float]`, *optional*):
            Custom sigmas used to override the timestep spacing strategy of the scheduler. If `sigmas` is passed,
            `num_inference_steps` and `timesteps` must be `None`.

    Returns:
        `tuple[torch.Tensor, int]`: A tuple where the first element is the timestep schedule from the scheduler and the
        second element is the number of inference steps.
    NzYOnly one of `timesteps` or `sigmas` can be passed. Please choose one to set custom valuesr&   zThe current scheduler class zx's `set_timesteps` does not support custom timestep schedules. Please check whether you are using the correct scheduler.)r&   r%   r'   zv's `set_timesteps` does not support custom sigmas schedules. Please check whether you are using the correct scheduler.)r'   r%   r%   r!   )

ValueErrorsetinspect	signatureset_timesteps
parameterskeys	__class__r&   len)	schedulerr$   r%   r&   r'   kwargsaccepts_timestepsaccept_sigmasr!   r!   r"   retrieve_timestepsR   s2   r5   sampleencoder_output	generatorsample_modec                 C   sR   t | dr|dkr| j|S t | dr|dkr| j S t | dr%| jS td)Nlatent_distr6   argmaxlatentsz3Could not access latents of provided encoder_output)hasattrr:   r6   moder<   AttributeError)r7   r8   r9   r!   r!   r"   retrieve_latents   s   

r@   c                *       s*  e Zd ZdZdZddgZ	dZdededed	e	d
e
def fddZe				d[ded	e	deee B dejdB dejdB dedee fddZe	d\dejdejdB fddZedejfddZe	d]deej d efd!d"Zed#d$ Zed%d& Zed'd( Zedejd)ejd*eej fd+d,Z		-			d^deee B dejdB d.edejdB ded/ee fd0d1Zd2ejd3ejfd4d5Z 	d\d3ejdejdB fd6d7Z!d8eej d3ejfd9d:Z"			d_d;d<Z#e$d=d> Z%e$d?d@ Z&e$dAdB Z'e$dCdD Z(e$dEdF Z)e$dGdH Z*e+ e,e-dddddIddJd-dddddKdLdddgddfd2ee.j/j/ e.j/j/B dB deee B dMedB dNedB dOedPee0 dB dQe0d.ed3ejeej B dB dejdB dejdB dReee B dB dSedTedUe1ee2f dB dVe3eee1gdf dB dWee ded/ee f&dXdYZ4  Z5S )`Flux2KleinPipelinea.  
    The Flux2 Klein pipeline for text-to-image generation.

    Reference:
    [https://bfl.ai/blog/flux2-klein-towards-interactive-visual-intelligence](https://bfl.ai/blog/flux2-klein-towards-interactive-visual-intelligence)

    Args:
        transformer ([`Flux2Transformer2DModel`]):
            Conditional Transformer (MMDiT) architecture to denoise the encoded image latents.
        scheduler ([`FlowMatchEulerDiscreteScheduler`]):
            A scheduler to be used in combination with `transformer` to denoise the encoded image latents.
        vae ([`AutoencoderKLFlux2`]):
            Variational Auto-Encoder (VAE) Model to encode and decode images to and from latent representations.
        text_encoder ([`Qwen3ForCausalLM`]):
            [Qwen3ForCausalLM](https://huggingface.co/docs/transformers/en/model_doc/qwen3#transformers.Qwen3ForCausalLM)
        tokenizer (`Qwen2TokenizerFast`):
            Tokenizer of class
            [Qwen2TokenizerFast](https://huggingface.co/docs/transformers/en/model_doc/qwen2#transformers.Qwen2TokenizerFast).
    ztext_encoder->transformer->vaer<   prompt_embedsFr1   vaetext_encoder	tokenizertransformeris_distilledc                    st   t    | j|||||d | j|d t| dd r&dt| jjjd  nd| _	t
| j	d d| _d| _d	| _d S )
N)rC   rD   rE   r1   rF   )rG   rC   r   r      )vae_scale_factor      )super__init__register_modulesregister_to_configgetattrr0   rC   configblock_out_channelsrI   r   image_processortokenizer_max_lengthdefault_sample_size)selfr1   rC   rD   rE   rF   rG   r/   r!   r"   rM      s   
	(
zFlux2KleinPipeline.__init__NrJ   	         promptdtyper%   max_sequence_lengthhidden_states_layersc                    s(  |d u r| j n|}|d u r| jn|}t|tr|gn|}g }g }|D ](}	d|	dg}
|j|
dddd}||ddd|d}||d	  ||d
  q"tj|dd|}tj|dd|}| ||ddd tj	 fdd|D dd}|j||d}|j
\}}}}|dddd|||| }|S )Nuser)rolecontentFT)tokenizeadd_generation_promptenable_thinkingpt
max_length)return_tensorspadding
truncationrg   	input_idsattention_maskr   dim)rk   rl   output_hidden_states	use_cachec                    s   g | ]} j | qS r!   )hidden_states.0koutputr!   r"   
<listcomp>       z?Flux2KleinPipeline._get_qwen3_prompt_embeds.<locals>.<listcomp>r   )r]   r%   r   r   )r]   r%   
isinstancestrapply_chat_templateappendtorchcattostackshapepermutereshape)rD   rE   r\   r]   r%   r^   r_   all_input_idsall_attention_maskssingle_promptmessagestextinputsrk   rl   out
batch_sizenum_channelsseq_len
hidden_dimrB   r!   ru   r"   _get_qwen3_prompt_embeds   sF   
z+Flux2KleinPipeline._get_qwen3_prompt_embedsxt_coordc                 C   sz   | j \}}}g }t|D ]+}|d u rtdn|| }td}td}	t|}
t|||	|
}|| qt|S Nr   )r   ranger}   arangecartesian_prodr|   r   )r   r   BL_out_idsithwlcoordsr!   r!   r"   _prepare_text_ids  s   



z$Flux2KleinPipeline._prepare_text_idsc           
      C   s^   | j \}}}}td}t|}t|}td}t||||}	|	d|dd}	|	S )a  
        Generates 4D position coordinates (T, H, W, L) for latent tensors.

        Args:
            latents (torch.Tensor):
                Latent tensor of shape (B, C, H, W)

        Returns:
            torch.Tensor:
                Position IDs tensor of shape (B, H*W, 4) All batches share the same coordinate structure: T=0,
                H=[0..H-1], W=[0..W-1], L=0
        r   r   )r   r}   r   r   	unsqueezeexpand)
r<   r   r   heightwidthr   r   r   r   
latent_idsr!   r!   r"   _prepare_latent_ids  s   



z&Flux2KleinPipeline._prepare_latent_ids
   image_latentsscalec           
   	      s   t | tstdt|  d fddtdt| D }dd |D }g }t| |D ]%\}}|d}|j	\}}}t
|t|t|td}	||	 q,tj|dd}|d}|S )	a  
        Generates 4D time-space coordinates (T, H, W, L) for a sequence of image latents.

        This function creates a unique coordinate for every pixel/patch across all input latent with different
        dimensions.

        Args:
            image_latents (list[torch.Tensor]):
                A list of image latent feature tensors, typically of shape (C, H, W).
            scale (int, optional):
                A factor used to define the time separation (T-coordinate) between latents. T-coordinate for the i-th
                latent is: 'scale + scale * i'. Defaults to 10.

        Returns:
            torch.Tensor:
                The combined coordinate tensor. Shape: (1, N_total, 4) Where N_total is the sum of (H * W) for all
                input latents.

        Coordinate Components (Dimension 4):
            - T (Time): The unique index indicating which latent image the coordinate belongs to.
            - H (Height): The row index within that latent image.
            - W (Width): The column index within that latent image.
            - L (Seq. Length): A sequence length dimension, which is always fixed at 0 (size 1)
        z+Expected `image_latents` to be a list, got .c                    s   g | ]}  |  qS r!   r!   rs   r   r   r!   r"   rw   `  s    z9Flux2KleinPipeline._prepare_image_ids.<locals>.<listcomp>r   c                 S   s   g | ]}| d qS )r   )viewr   r!   r!   r"   rw   a  rx   r   rm   )ry   listr(   typer}   r   r0   zipsqueezer   r   r|   r~   r   )
r   r   t_coordsimage_latent_idsr   r   r   r   r   x_idsr!   r   r"   _prepare_image_ids=  s   

"
z%Flux2KleinPipeline._prepare_image_idsc                 C   s^   | j \}}}}| |||d d|d d} | dddddd} | ||d |d |d } | S )Nr   r   r   r         )r   r   r   r   r<   r   num_channels_latentsr   r   r!   r!   r"   _patchify_latentsp  s
   z$Flux2KleinPipeline._patchify_latentsc                 C   sZ   | j \}}}}| ||d dd||} | dddddd} | ||d |d |d } | S )Nr   r   r   r   r   r   r   r   r   r   r!   r!   r"   _unpatchify_latentsy  s
   z&Flux2KleinPipeline._unpatchify_latentsc                 C   s.   | j \}}}}| |||| ddd} | S )zw
        pack latents: (batch_size, num_channels, height, width) -> (batch_size, height * width, num_channels)
        r   r   r   r   )r<   r   r   r   r   r!   r!   r"   _pack_latents  s   z Flux2KleinPipeline._pack_latentsr   r   c                 C   s   g }t | |D ]b\}}|j\}}|dddf tj}|dddf tj}t|d }	t|d }
||
 | }tj|	|
 |f|j|jd}|	d|
dd|| ||	|
|ddd}|| qtj|ddS )zA
        using position ids to scatter tokens into place
        Nr   r   r%   r]   r   r   rm   )r   r   r   r}   int64maxzerosr%   r]   scatter_r   r   r   r   r|   r   )r   r   x_listdataposr   chh_idsw_idsr   r   flat_idsr   r!   r!   r"   _unpack_latents_with_ids  s   
z+Flux2KleinPipeline._unpack_latents_with_idsr   num_images_per_prompttext_encoder_out_layersc                 C   s   |p| j }|d u rd}t|tr|gn|}|d u r&| j| j| j||||d}|j\}}}	|d|d}||| |d}| 	|}
|

|}
||
fS )N )rD   rE   r\   r%   r^   r_   r   r   )_execution_devicery   rz   r   rD   rE   r   repeatr   r   r   )rV   r\   r%   r   rB   r^   r   r   r   r   text_idsr!   r!   r"   encode_prompt  s&   
		

z Flux2KleinPipeline.encode_promptimager8   c                 C   s   |j dkrtd|j  dt| j||dd}| |}| jjjdddd	|j
|j}t| jjjdddd| jjj }|| | }|S )Nr   zExpected image dims 4, got r   r;   )r8   r9   r   r   )ndimr(   r@   rC   encoder   bnrunning_meanr   r   r%   r]   r}   sqrtrunning_varrQ   batch_norm_eps)rV   r   r8   r   latents_bn_meanlatents_bn_stdr!   r!   r"   _encode_vae_image  s   

"&z$Flux2KleinPipeline._encode_vae_imagec	                 C   s   dt || jd   }dt || jd   }||d |d |d f}	t|tr:t||kr:tdt| d| d|d u rGt|	|||d}n|j||d}| |}
|
|}
| 	|}||
fS )Nr   r   z/You have passed a list of generators of length z+, but requested an effective batch size of z@. Make sure the batch size matches the length of the generators.)r8   r%   r]   r   )
intrI   ry   r   r0   r(   r   r   r   r   )rV   r   num_latents_channelsr   r   r]   r%   r8   r<   r   r   r!   r!   r"   prepare_latents  s    


z"Flux2KleinPipeline.prepare_latentsimagesc                 C   s   g }|D ]}|j ||d}| j||d}|| q| |}	g }
|D ]}| |}|d}|
| q#tj|
dd}|d}|	|dd}|		|dd}	|	 |}	||	fS )Nr   )r   r8   r   rm   r   )
r   r   r|   r   r   r   r}   r~   r   r   )rV   r   r   r8   r%   r]   r   r   imagge_latentr   packed_latentslatentpackedr!   r!   r"   prepare_image_latents  s"   




z(Flux2KleinPipeline.prepare_image_latentsc              	      s4  |d ur| j d  dks|d ur,| j d  dkr,td j d  d| d| d |d urMt fdd|D sMtd	 j d
 fdd|D  |d ur`|d ur`td| d| d|d u rl|d u rltd|d urt|tst|tstdt	| |dkr j
jrtd| d d S d S d S )Nr   r   z-`height` and `width` have to be divisible by z	 but are z and z(. Dimensions will be resized accordinglyc                 3   s    | ]}| j v V  qd S N_callback_tensor_inputsrr   rV   r!   r"   	<genexpr>1  s    

z2Flux2KleinPipeline.check_inputs.<locals>.<genexpr>z2`callback_on_step_end_tensor_inputs` has to be in z, but found c                    s   g | ]	}| j vr|qS r!   r   rr   r   r!   r"   rw   5  s    z3Flux2KleinPipeline.check_inputs.<locals>.<listcomp>zCannot forward both `prompt`: z and `prompt_embeds`: z2. Please make sure to only forward one of the two.zeProvide either `prompt` or `prompt_embeds`. Cannot leave both `prompt` and `prompt_embeds` undefined.z2`prompt` has to be of type `str` or `list` but is       ?zGuidance scale z+ is ignored for step-wise distilled models.)rI   loggerwarningallr(   r   ry   rz   r   r   rQ   rG   )rV   r\   r   r   rB   "callback_on_step_end_tensor_inputsguidance_scaler!   r   r"   check_inputs  s4   
zFlux2KleinPipeline.check_inputsc                 C      | j S r   )_guidance_scaler   r!   r!   r"   r   G     z!Flux2KleinPipeline.guidance_scalec                 C   s   | j dko	| jj S r   )r   rQ   rG   r   r!   r!   r"   do_classifier_free_guidanceK  s   z.Flux2KleinPipeline.do_classifier_free_guidancec                 C   r   r   )_attention_kwargsr   r!   r!   r"   attention_kwargsO  r   z#Flux2KleinPipeline.attention_kwargsc                 C   r   r   )_num_timestepsr   r!   r!   r"   num_timestepsS  r   z Flux2KleinPipeline.num_timestepsc                 C   r   r   )_current_timestepr   r!   r!   r"   current_timestepW  r   z#Flux2KleinPipeline.current_timestepc                 C   r   r   )
_interruptr   r!   r!   r"   	interrupt[  r   zFlux2KleinPipeline.interrupt2   g      @pilTr   r   r$   r'   r   negative_prompt_embedsoutput_typereturn_dictr   callback_on_step_endr   c           4      C   s  | j ||||||d || _|| _d| _d| _|dur#t|tr#d}n|dur1t|tr1t|}n|j	d }| j
}| j||||||d\}}| jrhd}|dur[t|tr[|gt| }| j||||||d\}}|durtt|tst|g}d}|dur|D ]}| j| q|g }|D ]A}|j\}}|| dkr| j|d}|j\}}| jd	 }|| | }|| | }| jj|||d
d}|| |p|}|p|}q|p| j| j }|p| j| j }| jjjd }| j|| ||||j||	|
d\}
}d} d}!|dur| j||| |	|| jjd\} }!|du rtdd| |n|}t| jjdr-| jjj r-d}|
j	d }"t!|"|d}#t"| j||||#d\}$}t#t|$|| jj$  d}%t|$| _%| j&d | j'|d3}&t(|$D ]%\}'}(| j)rsqh|(| _|(*|
j	d +|
j})|
+| jj}*|}+| durt,j-|
| gdd+| jj}*t,j-||!gdd}+| j.d | j|*|)d d|||+| j/ddd },W d   n	1 sw   Y  |,ddd|
df },| jr| j.d | j|*|)d d|||+| jddd }-W d   n	1 sw   Y  |-ddd|
df }-|-||,|-   },|
j}.| jj0|,|(|
ddd }
|
j|.kr@t,j1j23 r@|
+|.}
|durgi }/|D ]
}0t4 |0 |/|0< qI|| |'|(|/}1|15d|
}
|15d|}|'t|$d ks|'d |%kr|'d | jj$ dkr|&6  t7rt89  qhW d   n	1 sw   Y  d| _| :|
|}
| jj;j<=dddd+|
j>|
j}2t,?| jj;j@=dddd| jjjA +|
j>|
j}3|
|3 |2 }
| B|
}
|dkr|
}n| jjC|
ddd }| jjD||d}| E  |s|fS tF|dS ) a  
        Function invoked when calling the pipeline for generation.

        Args:
            image (`torch.Tensor`, `PIL.Image.Image`, `np.ndarray`, `List[torch.Tensor]`, `List[PIL.Image.Image]`, or `List[np.ndarray]`):
                `Image`, numpy array or tensor representing an image batch to be used as the starting point. For both
                numpy array and pytorch tensor, the expected value range is between `[0, 1]` If it's a tensor or a list
                or tensors, the expected shape should be `(B, C, H, W)` or `(C, H, W)`. If it is a numpy array or a
                list of arrays, the expected shape should be `(B, H, W, C)` or `(H, W, C)` It can also accept image
                latents as `image`, but if passing latents directly it is not encoded again.
            prompt (`str` or `List[str]`, *optional*):
                The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`.
                instead.
            guidance_scale (`float`, *optional*, defaults to 4.0):
                Guidance scale as defined in [Classifier-Free Diffusion
                Guidance](https://huggingface.co/papers/2207.12598). `guidance_scale` is defined as `w` of equation 2.
                of [Imagen Paper](https://huggingface.co/papers/2205.11487). Guidance scale is enabled by setting
                `guidance_scale > 1`. Higher guidance scale encourages to generate images that are closely linked to
                the text `prompt`, usually at the expense of lower image quality. For step-wise distilled models,
                `guidance_scale` is ignored.
            height (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The height in pixels of the generated image. This is set to 1024 by default for the best results.
            width (`int`, *optional*, defaults to self.unet.config.sample_size * self.vae_scale_factor):
                The width in pixels of the generated image. This is set to 1024 by default for the best results.
            num_inference_steps (`int`, *optional*, defaults to 50):
                The number of denoising steps. More denoising steps usually lead to a higher quality image at the
                expense of slower inference.
            sigmas (`List[float]`, *optional*):
                Custom sigmas to use for the denoising process with schedulers which support a `sigmas` argument in
                their `set_timesteps` method. If not defined, the default behavior when `num_inference_steps` is passed
                will be used.
            num_images_per_prompt (`int`, *optional*, defaults to 1):
                The number of images to generate per prompt.
            generator (`torch.Generator` or `List[torch.Generator]`, *optional*):
                One or a list of [torch generator(s)](https://pytorch.org/docs/stable/generated/torch.Generator.html)
                to make generation deterministic.
            latents (`torch.Tensor`, *optional*):
                Pre-generated noisy latents, sampled from a Gaussian distribution, to be used as inputs for image
                generation. Can be used to tweak the same generation with different prompts. If not provided, a latents
                tensor will be generated by sampling using the supplied random `generator`.
            prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated text embeddings. Can be used to easily tweak text inputs, *e.g.* prompt weighting. If not
                provided, text embeddings will be generated from `prompt` input argument.
            negative_prompt_embeds (`torch.Tensor`, *optional*):
                Pre-generated negative text embeddings. Note that "" is used as the negative prompt in this pipeline.
                If not provided, will be generated from "".
            output_type (`str`, *optional*, defaults to `"pil"`):
                The output format of the generate image. Choose between
                [PIL](https://pillow.readthedocs.io/en/stable/): `PIL.Image.Image` or `np.array`.
            return_dict (`bool`, *optional*, defaults to `True`):
                Whether or not to return a [`~pipelines.qwenimage.QwenImagePipelineOutput`] instead of a plain tuple.
            attention_kwargs (`dict`, *optional*):
                A kwargs dictionary that if specified is passed along to the `AttentionProcessor` as defined under
                `self.processor` in
                [diffusers.models.attention_processor](https://github.com/huggingface/diffusers/blob/main/src/diffusers/models/attention_processor.py).
            callback_on_step_end (`Callable`, *optional*):
                A function that calls at the end of each denoising steps during the inference. The function is called
                with the following arguments: `callback_on_step_end(self: DiffusionPipeline, step: int, timestep: int,
                callback_kwargs: Dict)`. `callback_kwargs` will include a list of all tensors as specified by
                `callback_on_step_end_tensor_inputs`.
            callback_on_step_end_tensor_inputs (`List`, *optional*):
                The list of tensor inputs for the `callback_on_step_end` function. The tensors specified in the list
                will be passed as `callback_kwargs` argument. You will only be able to include variables listed in the
                `._callback_tensor_inputs` attribute of your pipeline class.
            max_sequence_length (`int` defaults to 512): Maximum sequence length to use with the `prompt`.
            text_encoder_out_layers (`tuple[int]`):
                Layer indices to use in the `text_encoder` to derive the final prompt embeddings.

        Examples:

        Returns:
            [`~pipelines.flux2.Flux2PipelineOutput`] or `tuple`: [`~pipelines.flux2.Flux2PipelineOutput`] if
            `return_dict` is True, otherwise a `tuple`. When returning a tuple, the first element is a list with the
            generated images.
        )r\   r   r   rB   r   r   NFr   r   )r\   rB   r%   r   r^   r   r   i   r   crop)r   r   resize_moder   )r   r   r   r   r]   r%   r8   r<   )r   r   r8   r%   r]   r   use_flow_sigmas)r   r   )r'   r   )totalrm   condi  )rq   timestepguidanceencoder_hidden_statestxt_idsimg_idsjoint_attention_kwargsr  uncond)r  r<   rB   r   r   )r  )r   )Gr   r   r   r   r   ry   rz   r   r0   r   r   r   r   rS   check_image_inputsize_resize_to_target_arearI   
preprocessr|   rU   rF   rQ   in_channelsr   r]   r   rC   nplinspacer=   r1   r  r#   r5   r   orderr   set_begin_indexprogress_bar	enumerater   r   r   r}   r~   cache_contextr   stepbackendsmpsis_availablelocalspopupdateXLA_AVAILABLExm	mark_stepr   r   r   r   r%   r   r   r   r   decodepostprocessmaybe_free_model_hooksr   )4rV   r   r\   r   r   r$   r'   r   r   r8   r<   rB   r  r  r  r   r  r   r^   r   r   r%   r   negative_promptnegative_text_idscondition_imagesimgimage_widthimage_heightmultiple_ofr   r   r   r   r   r   r&   num_warmup_stepsr  r   r   r
  latent_model_inputlatent_image_ids
noise_predneg_noise_predlatents_dtypecallback_kwargsrt   callback_outputsr   r   r!   r!   r"   __call___  sZ  e	


	









 	



		

6F"&


zFlux2KleinPipeline.__call__)F)NNrJ   rX   r   )r   )Nr   NrJ   rX   )NNN)6__name__
__module____qualname____doc__model_cpu_offload_seqr   r
   r   r   r   r	   boolrM   staticmethodrz   r   r}   r]   r%   r   r   Tensorr   r   r   r   r   r   r   tupler   	Generatorr   r   r   r   propertyr   r   r   r   r   r   no_gradr   EXAMPLE_DOC_STRINGPILImager   dictr   r   r9  __classcell__r!   r!   rW   r"   rA      sT   	
81



"

#	
"
'
)







	
rA   )NNNN)Nr6   )2r*   typingr   r   numpyr  rG  r}   transformersr   r   loadersr   modelsr   r	   
schedulersr
   utilsr   r   r   utils.torch_utilsr   pipeline_utilsr   rS   r   pipeline_outputr   torch_xla.core.xla_modelcore	xla_modelr%  r$  
get_loggerr:  r   rF  r   r   r#   rz   r%   r   r5   rA  rC  r@   rA   r!   r!   r!   r"   <module>   sV   



=
