o
    Gi=;                     @   s   d dl Z ddlmZ ddlmZ ddlmZmZ ddlm	Z	m
Z
 ddlmZmZ d	d
lmZ eeZG dd deZG dd deZG dd deZG dd deZdS )    N   )FluxPipeline)logging   )ModularPipelineBlocksPipelineState)
InputParamOutputParam) calculate_dimension_from_latentsrepeat_tensor_to_batch_size   )FluxModularPipelinec                   @   st   e Zd ZdZedefddZedee fddZ	edee fddZ
d	d
 Ze dededefddZdS )FluxTextInputStepfluxreturnc                 C      	 dS )NzText input processing step that standardizes text embeddings for the pipeline.
This step:
  1. Determines `batch_size` and `dtype` based on `prompt_embeds`
  2. Ensures all text embeddings have consistent batch sizes (batch_size * num_images_per_prompt) selfr   r   [/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/modular_pipelines/flux/inputs.pydescription"      zFluxTextInputStep.descriptionc                 C   s0   t dddt dddtjddt d	dtjd
dgS )Nnum_images_per_promptr   )defaultprompt_embedsTdenoiser_input_fieldszGPre-generated text embeddings. Can be generated from text_encoder step.)requiredkwargs_type	type_hintr   pooled_prompt_embedszNPre-generated pooled text embeddings. Can be generated from text_encoder step.)r   r   r   )r   torchTensorr   r   r   r   inputs+   s   
zFluxTextInputStep.inputsc              	   C   s>   t dtddt dtjddt dtjddd	t d
tjddd	gS )N
batch_sizezdNumber of prompts, the final batch size of model inputs should be batch_size * num_images_per_prompt)r   r   dtypez@Data type of model tensor inputs (determined by `prompt_embeds`)r   r   z2text embeddings used to guide the image generation)r   r   r   r   z9pooled text embeddings used to guide the image generation)r	   intr    r$   r!   r   r   r   r   intermediate_outputs?   s.   z&FluxTextInputStep.intermediate_outputsc                 C   sV   |j d ur%|jd ur'|j jd |jjd kr)td|j j d|jj dd S d S d S )Nr   zx`prompt_embeds` and `pooled_prompt_embeds` must have the same batch size when passed directly, but got: `prompt_embeds` z != `pooled_prompt_embeds` .)r   r   shape
ValueError)r   
componentsblock_stater   r   r   check_inputs[   s   zFluxTextInputStep.check_inputsr*   statec                 C   s   |  |}| || |jjd |_|jj|_|jj\}}}|jd|jd|_|j|j|j |d|_|j	d|j}||j|j d|_	| 
|| ||fS )Nr   r   )get_block_stater,   r   r(   r#   r$   repeatr   viewr   set_block_state)r   r*   r-   r+   _seq_lenr   r   r   r   __call__d   s   

zFluxTextInputStep.__call__N)__name__
__module____qualname__
model_namepropertystrr   listr   r"   r&   r,   r    no_gradr   r   r5   r   r   r   r   r      s    	r   c                       s   e Zd ZdZdgg fdee dee f fddZedefdd	Zedee	 fd
dZ
edee fddZdededefddZ  ZS )FluxAdditionalInputsStepr   image_latentsimage_latent_inputsadditional_batch_inputsc                    s:   t |ts|g}t |ts|g}|| _|| _t   d S )N)
isinstancer<   _image_latent_inputs_additional_batch_inputssuper__init__)r   r@   rA   	__class__r   r   rF      s   

z!FluxAdditionalInputsStep.__init__r   c                 C   sT   d}d}| j s
| jr"d}| j r|d| j  7 }| jr"|d| j 7 }d}|| | S )NzInput processing step that:
  1. For image latent inputs: Updates height/width if None, patchifies latents, and expands batch size
  2. For additional batch inputs: Expands batch dimensions to match final batch size z

Configured inputs:z
  - Image latent inputs: z
  - Additional batch inputs: zN

This block should be placed after the encoder steps and the text input step.)rC   rD   )r   summary_sectioninputs_infoplacement_sectionr   r   r   r      s   z$FluxAdditionalInputsStep.descriptionc                 C   sd   t dddt dddt ddt d	dg}| jD ]
}|t |d q| jD ]
}|t |d q%|S )
Nr   r   )namer   r#   T)rM   r   heightrM   width)r   rC   appendrD   )r   r"   image_latent_input_name
input_namer   r   r   r"      s   



zFluxAdditionalInputsStep.inputsc                 C      t dtddt dtddgS )Nimage_heightzThe height of the image latentsrM   r   r   image_widthzThe width of the image latentsr	   r%   r   r   r   r   r&         z-FluxAdditionalInputsStep.intermediate_outputsr*   r-   c                 C   s  |  |}| jD ]V}t||}|d u rqt||j\}}|jp ||_|jp&||_t|ds0||_t|ds8||_	|j
dd  \}}	t||j|j
d ||	}t|||j|jd}t||| q| jD ]}
t||
}|d u rnqbt|
||j|jd}t||
| qb| || ||fS NrU   rW   r   r   )rS   input_tensorr   r#   )r/   rC   getattrr
   vae_scale_factorrN   rP   hasattrrU   rW   r(   r   _pack_latentsr#   r   r   setattrrD   r2   r   r*   r-   r+   rR   image_latent_tensorrN   rP   latent_heightlatent_widthrS   r[   r   r   r   r5      sH   






z!FluxAdditionalInputsStep.__call__)r6   r7   r8   r9   r<   r;   rF   r:   r   r   r"   r	   r&   r   r   r5   __classcell__r   r   rG   r   r>   |   s     r>   c                   @   s&   e Zd ZdZdededefddZdS )FluxKontextAdditionalInputsStepflux-kontextr*   r-   r   c                 C   s   |  |}| jD ]J}t||}|d u rqt||j\}}t|ds$||_t|ds,||_|jdd  \}}	t	
||j|jd ||	}t|||j|jd}t||| q| jD ]}
t||
}|d u rbqVt|
||j|jd}t||
| qV| || ||fS rZ   )r/   rC   r\   r
   r]   r^   rU   rW   r(   r   r_   r#   r   r   r`   rD   r2   ra   r   r   r   r5      sD   






z(FluxKontextAdditionalInputsStep.__call__N)r6   r7   r8   r9   r   r   r5   r   r   r   r   rf      s    rf   c                   @   sj   e Zd ZdZedd Zedee fddZedee	 fddZ
ed	d
 ZdededefddZdS )FluxKontextSetResolutionSteprg   c                 C   r   )NzDetermines the height and width to be used during the subsequent computations.
It should always be placed _before_ the latent preparation step.r   r   r   r   r   r   2  r   z(FluxKontextSetResolutionStep.descriptionr   c                 C   s$   t ddt ddt dtddg}|S )NrN   rO   rP   max_areai   )rM   r   r   )r   r%   )r   r"   r   r   r   r"   9  s
   z#FluxKontextSetResolutionStep.inputsc                 C   rT   )NrN   z'The height of the initial noisy latentsrV   rP   z&The width of the initial noisy latentsrX   r   r   r   r   r&   B  rY   z1FluxKontextSetResolutionStep.intermediate_outputsc                 C   sh   | d ur| |d  dkrt d|d  d|  |d ur0||d  dkr2t d|d  d| d S d S )Nr   r   zHeight must be divisible by z but is zWidth must be divisible by )r)   )rN   rP   r]   r   r   r   r,   I  s
   z)FluxKontextSetResolutionStep.check_inputsr*   r-   c                 C   s   |  |}|jp
|j}|jp|j}| |||j ||}}|j}|| }	t||	 d }t||	 d }|jd }
||
 |
 }||
 |
 }||ksN||krZt	
d| d| d ||_||_| || ||fS )Ng      ?r   z6Generation `height` and `width` have been adjusted to z and z to fit the model requirements.)r/   rN   default_heightrP   default_widthr,   r]   ri   roundloggerwarningr2   )r   r*   r-   r+   rN   rP   original_heightoriginal_widthri   aspect_ratiomultiple_ofr   r   r   r5   Q  s(   


z%FluxKontextSetResolutionStep.__call__N)r6   r7   r8   r9   r:   r   r<   r   r"   r	   r&   staticmethodr,   r   r   r5   r   r   r   r   rh   /  s    

rh   )r    	pipelinesr   utilsr   modular_pipeliner   r   modular_pipeline_utilsr   r	   qwenimage.inputsr
   r   r   
get_loggerr6   rm   r   r>   rf   rh   r   r   r   r   <module>   s   
]z9