o
    ۷iZ1                     @   s   d dl mZ d dlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ d	d
lmZmZmZmZ d	dlmZmZ ddlmZ eeZG dd deZG dd deZG dd deZG dd deZG dd deZdS )    )AnyN   )
FrozenDict)ClassifierFreeGuidance)ZImageTransformer2DModel)FlowMatchEulerDiscreteScheduler)logging   )
BlockStateLoopSequentialPipelineBlocksModularPipelineBlocksPipelineState)ComponentSpec
InputParam   )ZImageModularPipelinec                	   @   s\   e Zd ZdZedefddZedee fddZ	e
 deded	ed
e
jfddZdS )ZImageLoopBeforeDenoiserz-imagereturnc                 C      	 dS )Nzstep within the denoising loop that prepares the latent input for the denoiser. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `ZImageDenoiseLoopWrapper`) selfr   r   a/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/modular_pipelines/z_image/denoise.pydescription(      z$ZImageLoopBeforeDenoiser.descriptionc                 C   s$   t ddtjddt ddtjddgS )NlatentsTz^The initial latents to use for the denoising process. Can be generated in prepare_latent step.required	type_hintr   dtypez>The dtype of the model inputs. Can be generated in input step.)r   torchTensorr    r   r   r   r   inputs0   s   zZImageLoopBeforeDenoiser.inputs
componentsblock_stateitc                 C   sX   |j d|j}t|jdd|_||jd |j}d| d }||_	||fS )Nr	   r   dimi  )
r   	unsqueezetor    listunbindlatent_model_inputexpandshapetimestep)r   r$   r%   r&   r'   r   r1   r   r   r   __call__A   s   z!ZImageLoopBeforeDenoiser.__call__N)__name__
__module____qualname__
model_namepropertystrr   r,   r   r#   r!   no_gradr   r
   intr"   r2   r   r   r   r   r   %   s    "r   c                       s   e Zd ZdZddifdeeef f fddZede	e
 fdd	Zedefd
dZede	eeef  fddZe dedededejdef
ddZ  ZS )ZImageLoopDenoiserr   	cap_featsprompt_embedsnegative_prompt_embedsguider_input_fieldsc                    s0   t |tstdt| || _t   dS )av  Initialize a denoiser block that calls the denoiser model. This block is used in Z-Image.

        Args:
            guider_input_fields: A dictionary that maps each argument expected by the denoiser model
                (for example, "encoder_hidden_states") to data stored on 'block_state'. The value can be either:

                - A tuple of strings. For instance, {"encoder_hidden_states": ("prompt_embeds",
                  "negative_prompt_embeds")} tells the guider to read `block_state.prompt_embeds` and
                  `block_state.negative_prompt_embeds` and pass them as the conditional and unconditional batches of
                  'encoder_hidden_states'.
                - A string. For example, {"encoder_hidden_image": "image_embeds"} makes the guider forward
                  `block_state.image_embeds` for both conditional and unconditional batches.
        z0guider_input_fields must be a dictionary but is N)
isinstancedict
ValueErrortype_guider_input_fieldssuper__init__)r   r@   	__class__r   r   rG   Q   s   
zZImageLoopDenoiser.__init__r   c                 C   s$   t dttdddddt dtgS )Nguiderg      @F)guidance_scaleenabledfrom_config)configdefault_creation_methodtransformer)r   r   r   r   r   r   r   r   expected_componentsg   s   z&ZImageLoopDenoiser.expected_componentsc                 C   r   )NzStep within the denoising loop that denoise the latents with guidance. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `ZImageDenoiseLoopWrapper`)r   r   r   r   r   r   s   r   zZImageLoopDenoiser.descriptionc                 C   s   t ddtddt dddg}g }g }| j D ]}t|tr-||d  ||d	  q|| q|D ]}|t |dd
 q5|D ]
}|t |d qC|S )Nnum_inference_stepsTgThe number of inference steps to use for the denoising process. Can be generated in set_timesteps step.r   denoiser_input_fieldsz_The conditional model inputs for the denoiser: e.g. prompt_embeds, negative_prompt_embeds, etc.)kwargs_typer   r   r   )namer   )rV   )r   r:   rE   valuesrA   tupleappend)r   r#   guider_input_namesuncond_guider_input_namesvaluerV   r   r   r   r#   {   s.   
zZImageLoopDenoiser.inputsr$   r%   r&   r'   c           
         s   |j j|j|d |j j}|D ]E}|j |j | } fdd  fdd| D }|jdj	j
dd|d }tj|dd	d
}	|	 |_|j |j q| |d _|fS )N)steprR   r1   c                    s8   t | tjr|  S t | tr fdd| D S | S )Nc                    s   g | ]} |qS r   r   ).0r'   )_convert_dtyper    r   r   
<listcomp>   s    zGZImageLoopDenoiser.__call__.<locals>._convert_dtype.<locals>.<listcomp>)rA   r!   r"   r+   r,   )vr    )r_   )r    r   r_      s
   

z3ZImageLoopDenoiser.__call__.<locals>._convert_dtypec                    s,   i | ]\}}|j  v r| |jqS r   )rE   keysr    )r^   kra   r_   r%   r   r   r   
<dictcomp>   s
    z/ZImageLoopDenoiser.__call__.<locals>.<dictcomp>F)xr'   return_dictr   r(   r	   r   )rJ   	set_staterR   prepare_inputs_from_block_staterE   prepare_modelsrP   as_dictitemsr.   r1   r!   stacksqueeze
noise_predcleanup_models)
r   r$   r%   r&   r'   guider_stateguider_state_batchcond_kwargsmodel_out_listro   r   rd   r   r2      s.   
zZImageLoopDenoiser.__call__)r3   r4   r5   r6   rB   r8   r   rG   r7   r,   r   rQ   r   rX   r#   r!   r9   r   r
   r:   r"   r   r2   __classcell__r   r   rH   r   r;   N   s0    
r;   c                	   @   s\   e Zd ZdZedee fddZedefddZ	e
 deded	ed
e
jfddZdS )ZImageLoopAfterDenoiserr   r   c                 C      t dtgS N	schedulerr   r   r   r   r   r   rQ         z+ZImageLoopAfterDenoiser.expected_componentsc                 C   r   )Nzstep within the denoising loop that update the latents. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `ZImageDenoiseLoopWrapper`)r   r   r   r   r   r      r   z#ZImageLoopAfterDenoiser.descriptionr$   r%   r&   r'   c                 C   sP   |j j}|jj|j ||j  ddd |_ |j j|kr$|j ||_ ||fS )NF)rg   r   )r   r    ry   r]   ro   floatr+   )r   r$   r%   r&   r'   latents_dtyper   r   r   r2      s   z ZImageLoopAfterDenoiser.__call__N)r3   r4   r5   r6   r7   r,   r   rQ   r8   r   r!   r9   r   r
   r:   r"   r2   r   r   r   r   rv      s    "rv   c                   @   sl   e Zd ZdZedefddZedee fddZ	edee
 fddZe d	ed
edefddZdS )ZImageDenoiseLoopWrapperr   r   c                 C   r   )NzPipeline block that iteratively denoise the latents over `timesteps`. The specific steps with each iteration can be customized with `sub_blocks` attributesr   r   r   r   r   r      r   z$ZImageDenoiseLoopWrapper.descriptionc                 C   rw   rx   rz   r   r   r   r   loop_expected_components   r{   z1ZImageDenoiseLoopWrapper.loop_expected_componentsc                 C   s"   t ddtjddt ddtddgS )N	timestepsTzWThe timesteps to use for the denoising process. Can be generated in set_timesteps step.r   rR   rS   )r   r!   r"   r:   r   r   r   r   loop_inputs   s   z$ZImageDenoiseLoopWrapper.loop_inputsr$   statec                 C   s   |  |}tt|j|j|jj  d|_| j|jd;}t	|jD ]-\}}| j
||||d\}}|t|jd ksK|d |jkrO|d |jj dkrO|  q"W d    n1 sZw   Y  | || ||fS )Nr   )total)r&   r'   r   )get_block_statemaxlenr   rR   ry   ordernum_warmup_stepsprogress_bar	enumerate	loop_stepupdateset_block_state)r   r$   r   r%   r   r&   r'   r   r   r   r2     s   
"z!ZImageDenoiseLoopWrapper.__call__N)r3   r4   r5   r6   r7   r8   r   r,   r   r   r   r   r!   r9   r   r   r2   r   r   r   r   r~      s    r~   c                   @   s:   e Zd ZeeddidegZg dZede	fddZ
dS )	ZImageDenoiseStepr<   r=   )r@   )before_denoiserdenoiserafter_denoiserr   c                 C   r   )Na_  Denoise step that iteratively denoise the latents. 
Its loop logic is defined in `ZImageDenoiseLoopWrapper.__call__` method 
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
 - `ZImageLoopBeforeDenoiser`
 - `ZImageLoopDenoiser`
 - `ZImageLoopAfterDenoiser`
This block supports text-to-image and image-to-image tasks for Z-Image.r   r   r   r   r   r   0  r   zZImageDenoiseStep.descriptionN)r3   r4   r5   r   r;   rv   block_classesblock_namesr7   r8   r   r   r   r   r   r   $  s    	r   )typingr   r!   configuration_utilsr   guidersr   modelsr   
schedulersr   utilsr   modular_pipeliner
   r   r   r   modular_pipeline_utilsr   r   r   
get_loggerr3   loggerr   r;   rv   r~   r   r   r   r   r   <module>   s    
)~"6