o
    ۷iV                     @   s6  d dl mZ d dlZddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ d	d
lmZmZmZmZ d	dlmZmZmZ ddlmZ eeZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZG dd deZ G dd deZ!G dd deZ"G d d! d!eZ#dS )"    )AnyN   )
FrozenDict)ClassifierFreeGuidance)WanTransformer3DModel)UniPCMultistepScheduler)logging   )
BlockStateLoopSequentialPipelineBlocksModularPipelineBlocksPipelineState)ComponentSpec
ConfigSpec
InputParam   )WanModularPipelinec                	   @   \   e Zd ZdZedefddZedee fddZ	e
 deded	ed
e
jfddZdS )WanLoopBeforeDenoiserwanreturnc                 C      	 dS Nzstep within the denoising loop that prepares the latent input for the denoiser. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `WanDenoiseLoopWrapper`) selfr   r   ]/home/ubuntu/vllm_env/lib/python3.10/site-packages/diffusers/modular_pipelines/wan/denoise.pydescription(      z!WanLoopBeforeDenoiser.descriptionc                 C   s$   t ddtjddt ddtjddgS )NlatentsT^The initial latents to use for the denoising process. Can be generated in prepare_latent step.required	type_hintr   dtype>The dtype of the model inputs. Can be generated in input step.r   torchTensorr$   r   r   r   r   inputs0   s   zWanLoopBeforeDenoiser.inputs
componentsblock_stateitc                 C   s   |j |j|_||fS )N)r   tor$   latent_model_inputr   r*   r+   r,   r-   r   r   r   __call__A   s   zWanLoopBeforeDenoiser.__call__N__name__
__module____qualname__
model_namepropertystrr   listr   r)   r'   no_gradr   r
   intr(   r1   r   r   r   r   r   %   s    "r   c                	   @   r   ) WanImage2VideoLoopBeforeDenoiserr   r   c                 C   r   r   r   r   r   r   r   r   J   r   z,WanImage2VideoLoopBeforeDenoiser.descriptionc                 C   s4   t ddtjddt ddtjddt ddtjddgS )	Nr   Tr    r!   image_condition_latentszThe image condition latents to use for the denoising process. Can be generated in prepare_first_frame_latents/prepare_first_last_frame_latents step.r$   r%   r&   r   r   r   r   r)   R   s&   z'WanImage2VideoLoopBeforeDenoiser.inputsr*   r+   r,   r-   c                 C   s(   t j|j|jgdd|j|_||fS )Nr   )dim)r'   catr   r=   r.   r$   r/   r0   r   r   r   r1   i   s   z)WanImage2VideoLoopBeforeDenoiser.__call__Nr2   r   r   r   r   r<   G   s    "r<   c                       s   e Zd ZdZddifdeeef f fddZede	e
 fdd	Zedefd
dZede	eeef  fddZe dedededejdef
ddZ  ZS )WanLoopDenoiserr   encoder_hidden_statesprompt_embedsnegative_prompt_embedsguider_input_fieldsc                    0   t |tstdt| || _t   dS )au  Initialize a denoiser block that calls the denoiser model. This block is used in Wan2.1.

        Args:
            guider_input_fields: A dictionary that maps each argument expected by the denoiser model
                (for example, "encoder_hidden_states") to data stored on 'block_state'. The value can be either:

                - A tuple of strings. For instance, {"encoder_hidden_states": ("prompt_embeds",
                  "negative_prompt_embeds")} tells the guider to read `block_state.prompt_embeds` and
                  `block_state.negative_prompt_embeds` and pass them as the conditional and unconditional batches of
                  'encoder_hidden_states'.
                - A string. For example, {"encoder_hidden_image": "image_embeds"} makes the guider forward
                  `block_state.image_embeds` for both conditional and unconditional batches.
        0guider_input_fields must be a dictionary but is N
isinstancedict
ValueErrortype_guider_input_fieldssuper__init__r   rE   	__class__r   r   rO   t      
zWanLoopDenoiser.__init__r   c                 C   s"   t dttddiddt dtgS )Nguiderguidance_scaleg      @from_configconfigdefault_creation_methodtransformerr   r   r   r   r   r   r   r   expected_components   s   
z#WanLoopDenoiser.expected_componentsc                 C   r   NzStep within the denoising loop that denoise the latents with guidance. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `WanDenoiseLoopWrapper`)r   r   r   r   r   r      r   zWanLoopDenoiser.descriptionc                 C   p   t dt ddtddg}g }| j D ]}t|tr || q|| q|D ]}|t |dtj	d q(|S Nattention_kwargsnum_inference_stepsTgThe number of inference steps to use for the denoising process. Can be generated in set_timesteps step.r!   )namer"   r#   
r   r;   rM   valuesrI   tupleextendappendr'   r(   r   r)   guider_input_namesvaluerc   r   r   r   r)          	
zWanLoopDenoiser.inputsr*   r+   r,   r-   c                    s   |j j| j|d |j  j}|D ]A}|j |j | } fdd| D }|jd j	
 j| j	jd 
 j jdd|d |_|j |j q| |d  _| fS )Nstepra   timestepc                    <   i | ]\}}|j  v r|t|tjr| jn|qS r   rM   keysrI   r'   r(   r.   r$   .0kvr+   r   r   r   
<dictcomp>   
    z,WanLoopDenoiser.__call__.<locals>.<dictcomp>r   Fhidden_statesro   r`   return_dictr   )rT   	set_statera   prepare_inputs_from_block_staterM   prepare_modelsrZ   as_dictitemsr/   r.   r$   expandshaper`   
noise_predcleanup_models)r   r*   r+   r,   r-   guider_stateguider_state_batchcond_kwargsr   rw   r   r1      s*   
zWanLoopDenoiser.__call__)r3   r4   r5   r6   rJ   r8   r   rO   r7   r9   r   r\   r   rf   r)   r'   r:   r   r
   r;   r(   r   r1   __classcell__r   r   rQ   r   r@   q   s0    
r@   c                       s   e Zd ZdZddifdeeef f fddZede	e
 fdd	Zedefd
dZede	e fddZede	eeef  fddZe dedededejdef
ddZ  ZS )Wan22LoopDenoiserr   rA   rB   rE   c                    rF   )ay  Initialize a denoiser block that calls the denoiser model. This block is used in Wan2.2.

        Args:
            guider_input_fields: A dictionary that maps each argument expected by the denoiser model
                (for example, "encoder_hidden_states") to data stored on `block_state`. The value can be either:

                - A tuple of strings. For instance, `{"encoder_hidden_states": ("prompt_embeds",
                  "negative_prompt_embeds")}` tells the guider to read `block_state.prompt_embeds` and
                  `block_state.negative_prompt_embeds` and pass them as the conditional and unconditional batches of
                  `encoder_hidden_states`.
                - A string. For example, `{"encoder_hidden_image": "image_embeds"}` makes the guider forward
                  `block_state.image_embeds` for both conditional and unconditional batches.
        rG   NrH   rP   rQ   r   r   rO      rS   zWan22LoopDenoiser.__init__r   c                 C   s@   t dttddiddt dttddiddt dtt d	tgS )
NrT   rU   g      @rV   rW   guider_2g      @rZ   transformer_2r[   r   r   r   r   r\      s   

z%Wan22LoopDenoiser.expected_componentsc                 C   r   r]   r   r   r   r   r   r     r   zWan22LoopDenoiser.descriptionc                 C   s   t ddddgS )Nboundary_ratiog      ?zUThe boundary ratio to divide the denoising loop into high noise and low noise stages.)rc   defaultr   )r   r   r   r   r   expected_configs  s   z"Wan22LoopDenoiser.expected_configsc                 C   r^   r_   rd   ri   r   r   r   r)     rl   zWan22LoopDenoiser.inputsr*   r+   r,   r-   c           	         s   |j j|j }||kr|j _|j _n|j _|j _ jj| j	|d  j
 j}|D ]A} j j | } fdd| D } jd j j| jjd  j jdd|d |_ j j q0 |d  _| fS )Nrm   c                    rp   r   rq   rs   rw   r   r   rx   O  ry   z.Wan22LoopDenoiser.__call__.<locals>.<dictcomp>r   Frz   r   )rX   r   num_train_timestepsrZ   current_modelrT   r   r   r}   ra   r~   rM   r   r   r   r/   r.   r$   r   r   r`   r   r   )	r   r*   r+   r,   r-   boundary_timestepr   r   r   r   rw   r   r1   3  s6   

zWan22LoopDenoiser.__call__)r3   r4   r5   r6   rJ   r8   r   rO   r7   r9   r   r\   r   r   r   rf   r)   r'   r:   r   r
   r;   r(   r   r1   r   r   r   rQ   r   r      s4    
	r   c                	   @   s\   e Zd ZdZedee fddZedefddZ	e
 deded	ed
e
jfddZdS )WanLoopAfterDenoiserr   r   c                 C      t dtgS N	schedulerr   r   r   r   r   r   r\   i     z(WanLoopAfterDenoiser.expected_componentsc                 C   r   )Nzstep within the denoising loop that update the latents. This block should be used to compose the `sub_blocks` attribute of a `LoopSequentialPipelineBlocks` object (e.g. `WanDenoiseLoopWrapper`)r   r   r   r   r   r   o  r   z WanLoopAfterDenoiser.descriptionr*   r+   r,   r-   c                 C   sP   |j j}|jj|j ||j  ddd |_ |j j|kr$|j ||_ ||fS )NF)r|   r   )r   r$   r   rn   r   floatr.   )r   r*   r+   r,   r-   latents_dtyper   r   r   r1   w  s   zWanLoopAfterDenoiser.__call__N)r3   r4   r5   r6   r7   r9   r   r\   r8   r   r'   r:   r   r
   r;   r(   r1   r   r   r   r   r   f  s    "r   c                   @   sl   e Zd ZdZedefddZedee fddZ	edee
 fddZe d	ed
edefddZdS )WanDenoiseLoopWrapperr   r   c                 C   r   )NzPipeline block that iteratively denoise the latents over `timesteps`. The specific steps with each iteration can be customized with `sub_blocks` attributesr   r   r   r   r   r     r   z!WanDenoiseLoopWrapper.descriptionc                 C   r   r   r   r   r   r   r   loop_expected_components  r   z.WanDenoiseLoopWrapper.loop_expected_componentsc                 C   s"   t ddtjddt ddtddgS )N	timestepsTzWThe timesteps to use for the denoising process. Can be generated in set_timesteps step.r!   ra   rb   )r   r'   r(   r;   r   r   r   r   loop_inputs  s   z!WanDenoiseLoopWrapper.loop_inputsr*   statec                 C   s   |  |}tt|j|j|jj  d|_| j|jd;}t	|jD ]-\}}| j
||||d\}}|t|jd ksK|d |jkrO|d |jj dkrO|  q"W d    n1 sZw   Y  | || ||fS )Nr   )total)r,   r-   r   )get_block_statemaxlenr   ra   r   ordernum_warmup_stepsprogress_bar	enumerate	loop_stepupdateset_block_state)r   r*   r   r+   r   r,   r-   r   r   r   r1     s   
"zWanDenoiseLoopWrapper.__call__N)r3   r4   r5   r6   r7   r8   r   r9   r   r   r   r   r'   r:   r   r   r1   r   r   r   r   r     s    r   c                   @   :   e Zd ZeeddidegZg dZede	fddZ
dS )	WanDenoiseSteprA   rB   rE   before_denoiserdenoiserafter_denoiserr   c                 C   r   )Na?  Denoise step that iteratively denoise the latents. 
Its loop logic is defined in `WanDenoiseLoopWrapper.__call__` method 
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
 - `WanLoopBeforeDenoiser`
 - `WanLoopDenoiser`
 - `WanLoopAfterDenoiser`
This block supports text-to-video tasks for wan2.1.r   r   r   r   r   r     r   zWanDenoiseStep.descriptionN)r3   r4   r5   r   r@   r   block_classesblock_namesr7   r8   r   r   r   r   r   r         	r   c                   @   r   )	Wan22DenoiseSteprA   rB   r   r   r   c                 C   r   )NaA  Denoise step that iteratively denoise the latents. 
Its loop logic is defined in `WanDenoiseLoopWrapper.__call__` method 
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
 - `WanLoopBeforeDenoiser`
 - `Wan22LoopDenoiser`
 - `WanLoopAfterDenoiser`
This block supports text-to-video tasks for Wan2.2.r   r   r   r   r   r     r   zWan22DenoiseStep.descriptionN)r3   r4   r5   r   r   r   r   r   r7   r8   r   r   r   r   r   r     r   r   c                   @   s<   e Zd ZeeddddegZg dZede	fddZ
d	S )
WanImage2VideoDenoiseSteprB   image_embeds)rA   encoder_hidden_states_imager   r   r   c                 C   r   )NaK  Denoise step that iteratively denoise the latents. 
Its loop logic is defined in `WanDenoiseLoopWrapper.__call__` method 
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
 - `WanImage2VideoLoopBeforeDenoiser`
 - `WanLoopDenoiser`
 - `WanLoopAfterDenoiser`
This block supports image-to-video tasks for wan2.1.r   r   r   r   r   r     r   z%WanImage2VideoDenoiseStep.descriptionN)r3   r4   r5   r<   r@   r   r   r   r7   r8   r   r   r   r   r   r     s    
r   c                   @   r   )	Wan22Image2VideoDenoiseSteprA   rB   r   r   r   c                 C   r   )NaK  Denoise step that iteratively denoise the latents. 
Its loop logic is defined in `WanDenoiseLoopWrapper.__call__` method 
At each iteration, it runs blocks defined in `sub_blocks` sequentially:
 - `WanImage2VideoLoopBeforeDenoiser`
 - `WanLoopDenoiser`
 - `WanLoopAfterDenoiser`
This block supports image-to-video tasks for Wan2.2.r   r   r   r   r   r     r   z'Wan22Image2VideoDenoiseStep.descriptionN)r3   r4   r5   r<   r   r   r   r   r7   r8   r   r   r   r   r   r   
  r   r   )$typingr   r'   configuration_utilsr   guidersr   modelsr   
schedulersr   utilsr   modular_pipeliner
   r   r   r   modular_pipeline_utilsr   r   r   r   
get_loggerr3   loggerr   r<   r@   r   r   r   r   r   r   r   r   r   r   r   <module>   s,   
"*n "6