o
    ٷir                     @   sF  U d Z ddlZddlmZ ddlmZ ddlmZmZ ddl	Z	ddl
Z
ddl	mZmZmZmZmZ ddlmZ ddlmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZm Z  ee!Z"d1dede#ddfddZ$i Z%e&e'ef e(d< dedefddZ)dededee*gdf fddZ+dededee*gdf fddZ,dededee*gdf fddZ-dededee*gdf fdd Z.dededee*gdf fd!d"Z/G d#d$ d$eZ0G d%d& d&eZ1G d'd( d(eZ2dededee*gdf fd)d*Z3e%4e+e+e+e-e,e,e.e3d+ G d,d- d-eZ5ded.e ded- fd/d0Z6dS )2z
cache-dit integration backend for vllm-omni.

This module provides a CacheDiTBackend class to enable cache-dit acceleration on diffusion
pipelines in vllm-omni, supporting both single and dual-transformer architectures.
    N)Callable)	ExitStack)AnyOptional)BlockAdapterDBCacheConfigForwardPatternParamsModifierTaylorSeerCalibratorConfig)FakeDiffusionPipeline)CachedAdapter)CachedBlocks_Pattern_0_1_2)BasicCacheConfig)CachedContextManager)init_logger)CacheBackend)DiffusionCacheConfigOmniDiffusionConfigTpipelinedetailsreturnc                 C   s2   t j| j|d t| drt j| j|d d S d S )N)r   transformer_2)	cache_ditsummarytransformerhasattrr   )r   r    r   _/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/diffusion/cache/cache_dit_backend.pycache_summary    s   
r   CUSTOM_DIT_ENABLERScache_configc              	   C   s"   t d| j| j| j| j| j| jdS )zBuild DBCacheConfig with optional SCM (Step Computation Masking) support.

    Args:
        cache_config: DiffusionCacheConfig instance.

    Returns:
        DBCacheConfig instance with SCM support if configured.
    N)num_inference_stepsFn_compute_blocksBn_compute_blocksmax_warmup_stepsmax_cached_stepsmax_continuous_cached_stepsresidual_diff_threshold)r   r"   r#   r$   r%   r&   r'   r    r   r   r   _build_db_cache_config-   s   
r)   c                    s   t jtjjgjjjjgtjtjgtt	 j
jjddtt	 j
ddddgddt	jjjjjjddd d	td
tttf ffdd ddtd	tdtd
df fdd}|S )a  Enable cache-dit for Wan2.2 dual-transformer architecture.

    Wan2.2 uses two transformers (transformer and transformer_2) that need
    to be enabled together using BlockAdapter.

    Args:
        pipeline: The Wan2.2 pipeline instance.
        cache_config: DiffusionCacheConfig instance with cache configuration.

    Returns:
        A refresh function that can be called to update cache context with new num_inference_steps.
    )r$   r%   r(         T)r   blocksforward_patternparams_modifiershas_separate_cfgN)r"   r#   r$   r%   r&   r'   r!   r!   r   c                    s~    j dur j  jjj }nd}t j j} jj| |d  jj	}d}|D ]}|du s2||kr6|d7 }q(| | }||fS )a  Split inference steps into high-noise and low-noise steps for Wan2.2.

        This is an internal helper function specific to Wan2.2's dual-transformer
        architecture that uses boundary_ratio to determine the split point.

        Args:
            num_inference_steps: Total number of inference steps.

        Returns:
            A tuple of (num_high_noise_steps, num_low_noise_steps).
        N)devicer      )
boundary_ratio	schedulerconfignum_train_timestepsnextr   
parametersr0   set_timesteps	timesteps)r!   boundary_timestepr0   r9   num_high_noise_stepstnum_low_noise_steps)r   r   r   _split_inference_steps|   s   
z6enable_cache_for_wan22.<locals>._split_inference_stepsr   verbosec                    s    |\}}j du rtj| j||d tj| j||d dS tj| jt j|tjj |djd|d tj| jt j|tjj |djd|d dS )zRefresh cache context for both transformers with new num_inference_steps.

        Args:
            pipeline: The Wan2.2 pipeline instance.
            num_inference_steps: New number of inference steps.
        Nr!   r?   mask_policytotal_stepsr!   steps_computation_masksteps_computation_policyr    r?   )	scm_steps_mask_policyr   refresh_contextr   r   r   reset
steps_maskscm_steps_policy)r   r!   r?   r;   r=   )r>   r    r   r   refresh_cache_context   sD   


z5enable_cache_for_wan22.<locals>.refresh_cache_contextT)r   enable_cacher   r   r   r,   r   	Pattern_2r	   r   rJ   r$   r%   r"   r#   r&   r'   inttupler   bool)r   r    rM   r   )r>   r    r   r   enable_cache_for_wan22C   sN   +"/rT   c              	         t  }d} jr j}t|d}td|  t||d}td|j d|j d|j	 d t
jt| j| jj| jjgtjtjg|gd	|d
 ddtdtdtddf fdd}|S )zEnable cache-dit for LongCatImage pipeline.

    Args:
        pipeline: The LongCatImage pipeline instance.
        cache_config: DiffusionCacheConfig instance with cache configuration.
    Ntaylorseer_orderTaylorSeer enabled with order=r    calibrator_configzEEnabling cache-dit on LongCatImage transformer with BlockAdapter: Fn=, Bn=, W=, r   r,   r-   r.   r(   Tr   r!   r?   r   c                    R    j du rtj| j||d dS tj| jt j|tj j |d jd|d dS zRefresh cache context for the transformer with new num_inference_steps.

        Args:
            pipeline: The LongCatImage pipeline instance.
            num_inference_steps: New number of inference steps.
        Nr@   rA   rD   rG   rH   r   rI   r   r   rJ   rK   rL   r   r!   r?   r(   r   r   rM         

z=enable_cache_for_longcat_image.<locals>.refresh_cache_contextrN   r)   enable_taylorseerrW   r
   loggerinfor	   r"   r#   r$   r   rO   r   r   transformer_blockssingle_transformer_blocksr   	Pattern_1r   rQ   rS   r   r    db_cache_config
calibratorrW   modifierrM   r   r(   r   enable_cache_for_longcat_image   s@   


 ro   c              	      rU   )aI  Enable cache-dit for Flux.1-dev pipeline.

    Args:
        pipeline: The Flux pipeline instance.
        cache_config: DiffusionCacheConfig instance with cache configuration.
    Returns:
        A refresh function that can be called with a new ``num_inference_steps``
        to update the cache context for the pipeline.
    NrV   rX   rY   z=Enabling cache-dit on Flux transformer with BlockAdapter: Fn=r[   r\   r]   r^   r(   Tr   r!   r?   r   c                    r_   )zRefresh cache context for the transformer with new num_inference_steps.

        Args:
            pipeline: The Flux pipeline instance.
            num_inference_steps: New number of inference steps.
        Nr@   rA   rD   rG   ra   rb   r(   r   r   rM   C  rc   z4enable_cache_for_flux.<locals>.refresh_cache_contextrN   rd   rk   r   r(   r   enable_cache_for_flux  s@   


 rp   c              	      s   t  }d} jr j}t|d}td|  t||d}td|j d|j d|j	 d t
jt| j| jjtj|gd	|d
 ddtdtdtddf fdd}|S )zEnable cache-dit for StableDiffusion3Pipeline.

    Args:
        pipeline: The StableDiffusion3 pipeline instance.
        cache_config: DiffusionCacheConfig instance with cache configuration.
    NrV   rX   rY   zIEnabling cache-dit on StableDiffusion3 transformer with BlockAdapter: Fn=r[   r\   r]   r^   r(   Tr   r!   r?   r   c                    r_   r`   ra   rb   r(   r   r   rM     rc   z3enable_cache_for_sd3.<locals>.refresh_cache_contextrN   )r)   re   rW   r
   rf   rg   r	   r"   r#   r$   r   rO   r   r   rh   r   rj   r   rQ   rS   rk   r   r(   r   enable_cache_for_sd3]  s<   
 rq   c              	      s   t  }d} jr j}t|d}td|  td|j d|j d|j d t	j
| j||d dd
tdtdtddf fdd}|S )a>  Enable cache-dit for regular single-transformer DiT models.

    Args:
        pipeline: The diffusion pipeline instance.
        cache_config: DiffusionCacheConfig instance with cache configuration.

    Returns:
        A refresh function that can be called to update cache context with new num_inference_steps.
    NrV   rX   z&Enabling cache-dit on transformer: Fn=r[   r\   r]   rY   Tr   r!   r?   r   c                    r_   )zRefresh cache context for the transformer with new num_inference_steps.

        Args:
            pipeline: The diffusion pipeline instance.
            num_inference_steps: New number of inference steps.
        Nr@   rA   rD   rG   ra   rb   r(   r   r   rM     rc   z3enable_cache_for_dit.<locals>.refresh_cache_contextrN   )r)   re   rW   r
   rf   rg   r"   r#   r$   r   rO   r   r   rQ   rS   )r   r    rl   rZ   rW   rM   r   r(   r   enable_cache_for_dit  s,   
 rr   c                   @   sR   e Zd ZdZejj			ddejdejdeded	e	ejejdB f f
d
dZ
dS )BagelCachedContextManagerz
    Custom CachedContextManager for Bagel that safely handles NaiveCache objects
    (mapped to encoder_hidden_states) by skipping tensor operations on them.
    NBn
Bn_encoderhidden_statesencoder_hidden_statesprefixencoder_prefixr   c                 C   s   d|v r
|  |}n| |}|d usJ | d|  r#|| }n|}| }|d urad|v r7| |}n| |}|d urW|  rUt|tj	rTt|tj	rT|| }n|}t|tj	ra| }||fS )Nrt   z_buffer must be set before)
get_Bn_bufferget_Fn_bufferis_cache_residual
contiguousget_Bn_encoder_bufferget_Fn_encoder_bufferis_encoder_cache_residual
isinstancetorchTensor)selfrv   rw   rx   ry   hidden_states_prevencoder_hidden_states_prevr   r   r   apply_cache  s.   	


z%BagelCachedContextManager.apply_cache)Nrt   ru   )__name__
__module____qualname____doc__r   compilerdisabler   strrR   r   r   r   r   r   rs     s"    rs   c                   @   s@   e Zd ZdZdejdejfddZdedejdejfddZd	S )
BagelCachedBlocksz
    Custom CachedBlocks for Bagel that safely handles NaiveCache objects
    by adding isinstance checks in call_Mn_blocks and compute_or_prune.
    rv   rw   c           
      O   s   |}|}|   D ]}|||g|R i |}| ||\}}q| }|| }|d ur>|d ur>t|tjr>| }|| }	nd }	||||	fS N)
_Mn_blocks_process_block_outputsr}   r   r   r   )
r   rv   rw   argskwargsoriginal_hidden_statesoriginal_encoder_hidden_statesblockhidden_states_residualencoder_hidden_states_residualr   r   r   call_Mn_blocks  s4   

z BagelCachedBlocks.call_Mn_blocksblock_idc                 O   s  |}|}| j ||| j d| dd}	tj  |	r[| j  | jj||| j r2| j d| dn| j d| j	 rE| j d| dn| j d| dd\}}tj  ||fS |||g|R i |}| 
||\}}| |s| }|| }
|d ur|d urt|tjr| }|| }nd }| jj|| j d| dd | j r| jj|
| j d| dd n| jj|| j d| dd |d ur| j	 r| jj|| j d| dd n| jj|| j d| dd tj  ||fS )	N__Fn_original)rx   _Bn_residual_Bn_hidden_states_Bn_encoder_residual_Bn_encoder_hidden_states)rx   ry   )_maybe_prunecache_prefixr   _dynamograph_breakcontext_manageradd_pruned_stepapply_pruner|   r   r   _skip_pruner}   r   r   set_Fn_bufferset_Bn_bufferset_Bn_encoder_buffer)r   r   r   rv   rw   r   r   r   r   can_use_pruner   r   r   r   r   compute_or_pruneA  s   




3





z"BagelCachedBlocks.compute_or_pruneN)	r   r   r   r   r   r   r   rQ   r   r   r   r   r   r     s    
(r   c                
   @   sl   e Zd ZdZededeee ee	ee
f  f fddZededee	 dee	eejjf  fddZd	S )
BagelCachedAdapterzc
    Custom CachedAdapter for Bagel that uses BagelCachedContextManager and BagelCachedBlocks.
    block_adapterr   c                    s$  t   t  jr jS j fi |}|dd }|d us%J d jjj}t| dt	t
 j t jtdj fi |\ j_jsp jjjt fdd}| jj_ jj_ntD ]\}}j|fi | qud jj_  fS )Nr    zcache_config can not be None.r   )namepersistent_contextc              
      s   t  3}tD ]\}}|j|fi | q	| g|R i |}  |W  d    S 1 s9w   Y  d S r   )r   zipenter_contextreset_contextapply_stats_hooks)r   r   r   stackcontext_namecontext_kwargsoutputsr   clsr   contexts_kwargsflatten_contextsoriginal_callr   r   new_call  s   
$z3BagelCachedAdapter.create_context.<locals>.new_callT)r   assert_normalized	is_cachedpipecheck_context_kwargsget	__class__r   rs   hashidr   r   modify_context_params_context_managerr   __call__	functoolswraps_original_callr   r   
_is_cachedapply_params_hooks)r   r   r   r    pipe_cls_namer   r   r   r   r   create_context  s8   





z!BagelCachedAdapter.create_contextr   c                 C   s   t | g }t|jdsJ tt|jD ]Y}i }tt|j| D ]F}||t|j|  |  d }tj	
t|j| | |j| |j| | |j|j|j| | |j| | |jj|jd	g||j| | < q#|| q|S )Nr   r    )r   r-   check_forward_patterncheck_num_outputsr   cache_contextr   
cache_type)r   r   r   r   rangelenr   r,   r   nn
ModuleListr   r-   r   r   blocks_nameunique_blocks_namer   r   append)r   r   r   total_cached_blocksiunified_blocks_bind_contextjr    r   r   r   collect_unified_blocks  s.   
z)BagelCachedAdapter.collect_unified_blocksN)r   r   r   r   classmethodr   rR   listr   dictr   r   r   r   r   r   r   r   r   r   r     s     Er   c              	      s   t  }d} jr j}t|d}td|  | jj}td|j d|j	 d|j
 d tjt||jtjd||d	 ddtdtdtddf fdd}|S )aE  Enable cache-dit for Bagel model (via OmniDiffusion pipeline).

    Args:
        pipeline: The OmniDiffusion pipeline instance.
        cache_config: DiffusionCacheConfig instance with cache configuration.

    Returns:
        A refresh function that can be called to update cache context with new num_inference_steps.
    NrV   rX   z,Enabling cache-dit on Bagel transformer: Fn=r[   r\   r]   )r   r,   r-   rY   Tr   r!   r?   r   c                    sV   | j j} jd u rtj|||d d S tj|t j|tj j|d jd|d d S )Nr@   rA   rD   rG   )	language_modelmodelrH   r   rI   r   rJ   rK   rL   )r   r!   r?   r   r(   r   r   rM   B  s   

z5enable_cache_for_bagel.<locals>.refresh_cache_contextrN   )r)   re   rW   r
   rf   rg   r   r   r"   r#   r$   r   applyr   layersr   	Pattern_0r   rQ   rS   )r   r    rl   rZ   rW   r   rM   r   r(   r   enable_cache_for_bagel  s6   
 
r   )Wan22PipelineWan22I2VPipelineWan22TI2VPipelineFluxPipelineLongCatImagePipelineLongCatImageEditPipelineStableDiffusion3PipelineBagelPipelinec                	       sd   e Zd ZdZddef fddZdeddfdd	ZddedededdfddZ	defddZ
  ZS )CacheDiTBackenda  Backend class for cache-dit acceleration on diffusion pipelines.

    This class implements cache-dit acceleration (DBCache, SCM, TaylorSeer) using
    the cache-dit library. It inherits from CacheBackend and provides a unified
    interface for managing cache-dit acceleration on diffusion models.

    Attributes:
        config: Cache configuration (DiffusionCacheConfig instance), inherited from CacheBackend.
        enabled: Whether cache-dit is enabled on this pipeline, inherited from CacheBackend.
        _refresh_func: Internal refresh function for updating cache context.
        _last_num_inference_steps: Last num_inference_steps used for refresh optimization.
    Nr    c                    sF   |du rt  }nt|trt |}n|}t | d| _d| _dS )zInitialize the cache-dit backend.

        Args:
            cache_config: Cache configuration (DiffusionCacheConfig instance, dict, or None).
                         If None or empty, uses default DiffusionCacheConfig().
        N)r   r   r   	from_dictsuper__init___refresh_func_last_num_inference_steps)r   r    r4   r   r   r   r   t  s   

zCacheDiTBackend.__init__r   r   c                 C   s\   |j j}|tv rtd|  t| || j| _nt|| j| _d| _td|  dS )aK  Enable cache-dit on the pipeline if configured.

        This method applies cache-dit acceleration to the appropriate transformer(s)
        in the pipeline. It handles both single-transformer and dual-transformer
        architectures (e.g., Wan2.2).

        Args:
            pipeline: The diffusion pipeline instance.
        z*Using custom cache-dit enabler for model: Tz"Cache-dit enabled successfully on N)	r   r   r   rf   rg   r4   r   rr   enabled)r   r   pipeline_namer   r   r   enable  s   zCacheDiTBackend.enableTr!   r?   c                 C   sb   | j r| jdu rtd dS | jdu s|| jkr/|r#td|  | ||| || _dS dS )a  Refresh cache context with new num_inference_steps.

        This method updates the cache context when num_inference_steps changes
        during inference. For dual-transformer models (e.g., Wan2.2), it automatically
        splits the steps based on boundary_ratio.

        Args:
            pipeline: The diffusion pipeline instance.
            num_inference_steps: New number of inference steps.
            verbose: Whether to log refresh operations.
        Nz7Cache-dit is not enabled. Cannot refresh cache context.zCRefreshing cache context for transformer with num_inference_steps: )r   r   rf   warningr   rg   )r   r   r!   r?   r   r   r   refresh  s   

zCacheDiTBackend.refreshc                 C   s   | j S )zCheck if cache-dit is enabled on this pipeline.

        Returns:
            True if cache-dit is enabled, False otherwise.
        )r   )r   r   r   r   
is_enabled  s   zCacheDiTBackend.is_enabledr   rN   )r   r   r   r   r   r   r   rQ   rS   r   r   __classcell__r   r   r   r   r   f  s    r   	od_configc                 C   s8   |j dks|js
dS t|j}||  | r|S dS )a  Enable cache-dit on the pipeline if configured (convenience function).

    This is a convenience function that creates and enables a CacheDiTBackend.
    For new code, consider using CacheDiTBackend directly.

    Args:
        pipeline: The diffusion pipeline instance.
        od_config: OmniDiffusionConfig with cache configuration.

    Returns:
        A CacheDiTBackend instance if cache-dit is enabled, None otherwise.
    z	cache-ditN)cache_backendr    r   r   r   )r   r  backendr   r   r   may_enable_cache_dit  s
   

r  rN   )7r   r   collections.abcr   
contextlibr   typingr   r   r   r   r   r   r   r	   r
    cache_dit.caching.block_adaptersr   .cache_dit.caching.cache_adapters.cache_adapterr   ,cache_dit.caching.cache_blocks.pattern_0_1_2r    cache_dit.caching.cache_contextsr   .cache_dit.caching.cache_contexts.cache_managerr   vllm.loggerr   vllm_omni.diffusion.cache.baser   vllm_omni.diffusion.datar   r   r   rf   rS   r   r   r   r   __annotations__r)   rQ   rT   ro   rp   rq   rr   rs   r   r   r   updater   r  r   r   r   r   <module>   sX   
   
 G J D<6  vD]