o
    ٷi*                     @  s   d Z ddlmZ ddlmZ ddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZmZ dd	lmZmZmZ G d
d deZdddZdS )a;  
Hook-based TeaCache implementation for vLLM-Omni.

This module implements a diffusers-style hook system that completely intercepts
the transformer forward pass, eliminating the need for any TeaCache-specific
code in model definitions. Model developers only need to add an extractor function
to support new models.
    )annotations)AnyN)TeaCacheConfig)get_extractor)TeaCacheState)!get_classifier_free_guidance_rank'get_classifier_free_guidance_world_size)HookRegistry	ModelHookStateManagerc                      sN   e Zd ZdZdZd fddZdd
dZdddZdddZdddZ	  Z
S )TeaCacheHookab  
    ModelHook implementing TeaCache for transformer models.

    This hook completely intercepts the transformer's forward pass and implements
    adaptive caching based on timestep embedding similarity. It's model-agnostic
    and supports multiple model types through extractor functions.

    Key features:
    - Zero changes to model code
    - CFG-aware with separate states for positive/negative branches
    - CFG-parallel compatible: properly detects branch identity across ranks
    - Model-specific polynomial rescaling
    - Auto-detection of model types

    Attributes:
        config: TeaCache configuration with thresholds and callbacks
        rescale_func: Polynomial function for rescaling L1 distances
        state_manager: Manages TeaCacheState across forward passes
        extractor_fn: Model-specific function to extract modulated input
    teacacheconfigr   c                   s8   t    || _t|j| _tt| _	d| _
d| _dS )zl
        Initialize TeaCacheHook.

        Args:
            config: TeaCache configuration object.
        Nr   )super__init__r   nppoly1dcoefficientsrescale_funcr   r   state_managerextractor_fn_forward_cnt)selfr   	__class__ [/home/ubuntu/.local/lib/python3.10/site-packages/vllm_omni/diffusion/cache/teacache/hook.pyr   6   s   


zTeaCacheHook.__init__moduletorch.nn.Modulereturnc                 C  s   t | jj| _| jd |S )z
        Initialize hook with extractor from config transformer model type.

        Args:
            module: The module to initialize the hook for.

        Returns:
            The initialized module.
        r   )r   r   transformer_typer   r   set_contextr   r   r   r   r   initialize_hookD   s   zTeaCacheHook.initialize_hookargsr   kwargsc                 O  s  | j |g|R i |}t|ddr1t }|dkr%t }|dkr"dnd}n| jd dkr.dnd}nd}d| }| j| | j }	| |	|j	}
|
sm|	j
d	urm|j|	j
 |_|	jd	uri|jd	uri|j|	j |_|j}nA|j }|jd	ur||j nd	}| }|d |_t|dkr|jd	ur|d |_|j|  |	_
|d	ur|j|  |	_|j}|j	 |	_|	 jd7  _|  jd7  _||S )
aJ  
        Generic forward handler that works for ANY model.

        This method is completely model-agnostic. All model-specific logic
        is encapsulated in the extractor function that returns a CacheContext.

        The extractor does:
        - Model-specific preprocessing
        - Extraction of modulated input for cache decision
        - Providing transformer execution callable
        - Providing postprocessing callable

        This hook does:
        - CFG-aware state management
        - Cache decision logic (generic)
        - Residual caching and reuse

        Args:
            module: Transformer module (any architecture)
            *args: Positional arguments for model forward
            **kwargs: Keyword arguments for model forward

        Returns:
            Model output (format depends on model)
        do_true_cfgF   r   negativepositive   	teacache_N)r   getattrr   r   r   r   r!   	get_state _should_compute_full_transformermodulated_inputprevious_residualhidden_statesprevious_residual_encoderencoder_hidden_statesclonerun_transformer_blockslendetachprevious_modulated_inputcntpostprocess)r   r   r$   r%   ctxcfg_parallel_sizecfg_rankcache_branchcontext_namestateshould_computeoutputori_hidden_statesori_encoder_hidden_statesoutputsr   r   r   new_forwardW   s@   






zTeaCacheHook.new_forwardr@   r   modulated_inptorch.Tensorboolc                 C  s   |j dkr
d|_dS |jdu rdS ||j   |j  d    }t| |}| jt|7  _|j| j	j
k r@dS d|_dS )a  
        Determine whether to compute full transformer or reuse cached residual.

        This implements the core TeaCache algorithm:
        1. Always compute first timestep
        2. For intermediate steps:
           - Compute relative L1 distance between current and previous modulated inputs
           - Apply polynomial rescaling with model-specific coefficients
           - Accumulate rescaled distances
           - Compare to threshold: below = cache, above = compute

        Args:
            state: Current TeaCacheState containing counters and cached values
            modulated_inp: Modulated input extracted from first transformer block

        Returns:
            True to compute full transformer, False to reuse cached residual
        r   g        TNg:0yE>F)r9   accumulated_rel_l1_distancer8   absmeancpuitemfloatr   r   rel_l1_thresh)r   r@   rG   rel_distancerescaled_distancer   r   r   r.      s"   


z-TeaCacheHook._should_compute_full_transformerc                 C  s   | j   d| _|S )z
        Reset all cached states for a new inference run.

        Args:
            module: The module to reset state for.

        Returns:
            The module with reset state.
        r   )r   resetr   r"   r   r   r   reset_state   s   

zTeaCacheHook.reset_state)r   r   )r   r   r   r   )r   r   r$   r   r%   r   r   r   )r@   r   rG   rH   r   rI   )__name__
__module____qualname____doc__
_HOOK_NAMEr   r#   rF   r.   rT   __classcell__r   r   r   r   r      s    


b1r   r   r   r   r   r   Nonec                 C  s$   t | }t|}|tj| dS )a  
    Apply TeaCache optimization to a transformer module.

    This function registers a TeaCacheHook that completely intercepts the
    module's forward pass, implementing adaptive caching without any changes
    to the model code.

    Args:
        module: Transformer model to optimize (e.g., QwenImageTransformer2DModel)
        config: TeaCacheConfig specifying caching parameters

    Example:
        >>> config = TeaCacheConfig(
        ...     rel_l1_thresh=0.2,
        ...     transformer_type="QwenImageTransformer2DModel"
        ... )
        >>> apply_teacache_hook(transformer, config)
        >>> # Transformer bound to the pipeline now uses TeaCache automatically,
        ... # no code changes needed!
    N)r	   get_or_creater   register_hookrY   )r   r   registryhookr   r   r   apply_teacache_hook   s   
r`   )r   r   r   r   r   r[   )rX   
__future__r   typingr   numpyr   torch)vllm_omni.diffusion.cache.teacache.configr   -vllm_omni.diffusion.cache.teacache.extractorsr   (vllm_omni.diffusion.cache.teacache.stater   .vllm_omni.diffusion.distributed.parallel_stater   r   vllm_omni.diffusion.hooksr	   r
   r   r   r`   r   r   r   r   <module>   s   	 \