o
    GiYH                     @   sV  d dl mZ d dlmZmZmZmZ d dlZddlm	Z	 ddl
mZ ddlmZ dd	lmZ dd
lmZmZmZmZ e	eZdZdZedgg d ZdejdedejfddZeG dd dZG dd deZG dd deZ G dd deZ!dej"j#deddfddZ$d ej"j#d!ededdfd"d#Z%	$d(d ej"j#d!eded%e&ddf
d&d'Z'dS ))    )	dataclass)ListOptionalTupleUnionN   )
get_logger)unwrap_module   )"_ALL_TRANSFORMER_BLOCK_IDENTIFIERS)TransformerBlockRegistry)	BaseStateHookRegistry	ModelHookStateManagermag_cache_leader_block_hookmag_cache_block_hook      ?)g`?g?gǝ??g      ?N`?     ?r   `?r   r   `?gǝ??N`?r   r   r   r   r   r   "?r   r   gN`?g"?gN`?g"?g;?	src_arraytarget_lengthreturnc                 C   sZ   t | }|dkr| dd S |d |d  }tj|| jtjd}t||  }| | S )za
    Interpolate the source array to the target length using nearest neighbor interpolation.
    r
   N)devicedtype)lentorcharanger   float32roundlong)r   r   
src_lengthscalegridmapped_indices r*   M/home/ubuntu/.local/lib/python3.10/site-packages/diffusers/hooks/mag_cache.pynearest_interpE   s   r,   c                   @   st   e Zd ZU dZdZeed< dZeed< dZ	eed< dZ
eed	< d
Zeeejee f  ed< dZeed< dd Zd
S )MagCacheConfigaY  
    Configuration for [MagCache](https://github.com/Zehong-Ma/MagCache).

    Args:
        threshold (`float`, defaults to `0.06`):
            The threshold for the accumulated error. If the accumulated error is below this threshold, the block
            computation is skipped. A higher threshold allows for more aggressive skipping (faster) but may degrade
            quality.
        max_skip_steps (`int`, defaults to `3`):
            The maximum number of consecutive steps that can be skipped (K in the paper).
        retention_ratio (`float`, defaults to `0.2`):
            The fraction of initial steps during which skipping is disabled to ensure stability. For example, if
            `num_inference_steps` is 28 and `retention_ratio` is 0.2, the first 6 steps will never be skipped.
        num_inference_steps (`int`, defaults to `28`):
            The number of inference steps used in the pipeline. This is required to interpolate `mag_ratios` correctly.
        mag_ratios (`torch.Tensor`, *optional*):
            The pre-computed magnitude ratios for the model. These are checkpoint-dependent. If not provided, you must
            set `calibrate=True` to calculate them for your specific model. For Flux models, you can use
            `diffusers.hooks.mag_cache.FLUX_MAG_RATIOS`.
        calibrate (`bool`, defaults to `False`):
            If True, enables calibration mode. In this mode, no blocks are skipped. Instead, the hook calculates the
            magnitude ratios for the current run and logs them at the end. Use this to obtain `mag_ratios` for new
            models or schedulers.
    gQ?	threshold   max_skip_stepsg?retention_ratio   num_inference_stepsN
mag_ratiosF	calibratec                 C   s   | j d u r| jstd| jsB| j d urDt| j s!t| j | _ t| j | jkrFt	dt| j  d| j  t
| j | j| _ d S d S d S d S )Nat   `mag_ratios` must be provided for MagCache inference because these ratios are model-dependent.
To get them for your model:
1. Initialize `MagCacheConfig(calibrate=True, ...)`
2. Run inference on your model once.
3. Copy the printed ratios array and pass it to `mag_ratios` in the config.
For Flux models, you can import `FLUX_MAG_RATIOS` from `diffusers.hooks.mag_cache`.z%Interpolating mag_ratios from length z to )r4   r5   
ValueErrorr!   	is_tensortensorr    r3   loggerdebugr,   selfr*   r*   r+   __post_init__u   s   	zMagCacheConfig.__post_init__)__name__
__module____qualname____doc__r.   float__annotations__r0   intr1   r3   r4   r   r   r!   Tensorr   r5   boolr=   r*   r*   r*   r+   r-   S   s   
 r-   c                       s&   e Zd Zd fddZdd Z  ZS )MagCacheStater   Nc                    s>   t    d | _d | _d| _d| _d| _d| _d| _g | _	d S NTr           r   )
super__init__previous_residualhead_block_inputshould_computeaccumulated_ratioaccumulated_erraccumulated_steps
step_indexcalibration_ratiosr;   	__class__r*   r+   rK      s   

zMagCacheState.__init__c                 C   s.   d | _ d| _d| _d| _d| _d| _g | _d S rH   )rL   rN   rO   rP   rQ   rR   rS   r;   r*   r*   r+   reset   s   
zMagCacheState.reset)r   N)r>   r?   r@   rK   rV   __classcell__r*   r*   rT   r+   rG      s    rG   c                   @   sL   e Zd ZdZdedefddZdd Zej	j
dejjfd	d
Zdd ZdS )MagCacheHeadHookTstate_managerconfigc                 C   s   || _ || _d | _d S N)rY   rZ   	_metadata)r<   rY   rZ   r*   r*   r+   rK      s   
zMagCacheHeadHook.__init__c                 C      t |}t|j| _|S r[   r	   r   getrU   r\   r<   moduleunwrapped_moduler*   r*   r+   initialize_hook      z MagCacheHeadHook.initialize_hookra   c                 O   s  | j jd u r| j d | jj}| j|||}| j  }||_d}| jj	r)d}n]|j
}|t| jjkr7d}	n| jj| }	t| jj| jj d }
||
kr| j|	9  _| jd7  _| jtd|j 7  _|jd ur}|j| jjkr}|j| jjkr}d}n	d|_d|_d|_||_|sOtd	|j
  |}|j}|j|jkr||j}|j|jkr|| }nm|jd
kr|jd
kr|jd |jd kr|jd |jd kr|jd |jd  }|dkr| }|d d |d d d f | |d d |d d d f< ntd|j d|j d ntd|j d|j d | jj d urM| jd||}t!| jj"| jj }d g|d  }||| jj"< ||| jj < t#|S |S | j$j%|i |}|S )N	inferenceTr   g      ?r
   Fr   rI   zMagCache: Skipping step r/   r   z$MagCache: Dimension mismatch. Input z, Residual zA. Cannot apply residual safely. Returning input without residual.encoder_hidden_states)&rY   _current_contextset_contextr\   hidden_states_argument_name_get_parameter_from_args_kwargs	get_staterM   rZ   r5   rR   r    r4   rD   r1   r3   rO   rQ   rP   absrL   r.   r0   rN   r9   r:   r   toshapendimclonewarning"return_encoder_hidden_states_indexmaxreturn_hidden_states_indextuplefn_reforiginal_forward)r<   ra   argskwargsarg_namehidden_statesstaterN   current_stepcurrent_scaleretention_stepoutputresdifforiginal_encoder_hidden_statesmax_idxret_listr*   r*   r+   new_forward   s|   



6zMagCacheHeadHook.new_forwardc                 C   s   | j   |S r[   )rY   rV   )r<   ra   r*   r*   r+   reset_state  s   
zMagCacheHeadHook.reset_stateN)r>   r?   r@   _is_statefulr   r-   rK   rc   r!   compilerdisablennModuler   r   r*   r*   r*   r+   rX      s    arX   c                       sp   e Zd Zddededef fddZdd	 Zej	j
d
ejjfddZdedejfddZdefddZ  ZS )MagCacheBlockHookFNrY   is_tailrZ   c                    s&   t    || _|| _|| _d | _d S r[   )rJ   rK   rY   r   rZ   r\   )r<   rY   r   rZ   rT   r*   r+   rK      s
   

zMagCacheBlockHook.__init__c                 C   r]   r[   r^   r`   r*   r*   r+   rc   '  rd   z!MagCacheBlockHook.initialize_hookra   c                 O   s  | j jd u r| j d | j  }|jsX| jj}| j|||}| jr(| 	| | jj
d urV| jd||}t| jj| jj
}d g|d  }	||	| jj< ||	| jj
< t|	S |S | jj|i |}
| jrt|
trp|
| jj }n|
}|j}|d u r{|
S |j|jkr|| }n.|jdkr|jdkr|jd |jd kr|jd |jd  }|dkr|| }n|| }n|}| jjr| || ||_| 	| |
S )Nre   rf   r
   r/   r   r   )rY   rg   rh   rk   rN   r\   ri   rj   r   _advance_steprr   rs   rt   ru   rv   rw   
isinstancerM   rn   ro   rZ   r5   _perform_calibration_steprL   )r<   ra   rx   ry   r|   rz   r{   rf   r   r   r   
out_hidden	in_hiddenresidualr   r*   r*   r+   r   ,  sR   



(


zMagCacheBlockHook.new_forwardr|   current_residualc                 C   s^   |j d u rd}ntjj| dd}tjj|j  dd}||d    }|j| d S )Nr   r   )dimg:0yE>)	rL   r!   linalgnormrB   meanitemrS   append)r<   r|   r   ratio	curr_norm	prev_normr*   r*   r+   r   k  s   
z+MagCacheBlockHook._perform_calibration_stepc                 C   sz   | j d7  _ |j | jjkr;| jjr'td t|j d td|j  d|_ d|_d|_	d|_
d |_g |_d S d S )Nr
   zV
[MagCache] Calibration Complete. Copy these values to MagCacheConfig(mag_ratios=...):
zMagCache Calibration Results: r   r   rI   )rR   rZ   r3   r5   printrS   r9   inforO   rQ   rP   rL   )r<   r|   r*   r*   r+   r   {  s   
zMagCacheBlockHook._advance_step)FN)r>   r?   r@   r   rF   r-   rK   rc   r!   r   r   r   r   r   rG   rE   r   r   rW   r*   r*   rT   r+   r     s    >r   ra   rZ   c                 C   s@  t |  ttdi }g }|  D ]%\}}|tvs t|tjj	s!qt
|D ]\}}|| d| |f q%q|s@td dS t|dkre|d \}}td| d t|||d	d
 t||| dS |d\}}	|d\}
}td|  t|	|| |D ]
\}}t||| qtd|
  t|||d	d
 dS )z
    Applies MagCache to a given module (typically a Transformer).

    Args:
        module (`torch.nn.Module`):
            The module to apply MagCache to.
        config (`MagCacheConfig`):
            The configuration for MagCache.
    r*   .z5MagCache: No transformer blocks found to apply hooks.Nr
   r   z4MagCache: Applying Head+Tail Hooks to single block ''T)r   r   z MagCache: Applying Head Hook to z MagCache: Applying Tail Hook to )r   check_if_exists_or_initializer   rG   named_childrenr   r   r!   r   
ModuleList	enumerater   r9   rq   r    r   _apply_mag_cache_block_hook_apply_mag_cache_head_hookpop)ra   rZ   rY   remaining_blocksname	submoduleindexblockhead_block_name
head_blocktail_block_name
tail_blockr*   r*   r+   apply_mag_cache  s4   

r   r   rY   c                 C   s<   t | }|td ur|t t||}||t d S r[   )r   r   get_hook_MAG_CACHE_LEADER_BLOCK_HOOKremove_hookrX   register_hook)r   rY   rZ   registryhookr*   r*   r+   r     s
   


r   Fr   c                 C   s>   t | }|td ur|t t|||}||t d S r[   )r   r   r   _MAG_CACHE_BLOCK_HOOKr   r   r   )r   rY   rZ   r   r   r   r*   r*   r+   r     s
   

r   )F)(dataclassesr   typingr   r   r   r   r!   utilsr   utils.torch_utilsr	   _commonr   _helpersr   hooksr   r   r   r   r>   r9   r   r   r8   FLUX_MAG_RATIOSrE   rD   r,   r-   rG   rX   r   r   r   r   r   rF   r   r*   r*   r*   r+   <module>   sH   "8tn/