o
    ٷij                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ ddlm	Z	 ddl
mZ ddlmZmZ ddlmZ ddlmZ dd	lmZ d d
lmZ eeZzd dlmZ W n ey`   dZed Y nw G dd dejjZG dd deZ dS )    N)HookRegistry   )CachedContext)PrunedContext)CachedContextManagerContextNotExistError)PrunedContextManager)ForwardPattern)	CacheType)init_logger)ContextParallelSplitHookzContext parallelism in cache-dit requires 'diffusers>=0.36.dev0.
Please install latest version of diffusers from source via: 
pip3 install git+https://github.com/huggingface/diffusers.gitc                       s
  e Zd ZejejejgZdejdddddej	fde
jjde
jjdedededed	eeB d
edef fddZdd Ze
jjdd Ze
jjdd Zde
jde
jfddZe
jjde
jeB de
jdB dee
je
jdB f fddZe
jjde
jde
jdB dee
je
jdB f e
jB fddZe
jjde
jjdefddZd e
jde
jde
jfd!d"Zde
jde
jfd#d$Ze
jjd%d& Z e
jjd'd( Z!e
jjd)d* Z"e
jjd+d, Z#e
jjd-d. Z$de
jde
jfd/d0Z%de
jde
jfd1d2Z&de
jde
jfd3d4Z'  Z(S )5CachedBlocks_Pattern_BaseNTtransformer_blockstransformerforward_patterncheck_forward_patterncheck_num_outputscache_prefixcache_contextcontext_manager
cache_typec
                    s   t    || _|| _|| _|| _|| _|| _|| _|| _	|	| _
|   |   td| jj d| j d| j d| j	j d	 d S )NzMatch Blocks: z, for z, cache_context: z, context_manager: .)super__init__r   r   r   r   r   r   r   r   r   _check_forward_pattern_check_cache_typeloggerinfo	__class____name__nameselfr   r   r   r   r   r   r   r   r   kwargsr    _/home/ubuntu/.local/lib/python3.10/site-packages/cache_dit/caching/cache_blocks/pattern_base.pyr   '   s*   
z"CachedBlocks_Pattern_Base.__init__c              	   C   s  | j std| j  d S | jjr| j| jv s!J d| j d| jd ur| jD ]`}t|dr;t|j	t
jjr;|j	}tt|jj }| jrttt|jjd}|dkrtt| jj|kstJ d| d| j d	t| jj d
| jjD ]}||v sJ d| d
qxq)d S d S )NzSkipped Forward Pattern Check: zPattern z is not supported now!blockztorch.Tensorr   z!The number of block's outputs is z, don't not match the number of the pattern: z, Out: r   z$The input parameters must contains: )r   r   warningr   	Supported_supported_patternsr   hasattr
isinstancer'   torchnnModulesetinspect	signatureforward
parameterskeysr   strreturn_annotationcountlenOutIn)r"   r'   forward_parametersnum_outputsrequired_paramr%   r%   r&   r   L   sB   





z0CachedBlocks_Pattern_Base._check_forward_patternc                 C   "   | j tjksJ d| j  dd S )NCache type z# is not supported for CachedBlocks.)r   r
   DBCacher"   r%   r%   r&   r   o      z+CachedBlocks_Pattern_Base._check_cache_typec                 C   st   |    | j t| jksJ d| j  dt| j | j t| jks8J d| j  dt| j d S )NzFn_compute_blocks z4 must be less than the number of transformer blocks zBn_compute_blocks )r   r   Fn_compute_blocksr9   r   Bn_compute_blocksrB   r%   r%   r&   _check_cache_paramsu   s   z-CachedBlocks_Pattern_Base._check_cache_paramshidden_statesencoder_hidden_statesc                 O   s<   | j D ]}|||g|R i |}| ||\}}q||fS N)r   _process_block_outputsr"   rG   rH   argsr#   r'   r%   r%   r&   call_blocks   s   

z%CachedBlocks_Pattern_Base.call_blocksreturnc                 C   s.   t |tjs|\}}| jjs||}}||fS rI   )r,   r-   Tensorr   Return_H_Firstr"   rG   rH   r%   r%   r&   rJ      s   z0CachedBlocks_Pattern_Base._process_block_outputsc                 C   s$   | j jr|S | j jr||fS ||fS rI   )r   Return_H_OnlyrP   rQ   r%   r%   r&   _process_forward_outputs   s   z2CachedBlocks_Pattern_Base._process_forward_outputsmodulec                 C   s@   t d u rdS t|dr|j}|j D ]
}t|t r dS qdS )NF_diffusers_hookT)r   r+   rU   hooksvaluesr,   )r"   rT   rU   hookr%   r%   r&   "_check_if_context_parallel_enabled   s   

z<CachedBlocks_Pattern_Base._check_if_context_parallel_enabledoriginal_hidden_statesc                 C   s`   |  | jd r&|j|jkr&|}ttjr$td|j d|j d |S |||j	 }|S )Nr   zUContext parallelism is enabled in Fn blocks, and the shape of original_hidden_states z and hidden_states zX are different after Fn blocks. Use hidden_states as Fn_hidden_states_residual directly.)
rY   r   shaper   isEnabledForloggingDEBUGdebugtodevice)r"   rZ   rG   Fn_hidden_states_residualr%   r%   r&   _get_Fn_residual   s    	
z*CachedBlocks_Pattern_Base._get_Fn_residualc              
   O   s  z| j | j |   W n0 ty= } z$td| d | j||g|R i |\}}| ||W  Y d }~S d }~ww |}| j	||g|R i |\}}| 
||}~| j   | j j| j  se|n||  | j  st| j dn| j dd}tj  |r| j   ~| j j||| j  r| j dn| j d| j  r| j dn| j dd\}}tj  | j||g|R i |\}}n| j j|| j dd	 | j  r| j || j d ~tj  | j||g|R i |\}}}	}
tj  | j  r| j j|	| j dd	 n| j j|| j dd	 | j  r2| j j|
| j dd	 n| j j|| j dd	 tj  | j||g|R i |\}}tj  | ||S )
NCache context not exist: z, skip cache._Fn_residual_Fn_hidden_statesparallelizedprefix_Bn_residual_Bn_hidden_statesri   encoder_prefixri   )r   set_contextr   rF   r   r   r(   rM   rS   call_Fn_blocksrc   mark_step_begin	can_cacheis_l1_diff_enabled_is_parallelizedr   r-   _dynamograph_breakadd_cached_stepapply_cacheis_cache_residualis_encoder_cache_residualcall_Bn_blocksset_Fn_buffercall_Mn_blocksset_Bn_bufferset_Bn_encoder_buffer)r"   rG   rH   rL   r#   erZ   rb   can_use_cachehidden_states_residualencoder_hidden_states_residualr%   r%   r&   r3      s   





















z!CachedBlocks_Pattern_Base.forwardc                 C   s4   t t| jd ut| jddft ot dkfS )Nrt   F   )anyallr   getattrdistis_initializedget_world_sizerB   r%   r%   r&   rt   o  s   z*CachedBlocks_Pattern_Base._is_parallelizedc                 C   s(   | j  | j  v p| j  | j  v S rI   )r   get_current_stepget_cached_stepsget_cfg_cached_stepsrB   r%   r%   r&   _is_in_cache_step~  s   z+CachedBlocks_Pattern_Base._is_in_cache_stepc                 C   s   | j d | j  }|S rI   )r   r   rD   )r"   selected_Fn_blocksr%   r%   r&   
_Fn_blocks  s   z$CachedBlocks_Pattern_Base._Fn_blocksc                 C   sF   | j  dkr| j| j  d  }|S | j| j  | j    }|S Nr   )r   rE   r   rD   )r"   selected_Mn_blocksr%   r%   r&   
_Mn_blocks  s   z$CachedBlocks_Pattern_Base._Mn_blocksc                 C   s   | j | j  d  }|S rI   )r   r   rE   )r"   selected_Bn_blocksr%   r%   r&   
_Bn_blocks  s   z$CachedBlocks_Pattern_Base._Bn_blocksc                 O   s>   |   D ]}|||g|R i |}| ||\}}q||fS rI   )r   rJ   rK   r%   r%   r&   rp     s   
z(CachedBlocks_Pattern_Base.call_Fn_blocksc           
      O   s   |}|}|   D ]}|||g|R i |}| ||\}}q| }|| }|d ur8|d ur8| }|| }	nd }	||||	fS rI   )r   rJ   
contiguous)
r"   rG   rH   rL   r#   rZ   original_encoder_hidden_statesr'   r   r   r%   r%   r&   r}     s2   

z(CachedBlocks_Pattern_Base.call_Mn_blocksc                 O   sT   | j  dkr||fS |  D ]}|||g|R i |}| ||\}}q||fS r   )r   rE   r   rJ   rK   r%   r%   r&   r{     s   
z(CachedBlocks_Pattern_Base.call_Bn_blocks))r   
__module____qualname__r	   	Pattern_0	Pattern_1	Pattern_2r*   r
   rA   r-   r.   
ModuleListr/   boolr6   r   r   r   r   compilerdisabler   rF   rO   rM   tuplerJ   rS   rY   rc   r3   rt   r   r   r   r   rp   r}   r{   __classcell__r%   r%   r$   r&   r       s    
	
%#




 








'r   c                       s  e Zd ZU dZeed< dejdddddej	fde
jjde
jjdeded	ed
edeeB dedef fddZe
jjdd Zde
jde
jfddZee
jjdd Ze
jjdedefddZe
jj	d"dede
jdefddZdede
jde
jfd d!Z  ZS )#PrunedBlocks_Pattern_Baser   pruned_blocks_stepNTr   r   r   r   r   r   r   r   r   c
                    sF   t  j|f||||||||	d|
 t| jtsJ d| j| _d S )N)r   r   r   r   r   r   r   r   z>context_manager must be PrunedContextManager for PrunedBlocks.)r   r   r,   r   r   r!   r$   r%   r&   r     s(   z"PrunedBlocks_Pattern_Base.__init__c                 C   r?   )Nr@   z# is not supported for PrunedBlocks.)r   r
   DBPrunerB   r%   r%   r&   r     rC   z+PrunedBlocks_Pattern_Base._check_cache_typerG   rH   c              
   O   s  d| _ z| j| j |   W n0 ty@ } z$td| d | j||g|R i |\}}| 	||W  Y d }~S d }~ww | j
  | | jd rRtdt| jD ]\}}| j||||g|R i |\}}qW| j| j  | j| j | 	||S )Nr   rd   z, skip prune.zABlock level Context parallelism is not supported in PrunedBlocks.)r   r   ro   r   rF   r   r   r(   rM   rS   rq   rY   r   RuntimeError	enumeratecompute_or_pruneadd_pruned_blockadd_actual_block
num_blocks)r"   rG   rH   rL   r#   r   ir'   r%   r%   r&   r3      sP   

	z!PrunedBlocks_Pattern_Base.forwardc                 C   s
   t | jS rI   )r9   r   rB   r%   r%   r&   r   S  s   
z$PrunedBlocks_Pattern_Base.num_blocksblock_idrN   c                 C   s   || j | jv S rI   )r   get_non_prune_blocks_idsr   )r"   r   r%   r%   r&   _skip_pruneX  s   z%PrunedBlocks_Pattern_Base._skip_pruneBn_originalri   c                 C   s:   d}|  |s| jj||  |d}|  jt|7  _|S )NFrg   )r   r   	can_prunert   r   int)r"   r   rG   ri   can_use_pruner%   r%   r&   _maybe_prune]  s   
z&PrunedBlocks_Pattern_Base._maybe_prunec                 O   s  |}|}| j ||| j d| dd}	tj  |	r^| j  | jj||| j r2| j d| dn| j d| d| j	 rH| j d| dn| j d| dd\}}tj  ||fS |||g|R i |}| 
||\}}| |s| }|| }
|d ur|d ur| }|| }nd }| jj|| j d| dd | j r| jj|
| j d| dd n| jj|| j d| dd |d ur| j	 r| jj|| j d| dd n| jj|| j d| dd tj  ||fS )	N__Fn_originalrn   rj   rk   _Bn_encoder_residual_Bn_encoder_hidden_statesrl   )r   r   r-   ru   rv   r   add_pruned_stepapply_prunery   rz   rJ   r   r   r|   r~   r   )r"   r   r'   rG   rH   rL   r#   rZ   r   r   r   r   r%   r%   r&   r   o  s   




3



z*PrunedBlocks_Pattern_Base.compute_or_prune)r   )r   r   r   r   r   __annotations__r	   r   r
   r   r-   r.   r   r/   r   r6   r   r   r   r   r   r   rO   r3   propertyr   r   r   r   r   r%   r%   r$   r&   r     sr   
 	
"

3r   )!r1   r]   r-   torch.distributeddistributedr   diffusers.hooksr   cache_contexts.cache_contextr   cache_contexts.prune_contextr   cache_contexts.cache_managerr   r   cache_contexts.prune_managerr   r   r	   cache_typesr
   cache_dit.loggerr   r   r    diffusers.hooks.context_parallelr   ImportErrorr_   r.   r/   r   r   r%   r%   r%   r&   <module>   s4    	   X