o
    ۷i                     @   s   d dl Z d dl mZ d dlmZ d dlmZmZ d dlmZ ddl	m
Z
mZ ddlmZ eeZG d	d
 d
eZ	ddeej deej de jdeddf
ddZdeej ddfddZG dd de
ZdS )    N)nn)init_logger)HookRegistry	ModelHook)current_omni_platform   )OffloadBackendOffloadConfig)ModuleDiscoveryc                   @   sz   e Zd ZdZdZ	ddeej dej	de
fddZd	ejd
dfddZd	ejd
dfddZd	ejd
eeef fddZdS )SequentialOffloadHooka   Hook for sequential offloading with mutual exclusion on encoder and DiT modules.

    To be used as a model-level (or "component-level") of CPU offloading method;
    When a module's forward is called, this hook offloads target modules to CPU
    and loads the current module to GPU.
    sequential_offloadToffload_targetsdevice
pin_memoryc                 C   s   || _ || _|| _d S Nr   r   r   )selfr   r   r    r   f/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm_omni/diffusion/offloader/sequential_backend.py__init__   s   
zSequentialOffloadHook.__init__modulereturnNc                 C   s   zt | }W n
 ty   Y dS w |j}|jdkrdS |d tj  | j	rC| D ]}|j
jjdkrB|j
 sB|j
	 |_
q.dS dS )zMove module to CPU.Ncpu)next
parametersStopIterationr   typetotorchcudaempty_cacher   data	is_pinned)r   r   paramprevious_devicepr   r   r   _to_cpu&   s"   


zSequentialOffloadHook._to_cpuc                 C   sD   zt | j| jkrW dS W n
 ty   Y dS w || j dS )zMove module to GPU.N)r   r   r   r   r   )r   r   r   r   r   _to_gpu:   s   zSequentialOffloadHook._to_gpuc                 O   st   | j D ]}| | q| | t  tddd | j D |jj| j	j
 d| j	j t d d d  ||fS )Nz2Swapped: %s -> CPU, %s -> %s, free memory: %.4f GBc                 S   s   g | ]}|j jqS r   )	__class____name__).0tr   r   r   
<listcomp>P   s    z5SequentialOffloadHook.pre_forward.<locals>.<listcomp>:i   )r   r&   r'   r   synchronizeloggerdebugr(   r)   r   r   indexget_free_memory)r   r   argskwargstargetr   r   r   pre_forwardE   s   

z!SequentialOffloadHook.pre_forwardT)r)   
__module____qualname____doc__
_HOOK_NAMElistr   Moduler   r   boolr   r&   r'   tupledictr6   r   r   r   r   r      s    
 r   Tdit_modulesencoder_modulesr   r   r   c                 C   s   | D ]}t |}t|||d}|tj| td|jj q|D ]}t |}t| ||d}|tj| td|jj q"dS )a7  Apply sequential offloading hooks to DiT and encoder modules.

    Registers hooks on modules to implement mutual-exclusion GPU allocation.
        - Before DiT runs, encoders are offloaded to CPU.
        - Before encoders run, DiT is offloaded to CPU.

    Args:
        dit_modules: DiT/transformer modules to register hooks on
        encoder_modules: Encoder modules to register hooks on
        device: Target GPU device for loading
        pin_memory: Whether to pin CPU memory for faster transfers

    Example:
        >>> apply_sequential_offload(
        ...     dit_modules=[pipeline.transformer],
        ...     encoder_modules=[pipeline.text_encoder, pipeline.vae],
        ...     device=torch.device("cuda:0"),
        ... )
        >>> # Modules of pipeline now automatically swap between CPU and GPU
    r   zRegistered offload hook for %sN)	r   get_or_creater   register_hookr;   r/   r0   r(   r)   )rA   rB   r   r   dit_modregistryhookencr   r   r   apply_sequential_offloadY   s&   

rI   modulesc                 C   s>   | D ]}t |dd}|dur|tj td|jj qdS )zRemove sequential offloading hooks from modules.

    Args:
        modules: Modules to remove hooks from

    Example:
        >>> all_modules = [*dit_modules, *encoder_modules]
        >>> remove_sequential_offload(all_modules)
    _hook_registryNzRemoved offload hook from %s)getattrremove_hookr   r;   r/   r0   r(   r)   )rJ   r   rF   r   r   r   remove_sequential_offload   s   
rN   c                       sJ   e Zd ZdZdedejf fddZdej	ddfd	d
Z
dddZ  ZS )ModelLevelOffloadBackendzModel-level (sequential) offloading backend.

    Uses SequentialOffloadHook registered via HookRegistry for automatic module swapping.
    configr   c                    s   t  || g | _d S r   )superr   _offload_modules)r   rP   r   r(   r   r   r      s   
z!ModelLevelOffloadBackend.__init__pipeliner   Nc              
   C   s  | j r
td d S t|}|jstd d S |js#td d S |jD ]}|| j q&|j	d urXz|j	j| jdd W n t
yW } ztd| W Y d }~nd }~ww t|j|j| j| jjd g |j|j| _d| _ tdd	|jd	|j d S )
Nz(ModelLevelOffloadBackend already enabledzANo DiT/transformer modules found, skipping model-level offloadingz9No encoder modules found, skipping model-level offloadingT)non_blockingzFailed to move VAE to GPU: %s)rA   rB   r   r   z<Model-level offloading enabled: %s <-> %s (mutual exclusion)z, )enabledr/   warningr
   discoverditsencodersr   r   vae	Exceptionr0   rI   rP   pin_cpu_memoryrR   infojoin	dit_namesencoder_names)r   rT   rJ   rH   excr   r   r   enable   s@   







zModelLevelOffloadBackend.enablec                 C   s2   | j sd S t| j | j  d| _ td d S )NFzModel-level offloading disabled)rV   rN   rR   clearr/   r^   )r   r   r   r   disable   s   

z ModelLevelOffloadBackend.disable)r   N)r)   r8   r9   r:   r	   r   r   r   r   r=   rc   re   __classcell__r   r   rS   r   rO      s
    8rO   r7   )r   r   vllm.loggerr   vllm_omni.diffusion.hooksr   r   vllm_omni.platformsr   baser   r	   module_collectorr
   r)   r/   r   r<   r=   r   r>   rI   rN   rO   r   r   r   r   <module>   s.   L
1