o
    i#                     @   s   U d dl Z d dlZd dlmZ d dlmZ d dlZd dlmZ d dl	m
Z
 d dlmZ d dlmZ e
eZdeedf d	ejd
efddZdZdZdaded< G dd dZd
efddZdddZ	d dejdedB d
dfddZd!ddZd!ddZ dS )"    N)
accumulate)prod)init_loggerround_up)dbo_current_ubatch_idshape.dtypereturnc                 C   s   t | |j S N)r   itemsize)r   r	    r   N/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/v1/worker/workspace.py_compute_bytes   s   r   i   i   @zWorkspaceManager | None_managerc                   @   s   e Zd ZdZddejdedB fddZedej	dB defd	d
Z
dddZdefddZdeeedf ejf deej	 fddZdedej	fddZdS )WorkspaceManagerzManager for workspace allocation.

    Manages workspace buffers for DBO (Dual Batch Overlap) execution.
    Can be locked to prevent further growth during execution.
    Ndevicenum_ubatchesc                 C   s,   || _ |d ur	|nd| _d d g| _d| _d S )N   F)_device_num_ubatches_current_workspaces_locked)selfr   r   r   r   r   __init__&   s   

zWorkspaceManager.__init__	workspacer
   c                 C   s   | du rdS |   |   S )zGet size of workspace in bytes.Nr   )numelelement_size)r   r   r   r   _workspace_size_bytes-   s   z&WorkspaceManager._workspace_size_bytesc                    s0   d _ tjrtd fdd jD  dS dS )zLock the workspace to prevent further growth.

        After locking, any attempt to allocate a larger workspace will raise
        an assertion error. This ensures workspace size is fixed during execution.
        Tz5[WORKSPACE DEBUG] Workspace locked. Current sizes: %sc                    s"   g | ]}|d ur  |t qS r   )r   _MB).0wsr   r   r   
<listcomp>>   s
    z)WorkspaceManager.lock.<locals>.<listcomp>N)r   envsVLLM_DEBUG_WORKSPACEloggerinfor   r"   r   r"   r   lock4   s   
zWorkspaceManager.lockc                 C   s   | j S )zCheck if workspace is locked.)r   r"   r   r   r   	is_lockedE   s   zWorkspaceManager.is_lockedshapes_and_dtypes.c                    sh   dd D  dd  D }t |}ttdg|dd  | | fddttD S )a
  Get multiple workspace tensors simultaneously from a single allocation.

        Args:
            *shapes_and_dtypes: One or more (shape, dtype) tuples.

        Returns:
            List of tensor views into the workspace buffer, one per shape/dtype pair.
        c                 S   s   g | ]	\}}t ||qS r   )r   )r    sdr   r   r   r#   T   s    z5WorkspaceManager.get_simultaneous.<locals>.<listcomp>c                 S   s   g | ]}t |d qS )   r   )r    actualr   r   r   r#   U   s    r   Nc                    sD   g | ]}| |  |    | d  | d qS )r   r   )viewreshape)r    iactual_bytescurrent_workspaceoffsetsr*   r   r   r#   ]   s    )sumlistr   _ensure_workspace_sizerangelen)r   r*   aligned_bytestotal_bytesr   r3   r   get_simultaneousI   s   

z!WorkspaceManager.get_simultaneousrequired_bytesc              	   C   s   t  }| j| }| |}||k rzdtfdd}| jr0td|  d|t dd|t ddt| jD ]&}| j| }|d	u sG| ||k r[d	| j|< ~t	j
|ft	j| jd
| j|< q5tjrttd| |t |t | j|| j t  | jt   }|S )zEnsure workspace is allocated and large enough, return current workspace.

        Args:
            required_bytes: The number of bytes required.

        Returns:
            The current workspace tensor.
        r
   c                  S   sj   t  } | du r
dS | j} | dur3t| jdtr| j} qtj	| j
j}| d| j d| j
j S dS )z*Find first frame outside WorkspaceManager.Nunknownr   :)inspectcurrentframef_back
isinstancef_localsgetr   ospathbasenamef_codeco_filenamef_linenoco_name)
curr_framefilenamer   r   r   get_caller_infos   s   z@WorkspaceManager._ensure_workspace_size.<locals>.get_caller_infoz)Workspace is locked but allocation from 'z' requires z.2fz MB, current size is z3 MB. Workspace growth is not allowed after locking.N)r	   r   ze[WORKSPACE DEBUG] Resized workspace from '%s': %.2f MB -> %.2f MB (%d ubatches, total memory %.2f MB))r   r   r   strr   AssertionErrorr   r:   r   torchemptyuint8r   r$   r%   r&   r'   )r   r?   	ubatch_idr5   current_sizerQ   r   r   r   r9   d   sD   	





z'WorkspaceManager._ensure_workspace_sizer   r
   N)__name__
__module____qualname____doc__rT   r   intr   staticmethodTensorr   r(   boolr)   tupler	   r8   r>   r9   r   r   r   r   r      s    

r   c                   C   s   t duS )zCheck if workspace manager has been initialized.

    Returns:
        True if workspace manager is initialized, False otherwise.
    Nr   r   r   r   r    is_workspace_manager_initialized   s   rd   c                   C   s   t dusJ dt S )zGet the current workspace manager instance.

    Raises:
        AssertionError: If workspace manager has not been initialized.
    NzoWorkspaceManager not initialized. Call init_workspace_manager() with a device before using workspace functions.rc   r   r   r   r   current_workspace_manager   s   
re   r   r   c                 C   s&   t durtdt j|  t| |a dS )a  Initialize the workspace manager with a device.

    Must be called before using any workspace functions. Typically called
    from GPUModelRunner.__init__.

    Args:
        device: The device to allocate workspace on.
        num_ubatches: Number of micro-batches. Defaults to 1.
    NzNWorkspaceManager already initialized on device %s, reinitializing on device %s)r   r&   warningr   r   )r   r   r   r   r   init_workspace_manager   s   rg   c                   C   s   t    dS )ae  Lock the workspace to prevent further growth.

    After calling this function, any attempt to allocate a workspace larger
    than the current size will raise an AssertionError. This ensures that
    workspace size is fixed during execution and prevents unexpected memory
    allocations in the hot path.

    Example:
        # During initialization
        init_workspace_manager(device)
        reserve_workspace(shape1, dtype1)
        reserve_workspace(shape2, dtype2)

        # Lock after warmup/profiling
        lock_workspace()

        # Now all get_workspace calls must fit in pre-allocated size
    N)re   r(   r   r   r   r   lock_workspace   s   rh   c                   C   s   da dS )zReset the workspace manager to uninitialized state.

    This is primarily intended for testing purposes to allow tests
    to reinitialize the workspace manager cleanly.
    Nrc   r   r   r   r   reset_workspace_manager   s   ri   )r
   r   r   rY   )!rB   rH   	itertoolsr   mathr   rT   	vllm.envsr$   vllm.loggerr   vllm.utils.math_utilsr   vllm.v1.worker.ubatchingr   rZ   r&   rb   r^   r	   r   r   _GiBr   __annotations__r   ra   rd   re   r   rg   rh   ri   r   r   r   r   <module>   s8   
  
	

