o
    .i#                     @   s  U d dl Z d dlZd dlmZ d dlmZ d dlmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZ eeZdeed	f d
ejdefddZdZdZdaed ed< G dd dZdefddZdddZ	d dejdedB ddfddZ d!ddZ!d!ddZ"dS )"    N)
accumulate)prod)Optional)init_loggerround_up)dbo_current_ubatch_idshape.dtypereturnc                 C   s   t | |j S N)r   itemsize)r	   r
    r   U/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/v1/worker/workspace.py_compute_bytes   s   r   i   i   @WorkspaceManager_managerc                   @   s   e Zd ZdZddejdedB fddZedej	dB defd	d
Z
dddZdefddZdeeedf ejf deej	 fddZdedej	fddZdS )r   zManager for workspace allocation.

    Manages workspace buffers for DBO (Dual Batch Overlap) execution.
    Can be locked to prevent further growth during execution.
    Ndevicenum_ubatchesc                 C   s,   || _ |d ur	|nd| _d d g| _d| _d S )N   F)_device_num_ubatches_current_workspaces_locked)selfr   r   r   r   r   __init__'   s   

zWorkspaceManager.__init__	workspacer   c                 C   s   | du rdS |   |   S )zGet size of workspace in bytes.Nr   )numelelement_size)r   r   r   r   _workspace_size_bytes.   s   z&WorkspaceManager._workspace_size_bytesc                    s0   d _ tjrtd fdd jD  dS dS )zLock the workspace to prevent further growth.

        After locking, any attempt to allocate a larger workspace will raise
        an assertion error. This ensures workspace size is fixed during execution.
        Tz5[WORKSPACE DEBUG] Workspace locked. Current sizes: %sc                    s"   g | ]}|d ur  |t qS r   )r   _MB).0wsr   r   r   
<listcomp>?   s
    z)WorkspaceManager.lock.<locals>.<listcomp>N)r   envsVLLM_DEBUG_WORKSPACEloggerinfor   r#   r   r#   r   lock5   s   
zWorkspaceManager.lockc                 C   s   | j S )zCheck if workspace is locked.)r   r#   r   r   r   	is_lockedF   s   zWorkspaceManager.is_lockedshapes_and_dtypes.c                    sh   dd D  dd  D }t |}ttdg|dd  | | fddttD S )a
  Get multiple workspace tensors simultaneously from a single allocation.

        Args:
            *shapes_and_dtypes: One or more (shape, dtype) tuples.

        Returns:
            List of tensor views into the workspace buffer, one per shape/dtype pair.
        c                 S   s   g | ]	\}}t ||qS r   )r   )r!   sdr   r   r   r$   U   s    z5WorkspaceManager.get_simultaneous.<locals>.<listcomp>c                 S   s   g | ]}t |d qS )   r   )r!   actualr   r   r   r$   V   s    r   Nc                    sD   g | ]}| |  |    | d  | d qS )r   r   )viewreshape)r!   iactual_bytescurrent_workspaceoffsetsr+   r   r   r$   ^   s    )sumlistr   _ensure_workspace_sizerangelen)r   r+   aligned_bytestotal_bytesr   r4   r   get_simultaneousJ   s   

z!WorkspaceManager.get_simultaneousrequired_bytesc              	   C   s   t  }| j| }| |}||k rzdtfdd}| jr0td|  d|t dd|t ddt| jD ]&}| j| }|d	u sG| ||k r[d	| j|< ~t	j
|ft	j| jd
| j|< q5tjrttd| |t |t | j|| j t  | jt   }|S )zEnsure workspace is allocated and large enough, return current workspace.

        Args:
            required_bytes: The number of bytes required.

        Returns:
            The current workspace tensor.
        r   c                  S   sj   t  } | du r
dS | j} | dur3t| jdtr| j} qtj	| j
j}| d| j d| j
j S dS )z*Find first frame outside WorkspaceManager.Nunknownr   :)inspectcurrentframef_back
isinstancef_localsgetr   ospathbasenamef_codeco_filenamef_linenoco_name)
curr_framefilenamer   r   r   get_caller_infot   s   z@WorkspaceManager._ensure_workspace_size.<locals>.get_caller_infoz)Workspace is locked but allocation from 'z' requires z.2fz MB, current size is z3 MB. Workspace growth is not allowed after locking.N)r
   r   ze[WORKSPACE DEBUG] Resized workspace from '%s': %.2f MB -> %.2f MB (%d ubatches, total memory %.2f MB))r   r   r   strr   AssertionErrorr    r;   r   torchemptyuint8r   r%   r&   r'   r(   )r   r@   	ubatch_idr6   current_sizerR   r   r   r   r:   e   sD   	





z'WorkspaceManager._ensure_workspace_sizer   r   N)__name__
__module____qualname____doc__rU   r   intr   staticmethodTensorr   r)   boolr*   tupler
   r9   r?   r:   r   r   r   r   r       s    

c                   C   s   t duS )zCheck if workspace manager has been initialized.

    Returns:
        True if workspace manager is initialized, False otherwise.
    Nr   r   r   r   r    is_workspace_manager_initialized   s   re   c                   C   s   t dusJ dt S )zGet the current workspace manager instance.

    Raises:
        AssertionError: If workspace manager has not been initialized.
    NzoWorkspaceManager not initialized. Call init_workspace_manager() with a device before using workspace functions.rd   r   r   r   r   current_workspace_manager   s   
rf   r   r   c                 C   s&   t durtdt j|  t| |a dS )a  Initialize the workspace manager with a device.

    Must be called before using any workspace functions. Typically called
    from GPUModelRunner.__init__.

    Args:
        device: The device to allocate workspace on.
        num_ubatches: Number of micro-batches. Defaults to 1.
    NzNWorkspaceManager already initialized on device %s, reinitializing on device %s)r   r'   warningr   r   )r   r   r   r   r   init_workspace_manager   s   rh   c                   C   s   t    dS )ae  Lock the workspace to prevent further growth.

    After calling this function, any attempt to allocate a workspace larger
    than the current size will raise an AssertionError. This ensures that
    workspace size is fixed during execution and prevents unexpected memory
    allocations in the hot path.

    Example:
        # During initialization
        init_workspace_manager(device)
        reserve_workspace(shape1, dtype1)
        reserve_workspace(shape2, dtype2)

        # Lock after warmup/profiling
        lock_workspace()

        # Now all get_workspace calls must fit in pre-allocated size
    N)rf   r)   r   r   r   r   lock_workspace   s   ri   c                   C   s   da dS )zReset the workspace manager to uninitialized state.

    This is primarily intended for testing purposes to allow tests
    to reinitialize the workspace manager cleanly.
    Nrd   r   r   r   r   reset_workspace_manager   s   rj   )r   r   r   rZ   )#rC   rI   	itertoolsr   mathr   typingr   rU   	vllm.envsr%   vllm.loggerr   vllm.utils.math_utilsr   vllm.v1.worker.ubatchingr   r[   r'   rc   r_   r
   r   r    _GiBr   __annotations__r   rb   re   rf   r   rh   ri   rj   r   r   r   r   <module>   s:   
  
	

