o
    iq                   
   @   s  d dl Z d dl mZmZmZmZmZmZmZmZm	Z	 d dl
Z
G dd de jZG dd de jZG dd de jZedee jZG d	d
 d
e jZdefdefdedeefge_edeedd ZG dd dZdd Zdedededede
jf
ddZdS )    N)		CFUNCTYPEPOINTERc_intc_int64c_size_tc_uint8c_uint16c_void_ppointerc                   @   s"   e Zd ZdefdefdefgZdS )
DLDataTypecodebitslanesN)__name__
__module____qualname__r   r   _fields_ r   r   R/home/ubuntu/vllm_env/lib/python3.10/site-packages/flashinfer/comm/dlpack_utils.pyr       s
    r   c                   @   s   e Zd ZdefdefgZdS )DLDevicedevice_type	device_idN)r   r   r   r   r   r   r   r   r   r   (   s    r   c                   @   sB   e Zd Zdefdefdefdefdeefdeefde	fgZ
dS )	DLTensordatadevicendimdtypeshapestridesbyte_offsetN)r   r   r   r	   r   r   r   r   r   r   r   r   r   r   r   r   /   s    
r   c                   @   s   e Zd ZdS )DLManagedTensorN)r   r   r   r   r   r   r   r    E   s    r    	dl_tensormanager_ctxdeleterc                 C   s   d S )Nr   )dmt_ptrr   r   r   no_op_deleterQ   s   r%   c                   @   s   e Zd ZdZdd ZdS )CapsuleWrapperaE  
    A wrapper class that holds references to the PyCapsule and its associated data.

    This class prevents Python's garbage collector from collecting the shape_array and
    managed_tensor objects while the capsule is still in use. It serves as a container
    to maintain the lifecycle of all DLPack-related objects.
    c                 C   s   || _ || _|| _dS )aF  
        Initialize the CapsuleWrapper with the necessary objects.

        Parameters:
            capsule: The PyCapsule object that follows the DLPack protocol
            shape_array: The array containing tensor shape information
            managed_tensor: The DLManagedTensor instance that the capsule points to
        N)capsule_shape_array_managed_tensor)selfr'   shape_arraymanaged_tensorr   r   r   __init__a   s
   
zCapsuleWrapper.__init__N)r   r   r   __doc__r-   r   r   r   r   r&   X   s    r&   c                 C   s  d}d}|t jt jt jt jt jt jfv rt |j}d}n.|t j	t j
t jt jfv r2t |j}d}n|t jt jt jt jfv rGt |j}d}nt||d }td }	|	||| }
|	|| d}td|d}t||dd}t }t| |_||_d|_||_t|
tt|_t|tt|_ d|_!t" }||_#d|_$t%|_&tj'j(}t|_)ttj*tg|_+t,|}||dd}t|tj-j.}t/||
|}|S )	a  
    Parameters:
      ptr: GPU memory address obtained from cudaMalloc (Python int)
      segment_size: Memory size of each segments in bytes
      segment_stride: Memory stride size between segments in bytes
      num_segments: Number of segments
      torch_dtype: torch dtype
      dev_id: device id.
    Returns:
      A PyCapsule object compliant with DLPack specification, which can be directly converted to a
      tensor using torch.utils.dlpack.from_dlpack
    r            )r   r   )r   r   r   Ns   dltensor)0torchfloat8_e5m2float8_e4m3fnbfloat16float16float32float64finfor   int8int16int32int64iinfouint8uint16uint32uint64NotImplementedErrorr   r   r   r   r	   r   r   r   r   ctypescastr   r   r   r   r    r!   r"   r%   r#   	pythonapiPyCapsule_Newrestypec_char_pargtypesr
   	py_objectvaluer&   )ptrsegment_sizesegment_stridenum_segmentstorch_dtypedev_idbits_per_elementsdldata_type_codebytes_per_elementShapeArrayTyper+   stride_arrayr   r   dltensorr,   rG   managed_tensor_ptrcapsule_ptrr'   capsule_wrapperr   r   r   create_dlpack_capsules   sZ   
r\   rM   rN   rO   rP   r   c                 C   s,   t | |||||}tjj|j}||_|S )ax  
    Pack GPU memory into a PyTorch tensor with specified stride.

    Parameters:
        ptr: GPU memory address obtained from cudaMalloc
        segment_size: Memory size of each segment in bytes
        segment_stride: Memory stride size between segments in bytes
        num_segments: Number of segments
        dtype: PyTorch data type for the resulting tensor
        dev_id: CUDA device ID

    Returns:
        PyTorch tensor that references the provided memory

    Note:
        This function creates a new DLPack capsule each time it's called,
        even with the same pointer. Each capsule is consumed only once.
    )r\   r2   utilsdlpackfrom_dlpackr'   _capsule_wrapper)rM   rN   rO   rP   r   rR   r[   torch_tensorr   r   r   pack_strided_memory   s   rb   )rD   r   r   r   r   r   r   r   r	   r
   r2   	Structurer   r   r   DLManagedTensorDeleterr    r   r%   r&   r\   intr   rb   r   r   r   r   <module>   s:   ,

L