o
    
۾i                  	   @   s   d dl mZmZ d dlmZ d dlZd dlZd dlm	Z	m
Z
 d dlmZ d dlmZ d dlmZ 		ddejejB d	ejdB d
ejdB dejfddZG dd dZG dd dZG dd dZG dd dZe
jde	jfddZdS )    )IterableSequence)partialN)tltriton)next_power_of_2)is_uva_available)$get_accelerator_view_from_cpu_tensorxoutdevicereturnc                 C   sd   t | tjrt| } | jsJ |  rJ |d u r'|d us J tj| |d}|  }|j	|ddS )N)r   Tnon_blocking)

isinstancenpndarraytorch
from_numpyis_cpu	is_pinned
empty_like
pin_memorycopy_)r
   r   r   tmp r   S/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/buffer_utils.pyasync_copy_to_gpu   s   

r   c                   @   s(   e Zd Zdeee B dejfddZdS )	UvaBuffersizedtypec                 C   s>   t  stdtj||ddd| _| j | _t| j| _d S )NzUVA is not availablecpuTr    r   r   )	r   RuntimeErrorr   zerosr!   numpyr   r	   uva)selfr   r    r   r   r   __init__$   s
   zUvaBuffer.__init__N)__name__
__module____qualname__intr   r   r    r(   r   r   r   r   r   #   s     r   c                   @   sz   e Zd Z	ddeee B dejdefddZdeje	j
B eB dejfd	d
Z	ddeje	j
B dejdB dejfddZdS )UvaBufferPool   r   r    max_concurrencyc                    s6   | _  | _|| _ fddt|D | _d| _d S )Nc                    s   g | ]}t  qS r   )r   ).0_r    r   r   r   
<listcomp>8   s    z*UvaBufferPool.__init__.<locals>.<listcomp>r   )r   r    r/   range	_uva_bufs_currr'   r   r    r/   r   r2   r   r(   -   s
   
zUvaBufferPool.__init__r
   r   c                 C   sX   | j d | j | _ | j| j  }t|tjr|jn|j}t|}||d |< |j	d | S )N   )
r6   r/   r5   r   r   Tensorr!   r   lenr&   )r'   r
   bufdstnr   r   r   copy_to_uva<   s   zUvaBufferPool.copy_to_uvaNr   c                 C   s(   |  |}|d u r| S |j|ddS )NTr   )r>   cloner   )r'   r
   r   r&   r   r   r   copy_to_gpuF   s   
zUvaBufferPool.copy_to_gpur.   N)r)   r*   r+   r,   r   r   r    r(   r9   r   r   listr>   r@   r   r   r   r   r-   ,   s$    

 
r-   c                   @   sJ   e Zd Z	ddeee B dejdefddZddedB d	ejfd
dZ	dS )UvaBackedTensorr.   r   r    r/   c                 C   sN   || _ || _tj||ddd| _| j | _t|||| _| j	| j| _
d S )Nr!   Fr"   )r    r/   r   r$   r!   r%   r   r-   poolr>   gpur7   r   r   r   r(   T   s   zUvaBackedTensor.__init__Nr=   r   c                 C   s,   | j |d ur| jd | n| j| _| jS rB   )rE   r>   r   rF   )r'   r=   r   r   r   r>   b   s   &zUvaBackedTensor.copy_to_uvarA   rB   )
r)   r*   r+   r,   r   r   r    r(   r9   r>   r   r   r   r   rD   S   s    

rD   c                   @   s   e Zd Z		ddeee B dejdejdedef
dd	Z	d
edede
e e
e B ddfddZd
ededdfddZdddZdddZdS )StagedWriteTensorr.   Fr   r    r   r/   uva_instead_of_gpuc           	      C   s   t jt jt jg}||vrtd| d| t|tr|n|d | _|| _|| _	|s4t j
|||d| _nt||| _| jj| _g | _g | _g | _g | _tt|d}|| jt jd| _|| jt jd| _t| j}|||d| _|| jt jd| _d S )NzUnsupported dtype z: should be one of r   )r    r   )r/   )r    )r   int32int64float32
ValueErrorr   r,   num_rowsr    r/   r$   rF   r   _uva_bufr&   _staged_write_indices_staged_write_starts_staged_write_contents_staged_write_cu_lensr   r-   write_indiceswrite_startsr   write_contentswrite_cu_lens)	r'   r   r    r   r/   rH   supported_dtypes
new_buffer	init_sizer   r   r   r(   i   s,   

zStagedWriteTensor.__init__indexstartr
   r   Nc                 C   sZ   |dksJ |dksJ |sd S | j | | j| | j| | jt| j d S Nr   )rO   appendrP   rQ   extendrR   r:   )r'   rZ   r[   r
   r   r   r   stage_write   s   zStagedWriteTensor.stage_writec                 C   sF   |dksJ | j | | jd | j| | jt| j d S r\   )rO   r]   rP   rQ   rR   r:   )r'   rZ   r
   r   r   r   stage_write_elem   s
   z"StagedWriteTensor.stage_write_elemc              	   C   s   t | j}|dkrd S | j| j}| j| j}| j| j}t | j}t	| j
jts.J || j
jkrGt|}t|| j| jd| _
tj  | j
| j}t|f | j| jd||||dd |   d S )Nr   )r    r/   i   )
BLOCK_SIZE)r:   rO   rS   r>   rT   rP   rV   rR   rQ   r   rU   r   r,   r   r-   r    r/   r   cudasynchronize_apply_write_kernelrF   strideclear_staged_writes)r'   r=   indices_uva
starts_uvacu_lens_uvadiff_lennew_sizecontents_uvar   r   r   apply_write   s2   





zStagedWriteTensor.apply_writec                 C   s,   | j   | j  | j  | j  d S rB   )rO   clearrP   rQ   rR   )r'   r   r   r   rf      s   


z%StagedWriteTensor.clear_staged_writes)r.   F)r   N)r)   r*   r+   r,   r   r   r    r   boolr(   r   floatr_   r`   rm   rf   r   r   r   r   rG   h   s4    

'

'rG   ra   c                 C   s   t d}t || }t || }	|dkr t || d nd}
t || }||
 }td||D ])}|t d| }||k }t j||
 | |d}t j| ||  |	 | ||d q3d S )Nr   r8   )mask)r   
program_idloadr4   arangestore)
output_ptroutput_stridewrite_indices_ptrwrite_starts_ptrwrite_contents_ptrwrite_cu_lens_ptrra   pidrow_idx	start_idxcu_startcu_endcontent_leniblockrq   contentr   r   r   rd      s   

rd   )NN)collections.abcr   r   	functoolsr   r%   r   r   vllm.triton_utilsr   r   vllm.utils.math_utilsr   vllm.utils.platform_utilsr   vllm.utils.torch_utilsr	   r9   r   r   r   r   r-   rD   rG   jit	constexprrd   r   r   r   r   <module>   s6   

	'i