o
    
۾iD	                     @   s   d dl Z d dlmZ d dlmZ dedede jdB fddZdededed	ede	e je jf f
d
dZ
dededB ded	ede	eee jdB f f
ddZdS )    N)get_dp_groupdp_size
num_tokensreturnc                 C   s"   | dkrd S t j| f|t jddS )N   cpudtypedevice)torchfullint32)r   r    r   O/home/ubuntu/.local/lib/python3.10/site-packages/vllm/v1/worker/gpu/dp_utils.pymake_num_tokens_across_dp	   s   r   cudagraph_sizedp_rankc                 C   s^   |dksJ t  j}tjd|tjdd}| |d |< ||d |< tj||d |d |d fS )Nr      r   r   r   )group)r   	cpu_groupr   zerosr   dist
all_reduce)r   r   r   r   r   tensorr   r   r   get_batch_metadata_across_dp   s   r   c                 C   s   |dkr|d urd|d fS d| d fS | dkrd}n|d u rd}t | |||\}}t|dk r3dS t|dk rOt|  }||d d < d||fS tj|dd}t||  }d||fS )Nr   TFr   )Fr   N)min)r   r   allitemintmaxclamp)r   r   r   r   num_tokens_across_dpcudagraph_size_across_dpmax_cudagraph_sizenum_tokens_after_paddingr   r   r   get_cudagraph_and_dp_padding   s(   



r&   )r   torch.distributeddistributedr   vllm.distributed.parallel_stater   r   Tensorr   tupler   boolr&   r   r   r   r   <module>   s4   
