o
    i\6                     @   s(   d dl mZ d dlZG dd deZdS )    )ListNc                   @   s  e Zd ZdZ			dWddddddddddddd	d
Zdd Zdd Zedd Zej	de
fddZedd Zedd Zedd Zedd Zedd Zedd Zedd  Zed!d" Zed#d$ Zed%d& Zed'd( Zed)d* Zed+d, Zed-d. Zd/d0 Zde
fd1d2Zde
fd3d4Zd5d6 Zd7d8 Zd9d: Zd;d< Zd=d> Z d?d@ Z!dAdB Z"dCdD Z#dEdF Z$dGdH Z%dIdJ Z&dKe
dLe'e
 fdMdNZ(dOe
dLe'e
 fdPdQZ)e*dRe+fdSdTZ,dUdV Z-dS )XMappinga  
    A node with 8 GPUs, tp_size = 4, cp_size = 1, pp_size = 2

    2 tp groups:

    - [0, 1, 2, 3]
    - [4, 5, 6, 7]

    4 pp groups:

    - [0, 4]
    - [1, 5]
    - [2, 6]
    - [3, 7]

    A node with 8 GPUs, tp_size = 4, cp_size = 2, pp_size = 1

    2 tp groups:

    - [0, 1, 2, 3]
    - [4, 5, 6, 7]

    4 cp groups:

    - [0, 4]
    - [1, 5]
    - [2, 6]
    - [3, 7]

    A node with 8 GPUs, moe_tp_size = 2, moe_ep_size = 4

    4 moe_tp groups:

    - [0, 4]
    - [1, 5]
    - [2, 6]
    - [3, 7]

    2 moe_ep groups:

    - [0, 1, 2, 3]
    - [4, 5, 6, 7]

    2 nodes with 16 GPUs, moe_tp_size = 2, moe_ep_size = 4, pp_size = 2

    8 moe_tp groups:

    - [0 4]
    - [1 5]
    - [2 6]
    - [3 7]
    - [8 12]
    - [9 13]
    - [10 14]
    - [11 15]

    4 moe_ep groups:

    - [0, 1, 2, 3]
    - [4, 5, 6, 7]
    - [8, 9, 10, 11]
    - [12, 13, 14, 15]

    8 pp groups:

    - [0 8]
    - [1 9]
    - [2 10]
    - [3 11]
    - [4 12]
    - [5 13]
    - [6 14]
    - [7 15]

    2 nodes with 8 GPUs, tp_size 2, pp_size 2, cp_size 2

    4 tp groups:
    - [0, 1]
    - [2, 3]
    - [4, 5]
    - [6, 7]

    4 pp groups:
    - [0, 4]
    - [1, 5]
    - [2, 6]
    - [3, 7]

    4 cp groups:
    - [0, 2]
    - [1, 3]
    - [4, 6]
    - [5, 7]
       r      NF)cp_size	cp_configtp_sizepp_sizemoe_cluster_sizemoe_tp_sizemoe_ep_sizeattn_tp_sizeattn_cp_sizeauto_parallelenable_attention_dpc             
   C   sd  |dkrd}|	dkr|
dkr|| }	d}
n|	dkr ||
|  }	n
|
dkr*||	|  }
|dkr9|dkr9|| }d}n|dkrD|| | }n
|dkrN|| | }|dkr]t d| d| d|rz|dksk|dksk|dkryt d| d| d| dn|| | |krt d| d| d	| d	| d	|	|
 }|| }||krt d
| d|	 d	|
 d	| || }||| krt d| d	| d| d	| |
dkr|dkrtd|| _|| _|d ur|ni | _|| _|	| _|
| _|| _|| _	|| _
|| _|| _|| _|| _|| _g | _g | _g | _g | _g | _g | _|dkr#|
dks#J t|| D ]}t|||| }| jt| q)t|D ]'}t|D ]}t|| | | |d | | | |}| jt| qGqAt|D ]*}t|D ]"}t|| | ||  || | |d |  }| jt| qsqmt|D ]%}t||
 D ]}t|| | |d | ||
 }| jt| qqt|D ]*}t|	D ]"}t|| || |
  || |d | |
  }| jt| q̐qt|D ]:}t|	D ]2}t|D ]*}t|| || |
  ||
  || || |
  |d |
  }| jt| qqqd S )Nr   r   z(attn_cp_size must be 1 for now, but got z, .zLWhen auto parallel is enabled, tp_size, pp_size, cp_size must be 1, but got z>world_size must equal to tp_size * pp_size * cp_size, but got z != z * zLtp_size must equal to moe_tp_size * moe_ep_size * moe_cluster_size, but got zEtp_size * cp_size must equal to attn_tp_size * attn_cp_size, but got zCP don't support MoE tp/ep yet)
ValueErrorNotImplementedErrorr	   r   r   r
   r   r   r   r   r   r   
world_sizer   rankgpus_per_node	pp_groups	cp_groups	tp_groupsmoe_cluster_groupsmoe_tp_groupsmoe_ep_groupsrangeappendlist)selfr   r   r   r   r   r	   r
   r   r   r   r   r   r   r   moe_tp_ep_sizemoe_tp_cluster_ep_sizeattn_tp_cp_sizeiranksjk r)   M/home/ubuntu/vllm_env/lib/python3.10/site-packages/flashinfer/comm/mapping.py__init__u   s   
"

		


zMapping.__init__c                 C   s   t |tstS | j|jkoN| j|jkoN| j|jkoN| j|jkoN| j|jkoN| j|jkoN| j	|j	koN| j
|j
koN| j|jkoN| j|jkoN| j|jkoN| j|jkS N)
isinstancer   NotImplementedr   r   r   r   r	   r   r
   r   r   r   r   r   )r!   otherr)   r)   r*   __eq__  s2   









	


zMapping.__eq__c                 C   s8   t | j| j| j| j| j| j| j| j| j	| j
| j| jfS r,   )hashr   r   r   r   r	   r
   r   r   r   r   r   r   r!   r)   r)   r*   __hash__$  s   zMapping.__hash__c                 C   s   | j S r,   )_rankr2   r)   r)   r*   r   6  s   zMapping.rankr   c                 C   sH   | j st|tr|dk r|| jkrtd| jd  d| d|| _d S )Nr   z(Rank should be an integer between 0 and r   z
, but got r   )r   r-   intr   r   r4   r!   r   r)   r)   r*   r   :  s   
c                 C   s   | j rdS | j| j S Nr   )r   r   r	   r2   r)   r)   r*   tp_rankD  s   zMapping.tp_rankc                 C   s   | j rdS | j| j| j  S r7   r   r   r	   r   r2   r)   r)   r*   pp_rankH  s   zMapping.pp_rankc                 C   s"   | j rdS | j| j| j  | j S r7   r9   r2   r)   r)   r*   cp_rankL  s
   zMapping.cp_rankc                 C   s   | j | j| j  S r,   )r8   r   r   r2   r)   r)   r*   moe_tp_rankT  s   zMapping.moe_tp_rankc                 C      | j | j S r,   )r8   r   r2   r)   r)   r*   moe_cluster_rankX     zMapping.moe_cluster_rankc                 C   r=   r,   )r8   r   r2   r)   r)   r*   moe_ep_rank\  r?   zMapping.moe_ep_rankc                 C      | j | j| j | j  S r,   )r   r:   r   r;   r2   r)   r)   r*   tp_group`     zMapping.tp_groupc                 C   rA   r,   )r   r;   r	   r8   r2   r)   r)   r*   pp_groupd  rC   zMapping.pp_groupc                 C   rA   r,   )r   r:   r	   r8   r2   r)   r)   r*   cp_grouph  rC   zMapping.cp_groupc                 C   *   | j | j| j | j | j| j  | j  S r,   )r   r:   r   r   r>   r@   r2   r)   r)   r*   moe_tp_groupl     
zMapping.moe_tp_groupc                 C   rA   r,   )r   r:   r   r<   r2   r)   r)   r*   moe_cluster_groupt  s   zMapping.moe_cluster_groupc                 C   rF   r,   )r   r:   r   r   r<   r>   r2   r)   r)   r*   moe_ep_groupz  rH   zMapping.moe_ep_groupc                 C   s   | j | j S r,   r   r   r2   r)   r)   r*   	node_rank  r?   zMapping.node_rankc                 C   r=   r,   rK   r2   r)   r)   r*   
local_rank  r?   zMapping.local_rankc                 C   
   | j dkS Nr   )r   r2   r)   r)   r*   has_cp     
zMapping.has_cpc                 C   s
   || j  S r,   r   r6   r)   r)   r*   get_node_rank  rQ   zMapping.get_node_rankc                 C   s
   || j  S r,   rR   r6   r)   r)   r*   get_local_rank  rQ   zMapping.get_local_rankc                 C   s   | j | jkS r,   )r   r   r2   r)   r)   r*   is_multi_node  s   zMapping.is_multi_nodec                 C   rN   rO   )r	   r2   r)   r)   r*   has_tp  rQ   zMapping.has_tpc                 C      | j | jd kS rO   r:   r
   r2   r)   r)   r*   is_last_pp_rank     zMapping.is_last_pp_rankc                 C   rW   )N   rX   r2   r)   r)   r*   is_second_last_pp_rank  rZ   zMapping.is_second_last_pp_rankc                 C   s
   | j dkS r7   )r:   r2   r)   r)   r*   is_first_pp_rank  rQ   zMapping.is_first_pp_rankc                 C   rN   rO   )r
   r2   r)   r)   r*   has_pp  rQ   zMapping.has_ppc                 C   s(   | j | j| j  }|dk r|| j }|S r7   r   r	   r   r   r!   pr)   r)   r*   prev_pp_rank  s   
zMapping.prev_pp_rankc                 C   s*   | j | j| j  }|| jkr|| j }|S r,   r_   r`   r)   r)   r*   next_pp_rank  s   

zMapping.next_pp_rankc                 C   rN   rO   )r   r2   r)   r)   r*   has_moe_cluster  rQ   zMapping.has_moe_clusterc                 C   rN   rO   )r   r2   r)   r)   r*   
has_moe_tp  rQ   zMapping.has_moe_tpc                 C   rN   rO   )r   r2   r)   r)   r*   
has_moe_ep  rQ   zMapping.has_moe_ep
num_layersreturnc                 C   s   t t || j| j  S r,   )torchtensor_splitaranger
   r:   tolist)r!   rg   r)   r)   r*   	pp_layers  s
   zMapping.pp_layersnum_expertsc                 C   s:   | j dksJ || j }t| j| | jd | }t|S rO   )r   r   r   r@   r    )r!   rn   experts_per_rankexperts_ranger)   r)   r*   
ep_experts  s   
zMapping.ep_expertsmappingc                 C   s   | di |S )Nr)   r)   )clsrr   r)   r)   r*   	from_dict  s   zMapping.from_dictc                 C   s6   | j | j| j| j| j| j| j| j| j| j	| j
| jdS )Nr   r   r   r   r	   r
   r   r   r   r   r   r   ru   r2   r)   r)   r*   to_dict  s   zMapping.to_dict)r   r   r   ).__name__
__module____qualname____doc__r+   r0   r3   propertyr   setterr5   r8   r:   r;   r<   r>   r@   rB   rD   rE   rG   rI   rJ   rL   rM   rP   rS   rT   rU   rV   rY   r\   r]   r^   rb   rc   rd   re   rf   r   rm   rq   classmethoddictrt   rv   r)   r)   r)   r*   r      s    a 
	













	r   )typingr   ri   objectr   r)   r)   r)   r*   <module>   s   