o
    TiN                     @   s\   d dl mZ dd Zdd Zdd Zdd	 Zd
d Zdd ZdddZ	dd Z
dddZdS )    )commc                 C      | a d S N)num_kv_headsnum r   T/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/tp_shard.pyset_num_kv_heads
      r
   c                 C   r   r   num_attention_headsr   r   r   r	   set_num_attention_heads   r   r   c                 C   r   r   n_embdr   r   r   r	   
set_n_embd   r   r   c                 C   r   r   )tp_grain_sizer   r   r   r	   set_tp_grain_size   r   r   c                   C   s   dt  v rtS d S )Nr   )globalsr   r   r   r   r	   get_num_kv_heads   s   
r   c                   C      t S r   r   r   r   r   r	   get_num_attention_heads%      r   Nc           	         s   ddg}g d}d} d krt  fdd|D rd}|d kr#t }td krO| t dkrOd	t vrOt |vrO|rOt| |t| k rFd
nd }| | t S | tkrj| t }|| ||| k red
 t S d t S | | || | k rvd
 S d S )Nlm_head	embed_out)	gate_projup_proj	down_projw1w2w3Tc                 3   s    | ]	}|t  v V  qd S r   )str).0snamer   r	   	<genexpr>0   s    z!get_shard_size.<locals>.<genexpr>Fr   mlp   )anydistget_rankr   r!   r   )	
total_sizemp_sizer%   ranklast_linearmoe_mlp_layernot_moe_mlp_layer	my_slices
grain_sizer   r$   r	   get_shard_size*   s(   "&r4   c                   C   r   r   r   r   r   r   r	   
get_n_embdA   r   r5   c                 C   s*   g }t |D ]}|t| ||| q|S r   )rangeappendr4   )r,   r-   r%   shard_sizesir   r   r	   get_shard_size_listF   s   r:   )NNr   )	deepspeedr   r*   r
   r   r   r   r   r   r4   r5   r:   r   r   r   r	   <module>   s   
