o
    TÃiH	  ã                   @   sB   d dl mZmZ 	d
dedededee deeef f
dd	„ZdS )é    )ÚOptionalÚTupleNÚ
shard_rankÚ
num_shardsÚ	n_heads_qÚ
n_heads_kvÚreturnc                 C   s   ||k rt dƒ‚|du s||kr(|| }|| }| |k r$|d |d fS ||fS || dkr2t dƒ‚||k r@|| dkr@t dƒ‚||krN|| dkrNt dƒ‚|| }||krb|| }|| }||fS || }	| |	 }
||	 }||	 }|
|k r||d dfS |dfS )a¯  
    Helper to determine the number of local heads of a given shard.

    Args:
        shard_rank (int): The rank of the shard.
        num_shards (int): The total number of shards that attention is distributed over.
        n_heads_q (int): The number of query heads.
        n_heads_kv (int): The number of key/value heads. If not passed, it is assumed that
            the number of query and key/value heads are the same.
    zCThere must be at least as many attention heads as there are shards.Né   r   z8Must be an even ratio between query and key/value heads.z]If splitting a group across multiple shards, we must be able to distribute the groups evenly.z@If parallelizing groups, must be able to evenly distribute them.)Ú
ValueError)r   r   r   r   Ú
base_headsÚextra_headsÚq_ratioÚlocal_kv_headsÚlocal_q_headsÚgroup_sharding_sizeÚgroup_rank_idx© r   ún/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/sharding/attn.pyÚget_local_heads	   s8   ÿr   )N)Útypingr   r   Úintr   r   r   r   r   Ú<module>   s   ýÿþý
ý