o
    Ti                     @   s   d dl mZmZ d dlZddlmZmZ efdedededed	eeef f
d
dZ			ddeej	 dedededededed	ej	fddZ
dS )    )OptionalTupleN   )ShardingTypeDEFAULT_SHARD_GRANULARITYdim_size
shard_rank
num_shardsgranularityreturnc           	      C   sf   | | dks
J d| | }|| }|| }|| t || }|| ||k r(dnd }|| || fS )a  
    Given a dimension to shard with size dim_size, return the start and end indices of the slice
    that belong to the given rank.

    The typical use of this is as an internal helper function, so see if there is a higher level
    API that better suits the application.

    Args:
        dim_size (int): The size of the dimension to shard.
        shard_rank (int): The rank of the shard to return.
        num_shards (int): Total number of shards the dimension will be distributed across.
        granularity (int): The minimum alignment of the shard endpoints. This is used to support
            non-even head counts as well as align dimensions to cleaner GEMM boundaries.
    r   z/Dimension size must be divisible by granularityr   )min)	r   r   r	   r
   total_chunksbase_chunks_per_rankremainder_chunksstart_chunk_idend_chunk_id r   o/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/sharding/utils.pyget_shard_endpoints   s   r       param
shard_modenum_concatenated_matrices	bias_dimsc           
         s  k sJ ddu rdS dkrS |t jkr\dtdtjffdd}j|krE|dd	\} tj fd
d|D ddS |dd	\} tj fdd|D ddS |t jkrd }	t	|	\ tj
dd}tj fdd|D ddS dS )aj  
    Utility for sharding a parameter. This will return the slice of the parameter that should
    exist on the given shard_rank given the sharding configuration. The workflow here is
    to find the minimum bounded Tensor to shard, get the slicing endpoints, and then concatenate
    as needed.

    The typical use of this is as an internal helper function, so see if there is a higher level
    API that better suits the application.

    Args:
        param (torch.Tensor): The parameter to shard.
        shard_mode (ShardingType): The type of sharding to apply. See ShardingType for more context.
        shard_rank (int): The rank of the shard to return.
        num_shards (int): Total number of shards the parameter will be distrbuted across.
        num_concatenated_matrices (int): The number of matrices that have been concatenated together in the original
            parameter. An example of this is a fused QKV projection matrix, where the `num_concatenated_matrices`
            argument would be 3.
        granularity (int): The minimum alignment of the shard endpoints. For attention projection matrices, this
            should be set to the head size to support non-even sharding.
        bias_dims (int): The number of dimensions that are considered bias dimensions. This is used to support
            sharding of MoE and non-MoE biases on the same codepath.
    z'Shard rank must be less than num_shardsNr   dim_idxr   c                    s6    |  }t| \}}tj| d||fS )Ndim)sizer   torchchunk)r   r   start_channel_idend_channel_id)r
   r   r	   r   r   r   r   get_matricesU   s   z!shard_param.<locals>.get_matrices)r   c                       g | ]
}|d  f qS .r   .0matr!   r    r   r   
<listcomp>]       zshard_param.<locals>.<listcomp>r   c                    s"   g | ]}|d  ddf qS ).Nr   r&   r)   r   r   r*   b   s   " c                    r$   r%   r   r&   r)   r   r   r*   h   r+   )r   OUTER_DIMENSIONintr   TensorndimcatINNER_DIMENSIONr   r   r   )
r   r   r   r	   r   r
   r   r"   matricesr   r   )r!   r
   r   r	   r   r   r    r   shard_param+   s$   
 

r4   )r   r   r   )typingr   r   r   typesr   r   r.   r   r/   r4   r   r   r   r   <module>   s>   

"