o
    TiY                     @   s`   d dl Z ddlmZ ddlmZmZ de jdedede jfd	d
ZdedededefddZ	dS )    N   )ShardingType)shard_paramget_shard_endpointsparam
shard_rank
num_shardsreturnc                 C   s   t | tj||ddS )a  
    Utility method for sharding an unembed parameter. We shard unembeddings on the vocab dimension
    with the expectation of an all-gather to produce the full results.

    TODO(cmikeh2): Really ideal would be if MII could have access to the comm and we would do
    an A2A and sharded sampling.

    Args:
        param (torch.Tensor): The parameter to shard. Should be of shape [vocab_size, model_dim]
        shard_rank (int): Which shard of the partitioned tensor to return.
        num_shards (int): The total number of shards the parameter is distributed across.

    Returns:
        torch.Tensor: The sharded parameter of shape [sharded_vocab_size, model_dim]
    r   granularity)r   r   OUTER_DIMENSION)r   r   r    r   q/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/inference/v2/model_implementations/sharding/unembed.pyshard_unembed_param   s   r   
vocab_sizec                 C   s   t | ||dd\}}|| S )aE  
    Utility method for determining the sharded vocab size of a sharded unembed parameter.

    Args:
        vocab_size (int): The size of the vocabulary.
        shard_rank (int): Which shard of the partitioned tensor to return.
        num_shards (int): The total number of shards the parameter is distributed across.
    r   r
   )r   )r   r   r   	start_idxend_idxr   r   r   sharded_unembed_dim   s   	r   )
torchtypesr   utilsr   r   Tensorintr   r   r   r   r   r   <module>   s
   