o
    TiC                     @   s  d dl Z d dlZd dlZd dlZd dlmZ ddlm	Z	m
Z
mZmZ ddlmZmZmZmZmZmZ ddlmZ ddlmZ g dZd	d
 Zdd Zdd Z			d ddZ					d!ddZdefddZde fddZ!de fddZ"d"de defddZ#dS )#    N)version   )ElasticityConfigElasticityConfigErrorElasticityErrorElasticityIncompatibleWorldSize)
ELASTICITYENABLEDENABLED_DEFAULTLATEST_ELASTICITY_VERSIONMINIMUM_DEEPSPEED_VERSIONDEEPSPEED_ELASTICITY_CONFIG   )logger)&r   r               $   0   <   x         ih  i  iH  i  i  i	  i  i  i`'  i;  iN  ipb  iHl  i0  i  i  iD i  i i@b i: i` i iu i&
 iP
 c                 C   st   g }| D ]%}||kr| | q|| }ttt|k}| t|d  |  qtt|}td|  |S )Nr   zCandidate batch size: )	appendnpargmaxasarrayHCN_LISTlistsetr   info)	base_listmax_acceptable_batch_sizecandidate_batch_sizebasevalueindex r(   S/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/elasticity/elasticity.pyget_candidate_batch_sizes   s   r*   c                 C   s   g }|D ]?}| | dkrC| | }||  kr|kr!n n| | td|d d D ]}||kr2 n||k r7q*|| dkrB| | q*qt|}tt|}|S )Nr   r   r   )r   ranger    sortedr   )
batch_sizemicro_batchesmin_valid_gpusmax_valid_gpus
valid_gpusmicro_batchmax_gpusir(   r(   r)   get_valid_gpus)   s"   

r5   c           
      C   st   d}d }t t|}| D ])}t||||}	t|	|ks-t|	|kr5|r'||ks-|s5||k r5t|	}|	}|}q||fS Nr   )intminr5   len)
candidate_batch_sizesr.   min_gpusr3   prefer_largerr0   r1   final_batch_sizer-   current_valid_gpusr(   r(   r)   get_best_candidates?   s"   r?   Tc           
         s   |pd}|p t |  }t fdd| D std  tj| }g }||  || t| }t	|| |||\}}	||	fS )a*  We use two heuristics to compute the batch size
        1. We use the Lowest Common Multiple of the micro-batches
    as the base batch size and scale it by a HCN such that the result is
    the largest batch size less than the max_acceptable batch size
        2. We use each of the micro batches as a base and scale it
    by a HCN such that the result is the largest batch size less than the
    max_acceptable batch size.

    We then use brute force to count the number of compatible GPU count for
    each of the aforementioned cases, and return the batch size with the most number of
    compatible GPU counts in the min-max GPU range if provided, other wise
    we return the batch size with the most number of total compatible GPU counts.

    Returns:
        final_batch_size
        valid_gpus
    r   c                 3   s    | ]}| kV  qd S )Nr(   ).0mbr#   r(   r)   	<genexpr>l   s    z+_get_compatible_gpus_v01.<locals>.<genexpr>zWAll micro batches must be less than             or equal to max_acceptable_batch_size: )
r8   all
ValueErrorr   lcmreduceextendr   r*   r?   )
r.   r#   r;   r3   r<   rF   r"   r:   r=   r1   r(   rB   r)   _get_compatible_gpus_v01S   s   



rI   c                    s  || dkrt d| d|  fdd}|| tt| t|| t|| d\}	}
t|	 }	fdd|
D } | |v rQ||	}|	||fS  |  }g }D ]}|| }t|t| }|||  q[d	}r{t|}nt|}||}|t|g|fS )
zW
    Returns:
        final_batch_size
        valid_gpus
        micro-batch size
    r   z,In Elasticity v0.2, number of GPUs per node:z, should be divisible by model parallel size c                    s>   d }D ]}|   | dkr|d u r|}r||k r|}q|S r6   r(   )r=   candidate_microbatchr2   )current_num_gpusr.   r<   r(   r)   get_microbatch   s   z0_get_compatible_gpus_v02.<locals>.get_microbatch)r<   c                    s   g | ]}|  qS r(   r(   )r@   r4   )dp_size_per_noder(   r)   
<listcomp>   s    z,_get_compatible_gpus_v02.<locals>.<listcomp>N)	r   rI   r7   mathfloorfloatr   maxr8   )r.   r#   rK   r;   r3   r<   num_gpus_per_nodemodel_parallel_sizerL   r=   valid_world_sizevalid_dp_world_sizerJ   current_dp_sizer:   r2   min_batch_sizefactorused_microbatchr$   r(   )rK   rM   r.   r<   r)   _get_compatible_gpus_v02~   sD   





r[   target_deepspeed_versionc                 C   s:   t t}t | }d|  dt d}||k rt|dS )NzTarget deepspeed version of z( is not compatible with minimum version z supporting elasticity.T)pkg_versionparser   r   )r\   min_versiontarget_versionerr_strr(   r(   r)   _compatible_ds_version_check   s   

rb   	ds_configc                 C   s   t | vrdS | t  ttS )NF)r   getr	   r
   )rc   r(   r(   r)   elasticity_enabled   s   re   runtime_elastic_config_dictc                 C   s   t tjv rOttjt  }t|}t| }d}|j|jkr)t|d|jd|j|j	|j	kr;t|d|j	d|j	|j
|j
krMt|d|j
d|j
dS td dS )z[
    Ensure the resource scheduler saw the same elastic config we are using at runtime
    z_Elastic config '{}={}' seen by resource scheduler does not match config passed to runtime {}={}r#   r.   r   zUnable to find DEEPSPEED_ELASTICITY_CONFIG environment variable, cannot guarantee resource scheduler will scale this job using compatible GPU counts.N)r   osenvironjsonloadsr   r#   r   formatr.   r   r   warning)rf   scheduler_elastic_config_dictscheduler_elastic_configruntime_elastic_configra   r(   r(   r)   ensure_immutable_elastic_config   s.   

rp   Fc              
   C   s  t | tstdt|  d|  t| vrtdt d| t }|tts+tdt	|}|j
}|j}|dkrKt|jdkrKtd|j d	| t|jtkr]td
|j dt t|sktd| dt t|jdkrt|j|j|j|j|jd\}}	t|}nOt|jdkr|dkr|}
n dtjv rtd rttd}
ntd}td| t|j|j|
|j|j|j||d\}}	}t|}ntd|j t d|	  |dkr'||	vrt!d| d|	 d}t"t#t$|jddD ]}|| | dkr|} nq|dus"J d| d| d|j d||	|fS |rnt|jdkr7||	|fS d}t"t#t$|jddD ]}|| | dkrS|} nqD|dusiJ d| d| d|j d||	|fS ||	fS )a  Core deepspeed elasticity API. Given an elastic config (similar to the example below)
    DeepSpeed will compute a total train batch size corresponding valid GPU count list that
    provides a high level of elasticity. Elasticity in this case means we are safe to scale
    the training job up/down across the GPU count list *without* any negative impacts on
    training convergence. This is achievable primarily due to DeepSpeed's gradient accumulation
    feature which allows us to decompose a global training batch size into:
    micro-batch-size * gradient-accumulation-steps * world-size.

    "elasticity": {
        "enabled": true,
        "max_train_batch_size": 2000,
        "micro_batch_sizes": [2,4,6],
        "min_gpus": 1,
        "max_gpus" : 10000
        "min_time": 20
        "version": 0.1
    }

    Intended to be called both by scheduling infrastructure and deepspeed runtime.
    For the same `ds_config` we should return deterministic results.

    Args:
        ds_config (dict): DeepSpeed config dictionary/json
        target_deepspeed_version (str): When called from scheduling
            infrastructure we want to ensure that the target deepspeed version is
            compatible with the elasticity version used in the backend.
        world_size (int, optional): Intended/current DP world size, will do some sanity
            checks to ensure world size is actually valid with the config.
        return_microbatch (bool, optional): whether to return micro batch size or not.

    Raises:
        ElasticityConfigError: Missing required elasticity config or elasticity disabled
        ElasticityError: If target deepspeed version is not compatible with current version

    Returns:
        final_batch_size (int): total batch size used for training
        valid_gpus (list(int)): list of valid GPU counts with this config
        micro_batch_size (int, optional): if world_size is provided will return
            specific micro batch size
    z5Expected ds_config to be a dictionary but received a z, containing: 'zP' is missing from config json, please add it if running an elastic training job.z]Elasticity is disabled, please enable it ('enabled':true) if running an elastic training job.r   g?zElasticity VzF does not support model-parallel training. Given model-parallel size: z%Attempting to run elasticity version z! but runtime only supports up to z8Unable to run elasticity on target deepspeed version of z, currently g?)r.   r#   r;   r3   r<   r   
WORLD_SIZEzElasticity V 0.2 needs WORLD_SIZE to compute valid batch size. Either give it as argument to function compute_elastic_config or set it as an environment variable. Value of WORLD_SIZE as environment variable is )r.   r#   rK   r;   r3   r<   rS   rT   z*Unable to find elastic logic for version: z/Valid World Size (GPUs / Model Parallel Size): zWorld size (z:) is not valid with the current list of valid GPU counts: NT)reversez5Unable to find divisible micro batch size world_size=z, final_batch_size=z, and  micro_batches=.)%
isinstancedictrE   typer   r   rd   r	   r
   r   rT   rS   rQ   r   r   rb   r   __version__rI   r.   r#   r;   r3   prefer_larger_batch_sizer7   rg   rh   getenv	isnumericr[   NotImplementedErrorr   r!   r   r,   r   r    )rc   r\   
world_sizereturn_microbatchelastic_config_dictelastic_configrT   rS   r=   r1   rK   rr   candidate_microbatch_sizemicro_batch_sizembszr(   r(   r)   compute_elastic_config   s   
)












r   )NNT)NNTr   r   )r   F)$rg   ri   numpyr   rO   	packagingr   r]   configr   r   r   r   	constantsr   r	   r
   r   r   r   git_version_inforx   utilsr   r   r*   r5   r?   rI   r[   strrb   rv   re   rp   r   r(   r(   r(   r)   <module>   s6    
.
A