o
    Tie                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlZddlZddlmZmZmZ ddlmZ ddlZddlmZmZmZmZmZmZ ddlmZmZmZmZmZm Z  d	d
lm!Z! d	dl"m#Z# d	dl$m%Z% d	dl&m'Z' ddl(m)Z) dZ*g dZ+e+e#7 Z+e,ddZ-ej./ddgZ0dZ1dddgiZ2d8ddZ3dd Z4dd Z5dd  Z6d!e7d"ee7ee8 f fd#d$Z9d%ee7 d"ee7ee8 f fd&d'Z:d9d)d*Z;d+d, Z<d-d. Z=d/d0 Z>d1e7d2e?fd3d4Z@d8d5d6ZAeBd7kreA  dS dS ):z
DeepSpeed runner is the main front-end to launching multi-worker
training jobs with DeepSpeed. By default this uses pdsh to parallel
ssh into multiple worker nodes and launch all the necessary processes
per rank for training.
    N)deepcopy)TupleListDict)defaultdict   )
PDSHRunnerOpenMPIRunnerMVAPICHRunnerSlurmRunnerMPICHRunner
IMPIRunner)PDSH_LAUNCHEROPENMPI_LAUNCHERMVAPICH_LAUNCHERSLURM_LAUNCHERMPICH_LAUNCHERIMPI_LAUNCHER   )TORCH_DISTRIBUTED_DEFAULT_PORT)NEBULA_EXPORT_ENVS)logger)	Autotuner)get_acceleratorz/job/hostfile)MLFLOWPYTHONMV2UCXDS_ENV_FILEz.deepspeed_env~.i   AISC_JOB_NAMENCCL_IB_HCAUCX_NET_DEVICESc                 C   s  t jdt jd}|jddttdd |jddtd	d
d |jddtd	dd |jdtddd |jdtddd |jdtddd |jddtddd |jdttdd |jdd	tdd |jddtdd |jdttd d |jd!d	td"d |jd#d$d%d& |jd'd$d(d& |jd)d$d*d& |jd+d$d,d& |jd-d$d.d& |jd/d$d0d& |jd1d$d2d& |jd3d4td5d |jd6d	d7d8gtd9d: |jd;d$d<d& |jd=td>d? |jd@t j	dA |jdBd$dCd& |jdDtd dEd |jdFtd dGd |j
| dHS )INzODeepSpeed runner to help launch distributed multi-node/multi-gpu training jobs.)descriptionformatter_classz-Hz
--hostfileziHostfile path (in MPI style) that defines the resource pool available to the job (e.g., worker-0 slots=4))typedefaulthelpz-iz	--include a  Specify hardware resources to use during execution.
                        String format is
                                NODE_SPEC[@NODE_SPEC ...],
                        where
                                NODE_SPEC=NAME[:SLOT[,SLOT ...]].
                        If :SLOT is omitted, include all slots on that host.
                        Example: -i "worker-0@worker-1:0,2" will use all slots
                        on worker-0 and slots [0, 2] on worker-1.
                        z-ez	--excludeaV  Specify hardware resources to NOT use during execution.
                        Mutually exclusive with --include. Resource formatting
                        is the same as --include.
                        Example: -e "worker-1:0" will use all available
                        resources except slot 0 on worker-1.
                        z--num_nodesz^Total number of worker nodes to run on, this will use the top N hosts from the given hostfile.z--min_elastic_nodeszaMinimum number of nodes to run elastic training on. Default is 1 when elastic training is enabledz--max_elastic_nodesziMaximum number of nodes to run elastic training on. Default is num_nodes when elastic training is enabledz
--num_gpusz--num_acceleratorszLMax number of GPUs to use on each node, will use [0:N) GPU ids on each node.z--master_portzN(optional) Port used by PyTorch distributed for communication during training.)r'   r&   r(   z--master_addrzU(optional) IP address of node 0, will be inferred via 'hostname -I' if not specified.z--node_rankzGID of each node in the range [0:N). Only required when --no_ssh is set.z
--launcherz(optional) choose launcher backend for multi-node training. Options currently include PDSH, OpenMPI, MVAPICH, SLURM, MPICH, IMPI.z--launcher_argszH(optional) pass launcher specific arguments as a single quoted argument.--module
store_truezwChange each process to interpret the launch script as a Python module, executing with the same behavior as 'python -m'.)actionr(   --no_pythonzMSkip prepending the training script with 'python' - just execute it directly.--no_local_rankzNDo not pass local_rank as an argument when calling the user's training script.z--no_sshz=Launch training independently on each node without ssh setup.z--no_ssh_checkz5Do not perform ssh check in multi-node launcher modelz--force_multiz`Force multi-node launcher mode, helps in cases where user wants to launch on single remote node.
--save_pidzSave file containing launcher process id (pid) at /tmp/<main-pid>.ds, where <main-pid> is the pid of the first process that invoked `deepspeed`. Useful when launching deepspeed processes programmatically.z--enable_each_rank_logNonezFredirect the stdout and stderr from each rank into different log filesz--autotuningtunerunzXRun DeepSpeed autotuner to discover optimal configuration parameters before running job.)r'   choicesr&   r(   z--elastic_trainingz-Enable elastic training support in DeepSpeed.user_scriptz:User script to launch, followed by any required arguments.)r&   r(   	user_args)nargs--bind_cores_to_rankz-Bind each rank to different cores of the hostz--bind_core_listzList of cores to bind to with comma separated list of numbers and range. i.e. 1,3-5,7 => [1,3,4,5,7].  When not specified, all cores on system would be used rank bindingz
--ssh_portz&SSH port to use for remote connections)args)argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentstrDLTS_HOSTFILEintr   r   	REMAINDER
parse_args)r9   parser rD   M/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/launcher/runner.pyrB   0   s   rB   c                 C   sZ   t j| std d S t| d}| }W d    t|S 1 s$w   Y  t|S )NzNUnable to find hostfile, will proceed with training with local resources only.r)ospathisfiler   warningopen	readlines_parse_hostfile)hostfile_pathfdhostfile_textrD   rD   rE   fetch_hostfile   s   


rQ   c                 C   s   d}t  }| D ]L}| }t||}|ds|dkrq|rE|d}t|d}||v r@t	d|   t
d| d|||< qt	d|   t
d	| dt|d
krgt	d|   t
d|S )Nz^(\S+)\s+slots=(\d+)#r)   r   r   zBad hostfile text: z'Hostfile contains multiple entries for z", unable to proceed with launchingzHostfile contains a bad entry: r   zOHostfile is empty or not formatted correctly, unable to proceed with launching.)collectionsOrderedDictstripresearch
startswithgroupr@   r   error
ValueErrorlen)hostfile_linespatternresource_poollinematchhost	num_slotsrD   rD   rE   rM      s(   

rM   c                 C   s$   g }| D ]}||vr| | q|S N)append)datanew_listxrD   rD   rE   _stable_remove_duplicates  s   
ri   node_configreturnc                 C   sB   d}d}|| vr| g fS |  |\}}dd | |D }||fS )N:,c                 S   s   g | ]}t |qS rD   )r@   ).0rh   rD   rD   rE   
<listcomp>      z%parse_node_config.<locals>.<listcomp>)split)rj   SLOT_LIST_STARTSLOT_SEPhostnameslotsrD   rD   rE   parse_node_config  s   rv   node_config_listc                 C   sJ   d}t t}| |D ]}t|\}}||  |7  < qdd | D S )N@c                 S   s"   i | ]\}}|t tt|qS rD   )sortedlistset)rn   kvrD   rD   rE   
<dictcomp>"  s   " z*parse_node_config_list.<locals>.<dictcomp>)r   rz   rq   rv   items)rw   NODE_SEPnode_configsrj   rt   ru   rD   rD   rE   parse_node_config_list  s   r   r)   c                 C   s  |dkr|dkrt d|dkr|dkr| S t }|r|}|dkr't| }|}t| D ]h\}}t|dkrz|| vrCt d| d|D ]}|| | vrXt d| d| dqE|r`|||< q-|ry|D ]}td	| d
|  || | qdq-|| vrt d| d|r| | ||< q-|rg ||< q-g }|D ]}t	|| ||< t|| dkr|
| q|D ]}	||	= qt }
| D ]}||v r|| |
|< q|
S )a  Parse an inclusion or exclusion string and filter a hostfile dictionary.

    String format is NODE_SPEC[@NODE_SPEC ...], where
        NODE_SPEC = NAME[:SLOT[,SLOT ...]].
    If :SLOT is omitted, include/exclude all slots on that host.

    Examples:
        include_str="worker-0@worker-1:0,2" will use all slots on worker-0 and
          slots [0, 2] on worker-1.
        exclude_str="worker-1:0" will use all available resources except
          slot 0 on worker-1.
    r)   z3include_str and exclude_str are mutually exclusive.r   z
Hostname 'z' not found in hostfilez	No slot 'z' specified on host ''z	removing z from )r[   dictr   r   r   r\   r   inforemoveri   re   rS   rT   )	host_infoinclude_strexclude_strfiltered_hosts	parse_strrt   ru   slotdel_keysnameordered_hostsrb   rD   rD   rE   parse_resource_filter%  s\   

r   c                 C   sP   t  }t|}|  D ]\}}||v r|| ntt|||< qt|||dS )N)r   r   )rS   rT   r   r   rz   ranger   )r_   	inclusion	exclusionactive_resourcesr   rt   ru   rD   rD   rE   parse_inclusion_exclusionv  s
   "r   c                 C   s$   t | d}t|d}|S )Nutf-8)jsondumpsencodebase64urlsafe_b64encodedecode)
world_infoworld_info_jsonworld_info_base64rD   rD   rE   encode_world_info  s   r   c                 C   sP   t | |}td |  |  td |  | jdkr&|  d S d S )Nz[Start] Running autotuningz[End] Running autotuningr3   )r   r   r   r2   print_tuning_resultswrite_optimal_config
autotuningrun_after_tuning)r9   r   tunerrD   rD   rE   run_autotuning  s   



r   str_num_nodeselastic_trainingc                 C   s   |  d}t|dkrt|d d}}||fS t|dkr1|r1t|d t|d }}||fS t|dkr=|s=tdtd| )Nrl   r   r   r*   r   z4MIN:MAX format is only supported in elastic trainingz%num_nodes {} is not in MIN:MAX format)rq   r\   r@   RuntimeErrorformat)r   r   	node_list	min_nodes	max_nodesrD   rD   rE   parse_num_nodes  s   
r   c                    s  t | } | jr| jdksJ dt| j}t  d }tj	|d}|s^t
|r^d| }t
| jsBt
| jsB| jdksB| jdkrJt| d nd| | _t| d| j  tj|= | jdksh| jdkrv| jdksr| jdkrvtd	d
}|si }t  }|dkrtd||d< d| _d}|s| jdkrtdt|| j| j}tj  |r| js| jst| d }z&d| jd urd| j dnd | d }	t|	}
tj|
tjtjd W n tjy   td| j d| d| j dw | jsb|sJ t| d }d}	| jd ur|	d| j 7 }	|	d| d7 }	t|	}zt |W n tjyC } zt!"d |d }~ww #d d | _| jsVtdt!$d| j d |  | j%dkrot&| | d S | jdkrt'( }t)| D ]\}}| j|kr n|| ||< q|}| jdkrt'( }| D ]}tt*| j||< q|}| jr| j+rJ d!| jrd| j,  krt
|k sJ d" J d"t-|}| j.st
|dko| j }|st/j0d#d$d%d&| d'| j d(| j1 g}| jr|2d)| j,  | j3r|2d* | j4r#|2d+ | j+r,|2d, | j5r9|d-t6  g7 }| j7rF|2d.| j7  | jra|2d/ |2d0| j8  |2d1| j9  | j:rj|2d2 | j;d ury|2d3| j;  || j<g | j= }n(| j>? | _>| j>t@krtA| |}nI| j>tBkrtC| ||}n<| j>tDkrtE| ||}n/| j>tFkrtG| ||}n"| j>tHkrtI| ||}n| j>tJkrtK| ||}ntLd4| j> |M std5| j> d6tjNOd7}d8 v r|d9  d8   d8< n| d8< g }tPQ D ]\}}|  v r||7 }qtRt S  }  D ]#tTfd:d;|D rKtTfd<d;|D sK|U   q)tVD ]F}tjNW|tX}tjNY|rt!$d=|  tZ|d>}|[ D ]jd?dd@\}}|U|| qqW d    n	1 sw   Y  qO| j>t@kr|\ |\} n|\ |}t!$dAdW|  tj]| dB fdCdD}| j>t@kr|rt^^t^j_| t^^t^j`| a  jbdkrt/cjb d S d S )ENr)   z8Master Addr is required when elastic training is enabledr   zDetected VISIBLE_DEVICES=r   z but ignoring it because one or several of --include/--exclude/--num_gpus/--num_nodes cl args were used. If you want to use CUDA_VISIBLE_DEVICES don't pass any of these arguments to deepspeed.z
localhost:z: setting --include=z2Cannot specify num_nodes/gpus with include/excludeTz-Unable to proceed, no GPU resources available	localhostz	127.0.0.1Fz9Num nodes is >1 but no extra nodes available via hostfilez!ssh -o PasswordAuthentication=no z-p  z	 hostname)stderrstdoutzUsing hostfile at z
 but host=zP was not reachable via ssh. If you are running with a single node please remove z or setup passwordless ssh.zssh z -p z hostname -IziUnable to detect suitable master address via 'hostname -I', please manually specify one via --master_addrr   ziUnable to detect suitable master address via `hostname -I`, please manually specify one via --master_addrzUsing IP address of z
 for node z=--no_local_rank argument is not supported in Elastic trainingzELaunching training without ssh, but --node_rank is not set correctly.z-uz-mzdeepspeed.launcher.launchz--world_info=z--master_addr=z--master_port=z--node_rank=r.   r+   r/   r0   z--enable_each_rank_log=z--enable_elastic_trainingz--max_elastic_nodes=z--min_elastic_nodes=r8   z--bind_core_list=zUnknown launcher z
launcher 'z' not installed.r    
PYTHONPATHrl   c                    s   g | ]}  |qS rD   )rX   rn   r   varrD   rE   ro   K  s    zmain.<locals>.<listcomp>c                    s   g | ]} |kqS rD   rD   r   r   rD   rE   ro   L  rp   zdeepspeed_env file = rF   =)maxsplitzcmd = envc                    sP    tj td  tj tj d}|  td t	
d d S )Ng?r   r   )send_signalsignalSIGINTtimesleepSIGTERM
subprocessPopenwaitsysexit)signumframeresult_kill)r   kill_cmdresultrD   rE   sigkill_handler`  s   

zmain.<locals>.sigkill_handler)drB   r   master_addrrQ   hostfiler   visible_devices_envsrG   environgetr\   includeexclude	num_nodesnum_gpusprintr[   device_countr   r   copyno_ssh_checkno_sshrz   keysssh_portshlexrq   r   
check_callDEVNULLCalledProcessErrorcheck_outputr   rZ   r   r   r   r   rS   rT   	enumerater   no_local_rank	node_rankr   force_multir   
executablemaster_portre   	no_pythonmodulesave_pidgetpidenable_each_rank_logmax_elastic_nodesmin_elastic_nodesbind_cores_to_rankbind_core_listr5   r6   launcherlowerr   r   r   r	   r   r   r   r   r   r
   r   r   NotImplementedErrorbackend_existsrH   abspathEXCLUDE_ENVSr   EXPORT_ENVSexport_envsany
add_exportDEEPSPEED_ENVIRONMENT_PATHSjoinDEEPSPEED_ENVIRONMENT_NAMErI   rK   rL   get_cmdr   r   r   r   r   
returncoder   )r9   r_   visible_devices_envvisible_devicesdetected_strmulti_node_execr   r   
first_hostssh_check_cmdsafe_ssh_cmdhostname_cmderrupdated_active_resourcescountrt   r   deepspeed_launchcmdrunner	curr_pathexcluded_varsexclude_keyvar_listexportsenviron_pathenviron_filerO   keyvalr   rD   )r   r   r   r   rE   main  s`  

(












	r  __main__rd   )r)   r)   )C__doc__rG   rV   r   r   r   r:   r   rS   r   r   r   r   typingr   r   r   r   r   multinode_runnerr   r	   r
   r   r   r   	constantsr   r   r   r   r   r   r   nebula.constantsr   utilsr   r   r   deepspeed.acceleratorr   r?   r   getenvr   rH   
expanduserr   PDSH_MAX_FAN_OUTr   rB   rQ   rM   ri   r>   r@   rv   r   r   r   r   r   boolr   r  __name__rD   rD   rD   rE   <module>   s\     
 & 
"
Q

 T
