o
    i2                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
mZmZ d dlmZ dd Zd
ddZed	krFe  dS dS )    N)Path)str2boolstr_or_none)get_commandline_argsc                  C   s   t jdt jd} | jdddd | jddd	d | jd
ddd | jdtddd |  }|jdtddd |jdtd dd | jdtddd | jdtddd | jdtd dd | jdtd dd | jdtd d!d | jd"td#d$ | S )%Nz5Launch distributed process with appropriate options. )descriptionformatter_classz--cmdz>The path of cmd script of Kaldi: run.pl. queue.pl, or slurm.plzutils/run.pl)helpdefaultz--logz The path of log file used by cmdzrun.logz--max_num_log_filesz*The maximum number of log-files to be kepti  --ngpu   zThe number of GPUs per node)typer	   r   --num_nodeszThe number of nodesz--hosta/  Directly specify the host names.  The job are submitted via SSH. Multiple host names can be specified by splitting by comma. e.g. host1,host2 You can also the device id after the host name with ':'. e.g. host1:0:2:3,host2:0:2. If the device ids are specified in this way, the value of --ngpu is ignored.z	--envfilezpath.shz_Source the shell script before executing command. This option is used when --host is specified.--multiprocessing_distributedTz1Distributed method is used when single-node mode.z--master_portzLSpecify the port number of masterMaster is a host machine has RANK0 process.z--master_addrzLSpecify the address s of master. Master is a host machine has RANK0 process.z--init_file_prefixz.dist_init_zThe file name prefix for init_file, which is used for 'Shared-file system initialization'. This option is used when --port is not specifiedargs+)r   nargs)	argparseArgumentParserArgumentDefaultsHelpFormatteradd_argumentintadd_mutually_exclusive_groupstrr   r   )parseregroup r   F/home/ubuntu/.local/lib/python3.10/site-packages/espnet2/bin/launch.py
get_parser   s|   
r   c                 C   sX  d}t jt j|d t t  t }|| }t|j	|_	|j
d u r8t|j	d d u r8td|j	d  |j
d u rE|jdkrEd }nP|jd u rk|jtt  }t| }t|jjddd dd	| g}n*d
t|jg}|jd ur|d|jg7 }n|j
d ur|d|j
dd dd g7 }t|jd ddD ]P}|dkrt|j}|j|jd |j  }n#t|j}	|	j|	jd|  |	j  }|	j|	jd|d   |	j  }| r||jd kr|  qt || qg }
|j
d urg }g }|j
dD ]0}|d}|d }t!|dkrdd |dd  D }nt"t|j#}|$| |$| qt%dd |D }t t!| d| d |j&d urUd|j& }nd}|jdkrst|jjjddd t|jj'ddd}nd }d}t(||D ]q\}}t!|dkrdnd}t!|dkr|ndg}|D ]R}|j)dt|d d!d"t|d#t|d$t|g
 | } |dkr| d%d&g7 } d't*+  d(| d(d),d*d | D  d+}t-j.d,|d-|g||d.}|
$| |d7 }qq|n|jdkr;|j#dkr|j/rt d/|j# d0 n
t d/|j# d1 |j	d2t|j#|jg |j) dt|j#d t|j/g } t-.| }|
$| nt|j	d j0d3krJtd4t|j	d j0d5krYtd6t|j	d j0d7krt |j d8|j# d9 |j	d2t|j#d:tt1|j#dd;t|j|jd<d=g	 |j) dt|j#d d>d?d@g | } |j#dkr| d%d&g7 } t-.| }|
$| nRt |j d8|j# dA |j	d2t|j#d:tt1|j#dd;t|j|jdBdCt|jg
 |j) dt|j#d d>d?dDg | } |j#dkr| d%d&g7 } t-.| }|
$| t dE|j  dF}t2dGd |
D rZ|
D ]3}|r-|j3d ur-|4  qz|5dH W n t-j6y?   Y nw |j3d urN|j3dkrNd}qt2dGd |
D s|
D ]M}|j3dkrt7t-j8|j3| dIt9j:dJ t|j}| r|' }t"|}W d    n	1 sw   Y  tdK|j dLd,|dMd   tq\d S )NNz>%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s)levelformatr   zDThe first args of --cmd should be a script path. e.g. utils/run.pl: r   T)exist_okparentsz--dist_init_methodzfile://z--dist_master_portz--dist_master_addr,:z.1.c                 S   s   g | ]}t |qS r   )r   .0xr   r   r   
<listcomp>   s    zmain.<locals>.<listcomp>c                 s   s    | ]
}t t|d V  qdS )r   N)maxlenr&   r   r   r   	<genexpr>   s    zmain.<locals>.<genexpr>znodes with world_size=z via SSHzsource  -)r!   r    wzutf-8)encodingnoner
   r   falsez--local_rankz--dist_rankz--dist_world_sizez--dist_backendglooz<< EOF
set -euo pipefail
cd 
 c                 S   s    g | ]}t |d kr|ndqS )r   z'')r+   )r'   cr   r   r   r)      s     z
EOF
sshbash)stdoutstderrzsingle-node with zgpu on distributed modezgpu using DataParallelz--gpuzrun.plz5run.pl doesn't support submitting to the other nodes.zssh.plz#Use --host option instead of ssh.plzslurm.plz
nodes and zgpu-per-node using srunz--num_threadsr   srunz--export=ALLtruez--dist_launcherslurmzgpu-per-node using mpirunmpirunz-npmpiz
log file: Fc                 s   s    | ]}|j d u V  qd S N)
returncode)r'   pr   r   r   r,   _  s    g      ?)rA   cmd)filez,
################### The last 1000 lines of z ###################
i);loggingbasicConfigINFOinfor   r   
parse_argsshlexsplitrC   hostshutilwhichRuntimeError	num_nodesmaster_portinit_file_prefixr   uuiduuid4r   absoluteparentmkdirmaster_addrrangemax_num_log_fileslogstemsuffixexistsunlinkmover+   listngpuappendsumenvfileopenzipr   osgetcwdjoin
subprocessPopenmultiprocessing_distributednamer*   anyrA   killwaitTimeoutExpiredprintCalledProcessErrorsysr:   )rC   logfmtr   r   init_method	init_fileirB   pn_p	processeshostsids_listrL   spsids
world_sizeenvfrankrb   
local_rankheredocprocessfailedlinesr   r   r   main[   s  





 





1







r   __main__r@   )r   rE   rh   rJ   rM   rk   ru   rS   pathlibr   espnet2.utils.typesr   r   espnet.utils.cli_utilsr   r   r   __name__r   r   r   r   <module>   s$   
K  &
