o
    ϯi                     @   s~   d dl Z d dlZd dlZzd dlmZ W n ey   dZY nw dd Zdd ZdddZd	d
 Z	dd Z
dd Zdd ZdS )    Nc                 C   
   | j dkS Nr   )rankargs r   S/home/ubuntu/.local/lib/python3.10/site-packages/laion_clap/training/distributed.pyis_global_master      
r	   c                 C   r   r   )
local_rankr   r   r   r   is_local_master   r
   r   Fc                 C   s   |rt | S t| S )N)r   r	   )r   localr   r   r   	is_master   s   r   c                  C   s<   ddg} ddg}t dd | D st dd |D rdS dS )	NOMPI_COMM_WORLD_RANKOMPI_COMM_WORLD_SIZEPMI_RANKPMI_SIZEc                 S   s   g | ]}|t jv qS r   )osenviron).0varr   r   r   
<listcomp>   s    z$is_using_horovod.<locals>.<listcomp>TF)all)	ompi_varspmi_varsr   r   r   is_using_horovod   s
   $r   c                   C   s<   dt jv rtt jd dkS dt jv rtt jd dkS dS )N
WORLD_SIZE   SLURM_NTASKSFr   r   intr   r   r   r   is_using_distributed#   s
   

r!   c                  C   s   d} dD ]}|t jv rtt j| }  nqd}dD ]}|t jv r)tt j| } nqd}dD ]}|t jv r>tt j| } nq.| ||fS )Nr   )SLURM_LOCALIDMPI_LOCALRANKIDOMPI_COMM_WORLD_LOCAL_RANK
LOCAL_RANK)SLURM_PROCIDr   r   RANKr   )r   r   r   r   r   )r   vglobal_rank
world_sizer   r   r   world_info_from_env+   s&   



r+   c                 C   sz  d| _ d| _d| _d| _| jrotd usJ dt  ttj	d }ttj	d }ttj	d }|| _|| _|| _d| _ t
| jtj	d	< t
| jtj	d
< t
| jtj	d< td| j d| j d| j dt  dt  
 nt rdtj	v rt \| _| _| _t
| jtj	d	< t
| jtj	d
< t
| jtj	d< tj j| j| j| j| jd nOdtj	v rttj	d }ttj	d }ttj	d }|| _|| _|| _tj j| j| j| j| jd nt \| _}}tj j| j| jd tj  | _tj  | _d| _ td| j d| j d| j dt  dt  
 tj r1| j r(| js(d| j }nd}tj| nd}|| _t|}|S )NFr   r   zHorovod is not installedr   r   r$   Tr%   r'   r   z!Distributed training: local_rank=z, rank=z, world_size=z, hostname=z, pid=r&   )backendinit_methodr*   r   )r,   r-   zcuda:%dzcuda:0cpu)distributedr*   r   r   horovodhvdinitr    r   r   strprintsocketgethostnamegetpidr!   r+   torchinit_process_groupdist_backenddist_urlget_world_sizeget_rankcudais_availableno_set_device_rank
set_devicedevice)r   r*   
world_rankr   _rB   r   r   r   init_distributed_device?   s   



rE   )F)r   r8   r5   horovod.torchr1   ImportErrorr	   r   r   r   r!   r+   rE   r   r   r   r   <module>   s    
