o
    ziO                     @   sj   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dl
mZ e eZG dd de	ZdS )    N)DictList)override)ClusterEnvironment)get_filesystemc                       s<  e Zd ZdZd& fddZd&ddZeedefdd	Z	eede
fd
dZeedefddZeedefddZedefddZededdfddZedefddZededdfddZedefddZedefddZdefddZedee
 fd d!Zde
fd"d#Zedefd$d%Z  ZS )'LSFEnvironmenta  An environment for running on clusters managed by the LSF resource manager.

    It is expected that any execution using this ClusterEnvironment was executed
    using the Job Step Manager i.e. ``jsrun``.

    This plugin expects the following environment variables:

    ``LSB_JOBID``
      The LSF assigned job ID

    ``LSB_DJOB_RANKFILE``
      The OpenMPI compatible rank file for the LSF job

    ``JSM_NAMESPACE_LOCAL_RANK``
      The node local rank for the task. This environment variable is set by ``jsrun``

    ``JSM_NAMESPACE_SIZE``
      The world size for the task. This environment variable is set by ``jsrun``

    ``JSM_NAMESPACE_RANK``
      The global rank for the task. This environment variable is set by ``jsrun``

    returnNc                    s4   t    |  | _|  | _|  | _|   d S )N)	super__init___get_main_address_main_address_get_main_port
_main_port_get_node_rank
_node_rank!_set_init_progress_group_env_varsself	__class__ ]/home/ubuntu/.local/lib/python3.10/site-packages/lightning_fabric/plugins/environments/lsf.pyr
   4   s
   



zLSFEnvironment.__init__c                 C   sP   t | jtjd< tdtjd   t | jtjd< tdtjd   d S )NMASTER_ADDRzMASTER_ADDR: MASTER_PORTzMASTER_PORT: )strr   osenvironlogdebugr   r   r   r   r   r   ;   s   z0LSFEnvironment._set_init_progress_group_env_varsc                 C   s   dS )zNLSF creates subprocesses, i.e., PyTorch Lightning does not need to spawn them.Tr   r   r   r   r   creates_processes_externallyB   s   z+LSFEnvironment.creates_processes_externallyc                 C      | j S )zrThe main address is read from an OpenMPI host rank file in the environment variable
        ``LSB_DJOB_RANKFILE``.)r   r   r   r   r   main_addressH   s   zLSFEnvironment.main_addressc                 C   r    )z0The main port is calculated from the LSF job ID.)r   r   r   r   r   	main_portO      zLSFEnvironment.main_portc                  C   s   h d} |  tj S )zQReturns ``True`` if the current process was launched using the ``jsrun`` command.>   	LSB_JOBIDLSB_DJOB_RANKFILEJSM_NAMESPACE_SIZEJSM_NAMESPACE_LOCAL_RANK)issubsetr   r   keys)required_env_varsr   r   r   detectU   s   zLSFEnvironment.detectc                 C   $   t jd}|du rtdt|S )zLThe world size is read from the environment variable ``JSM_NAMESPACE_SIZE``.r&   NzCannot determine world size. Environment variable `JSM_NAMESPACE_SIZE` not found. Make sure you run your executable with `jsrun`.r   r   get
ValueErrorint)r   
world_sizer   r   r   r1   \      zLSFEnvironment.world_sizesizec                 C      t d d S )NzYLSFEnvironment.set_world_size was called, but setting world size is not allowed. Ignored.r   r   )r   r3   r   r   r   set_world_sizeg      zLSFEnvironment.set_world_sizec                 C   r,   )zLThe world size is read from the environment variable ``JSM_NAMESPACE_RANK``.JSM_NAMESPACE_RANKNzCannot determine global rank. Environment variable `JSM_NAMESPACE_RANK` not found. Make sure you run your executable with `jsrun`.r-   )r   global_rankr   r   r   r9   k   r2   zLSFEnvironment.global_rankrankc                 C   r4   )Nz[LSFEnvironment.set_global_rank was called, but setting global rank is not allowed. Ignored.r5   )r   r:   r   r   r   set_global_rankv   r7   zLSFEnvironment.set_global_rankc                 C   r,   )zPThe local rank is read from the environment variable `JSM_NAMESPACE_LOCAL_RANK`.r'   NzCannot determine local rank. Environment variable `JSM_NAMESPACE_LOCAL_RANK` not found. Make sure you run your executable with `jsrun`.r-   )r   
local_rankr   r   r   r<   z   r2   zLSFEnvironment.local_rankc                 C   r    )zThe node rank is determined by the position of the current hostname in the OpenMPI host rank file stored in
        ``LSB_DJOB_RANKFILE``.)r   r   r   r   r   	node_rank   r#   zLSFEnvironment.node_rankc                 C   s6   |   }i }|D ]}||vrt|||< q|t  S )a  A helper method for getting the node rank.

        The node rank is determined by the position of the current node in the list of hosts used in the job. This is
        calculated by reading all hosts from ``LSB_DJOB_RANKFILE`` and finding this node's hostname in the list.

        )_read_hostslensocketgethostname)r   hostscounthostr   r   r   r      s   zLSFEnvironment._get_node_rankc                  C   sz   d} t j| }|du rtd|stdt|}||d}dd |D }W d   n1 s2w   Y  |dd S )	aE  Read compute hosts that are a part of the compute job.

        LSF uses the Job Step Manager (JSM) to manage job steps. Job steps are executed by the JSM from "launch" nodes.
        Each job is assigned a launch node. This launch node will be the first node in the list contained in
        ``LSB_DJOB_RANKFILE``.

        r%   Nz9Did not find the environment variable `LSB_DJOB_RANKFILE`z5The environment variable `LSB_DJOB_RANKFILE` is emptyrc                 S   s   g | ]}|  qS r   )strip).0liner   r   r   
<listcomp>   s    z.LSFEnvironment._read_hosts.<locals>.<listcomp>   )r   r   r.   r/   r   open)varrankfilefsfretr   r   r   r>      s   	zLSFEnvironment._read_hostsc                 C   s   |   }|d S )zA helper for getting the main address.

        The main address is assigned to the first node in the list of nodes used for the job.

        r   )r>   )r   rB   r   r   r   r      s   z LSFEnvironment._get_main_addressc                  C   sn   dt jv rtdt jd   tt jd S dt jv r3tt jd } | d d } td|   | S td)z}A helper function for accessing the main port.

        Uses the LSF job ID so all ranks can compute the main port.

        r   z&Using externally specified main port: r$   i  i'  zcalculated LSF main port: z7Could not find job id in environment variable LSB_JOBID)r   r   r   r   r0   r/   )portr   r   r   r      s   

zLSFEnvironment._get_main_port)r   N)__name__
__module____qualname____doc__r
   r   propertyr   boolr   r   r!   r0   r"   staticmethodr+   r1   r6   r9   r;   r<   r=   r   r   r>   r   r   __classcell__r   r   r   r   r      sD    



	r   )loggingr   r@   typingr   r   typing_extensionsr   9lightning_fabric.plugins.environments.cluster_environmentr   #lightning_fabric.utilities.cloud_ior   	getLoggerrR   r   r   r   r   r   r   <module>   s   
