o
    rri'                     @   s   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 ddl
mZmZ ddlmZmZ dZG dd dZG d	d
 d
Zdede	dedefddZdS )    N)Path   )loggerutils)DelayedSubmissionJobPathsSUBMITIT_PREEMPT_SIGNALc                       s  e Zd ZU dZejedZi Z	e
je
jeef  ed< de
jdd f fddZd2d	d
ZedefddZedefddZdefddZedefddZede
je fddZedefddZedefddZede
je fddZede
je fddZ ede!fddZ"ede!fdd Z#ede!fd!d"Z$ede!fd#d$Z%ede!fd%d&Z&defd'd(Z'ede
jfd)d*Z(d+ed,e)ddfd-d.Z*d/e!ddfd0d1Z+  Z,S )3JobEnvironmentaV  Describe the environment inside which the job is running.
    This includes job id, number of GPUs available, ...

    This class can only be instantiated from a running submitit job.

    @plugin-dev: default implementation look for information into environment variables.
    Override _env to map environment variable to each property.
    USR2_envargsreturnc                    s0   | t urt j| g|R  S ddlm} | S )Nr   )plugins)r	   super__new__ r   get_job_environment)clsr   r   	__class__ Q/home/ubuntu/.local/lib/python3.10/site-packages/submitit/core/job_environment.pyr   &   s   zJobEnvironment.__new__Nc                 C   s   |   | _d S N)nameclusterselfr   r   r   __init__.   s   zJobEnvironment.__init__c                 C   s*   | j }|dr|d td  }| S )Nr	   )__name__endswithlenlower)r   nr   r   r   r   1   s   
zJobEnvironment.namec                 C   s   t jd }t|| j| jdS )zlProvides the paths used by submitit, including
        stdout, stderr, submitted_pickle and folder.
        SUBMITIT_FOLDER)job_idtask_id)osenvironr   r$   global_rank)r   folderr   r   r   paths8   s   
zJobEnvironment.pathsc                 C   s   t jdd|  kS )zTests if we are running inside this environment.

        @plugin-dev: assumes that the SUBMITIT_EXECUTOR variable has been
        set to the executor name
        SUBMITIT_EXECUTORr   )r&   r'   getr   r   r   r   r   	activated@   s   zJobEnvironment.activatedc                 C   s   t  S r   )socketgethostnamer   r   r   r   hostnameH      zJobEnvironment.hostnamec                 C   s   | j gS r   )r0   r   r   r   r   	hostnamesL   r1   zJobEnvironment.hostnamesc                 C   s   | j r| j  d| j S | jS )N_)array_job_idarray_task_id
raw_job_idr   r   r   r   r$   P   s   zJobEnvironment.job_idc                 C   s   t j| jd  S )Nr$   )r&   r'   r   r   r   r   r   r6   W   s   zJobEnvironment.raw_job_idc                 C   &   d}|| j vr	d S tj| j | d S )Nr4   r   r&   r'   r,   r   r"   r   r   r   r4   [      "zJobEnvironment.array_job_idc                 C   r7   )Nr5   r8   r9   r   r   r   r5   `   r:   zJobEnvironment.array_task_idc                 C      t tj| jd dS )z!Total number of tasks for the job	num_tasksr   intr&   r'   r,   r   r   r   r   r   r<   e      zJobEnvironment.num_tasksc                 C   r;   )z!Total number of nodes for the job	num_nodesr   r=   r   r   r   r   r@   j   r?   zJobEnvironment.num_nodesc                 C   r;   )zId of the current nodenoder   r=   r   r   r   r   rA   o   r?   zJobEnvironment.nodec                 C   r;   )zGlobal rank of the taskr(   r   r=   r   r   r   r   r(   t   r?   zJobEnvironment.global_rankc                 C   r;   )z/Local rank of the task, ie on the current node.
local_rankr   r=   r   r   r   r   rB   y   r?   zJobEnvironment.local_rankc                    s^    fdddD }d} j  j  j j g}| fddt||D 7 }d|}d| dS )	Nc                    s    g | ]}| d t  | qS )=getattr).0r"   r   r   r   
<listcomp>   s     z+JobEnvironment.__repr__.<locals>.<listcomp>)r$   r0   )rB   rA   r(   c                    s,   g | ]\}}| d t  | d| dqS )rC   ()rD   )rF   r"   tr   r   r   rG      s   , z, zJobEnvironment(rI   )r<   r@   zipjoin)r   infonamestotalsinfo_strr   r   r   __repr__~   s   
zJobEnvironment.__repr__c                 C   s8   d| j  }tt|d }|d u rtd| dt d|S )NSIGzUnknown signal z*, you may need to unset or update env var z (Eg: USR2))USR_SIGrE   signalRuntimeError_PREEMPT_SIG_ENV)r   r   outr   r   r   _usr_sig   s   
zJobEnvironment._usr_sigr*   
submissionc                 C   sB   t | ||}ttj|j ttj|j t|  |j dS )zSet up signals handler for the current executable.

        The default implementation checkpoint the given submission and requeues it.
        @plugin-dev: Should be adapted to the signals used in this cluster.
        N)SignalHandlerrT   SIGTERMbypassSIGCONTrX   checkpoint_and_try_requeue)r   r*   rY   handlerr   r   r   _handle_signals   s   zJobEnvironment._handle_signals	countdownc                 C   s   dS )zRequeue the current job.

        @plugin-dev:Must be overridden by JobEnvironment implementations.
            Use self.job_id to find what need to be requeued.
        Nr   )r   ra   r   r   r   _requeue   s    zJobEnvironment._requeuer   N)-r   
__module____qualname____doc__r&   r'   r,   rV   rS   r   tpClassVarDictstr__annotations__Anyr   r   classmethodr   propertyr   r*   boolr-   r0   Sequencer2   r$   r6   Optionalr4   r5   r>   r<   r@   rA   r(   rB   rQ   rX   r   r`   rb   __classcell__r   r   r   r   r	      sL   
 

	r	   c                   @   s   e Zd ZdedededdfddZdefdd	Zdd
e	de
jej ddfddZdd
e	de
jej ddfddZdd
e	de
jej ddfddZdddZdS )rZ   env	job_pathsdelayedr   Nc                 C   s*   || _ || _|| _t | _t | _d S r   )rs   
_job_paths_delayedr   
get_logger_loggertime_start_time)r   rs   rt   ru   r   r   r   r      s
   
zSignalHandler.__init__c                 C   s   t   | j }| jjd }t|d |d }||k}|r1| jd|d dd|d dd |S | jd|d dd|d dd |S )	N<   g?iX  zJob has timed out. Ran z.0fz minutes out of requested z	 minutes.zJob has not timed out. Ran )rz   r{   rw   _timeout_minminry   rM   )r   walltimemax_walltimeguaranteed_walltime	timed_outr   r   r   has_timed_out   s   zSignalHandler.has_timed_outsignumframec                 C   s   | j dt|j  d S )NzBypassing signal )ry   warningrT   Signalsr   )r   r   r   r   r   r   r\      s   zSignalHandler.bypassc           
   	   C   s   |   }|rdnd}| jdt|j dt  d| d | jj	}|dkr4| j
d| d	 d S | j}|j| }d
}t|jdrMt|| jj|}n|rQd}|dk rWd}|rjd| d}	| j
|	 t|	| j| |   d S )Nz	timed-out	preemptedCaught signal  on z: this job is .r   z;Not checkpointing nor requeuing since I am a slave (procid=).r   
checkpointz timed-out and not checkpointableztimed-out too many timeszJob not requeued because: )r   ry   r   rT   r   r   r.   r/   rs   r(   rM   rw   _timeout_countdownhasattrfunction_checkpointrv   submitted_pickler   UncompletedJobErrorrb   _exit)
r   r   r   r   caseprocidru   ra   no_requeue_reasonmessager   r   r   r^      s0   "

z(SignalHandler.checkpoint_and_try_requeuec                 C   s|   | j dt|j dt   | jj}|r$| j d| d d S | j	}t
|jdr8t| j	| jj| j	j |   d S )Nr   r   z-Not checkpointing since I am a slave (procid=r   r   )ry   rM   rT   r   r   r.   r/   rs   r(   rw   r   r   r   rv   r   r   r   )r   r   r   r   ru   r   r   r   checkpoint_and_exit   s   $z!SignalHandler.checkpoint_and_exitc                 C   s   | j d td d S )Nz,Exiting gracefully after preemption/timeout.)ry   rM   sysexitr   r   r   r   r      s   zSignalHandler._exitr   rc   )r   rd   re   r	   r   r   r   ro   r   r>   rg   rq   types	FrameTyper\   r^   r   r   r   r   r   r   rZ      s       !rZ   ru   filepathra   r   c                 C   sj   t  d |  }|du rdS || j| t|}|| W d   dS 1 s.w   Y  dS )zCall the checkpoint method and dump the updated delayed.

    Returns:
    --------
        no_requeue_reason: str
            a string explaining while there was no requeuing, else empty string if requeuing works
    zCalling checkpoint method.Nz!checkpoint function returned Noner   )	r   rx   rM   _checkpoint_functionset_timeoutr}   r   temporary_save_pathdump)ru   r   ra   ckpt_delayedtmpr   r   r   r      s   
r   )r&   rT   r.   r   rz   r   typingrg   pathlibr   r   r   r   r   r   rV   r	   rZ   r>   rj   r   r   r   r   r   <module>   s    U