o
    rri~9                     @   s  U d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 ddl
m
Z
mZmZmZ ddlmZ dZi Zejedf ed< G d	d
 d
e
je ZG dd dejZG dd de
jZ						d"de	dededededededeje ddfddZG d d! d!Z dS )#    N)Path   )corejob_environmentloggerutils)R   subprocess.Popen['bytes']
_PROCESSESc                       s   e Zd Z		d"dejeef dedeje dej	d ddf
 fd	d
Z
d#dedefddZedefddZd$dedejeef fddZd%deddfddZd&ddZd&ddZdejfddZdejddfd d!Z  ZS )'LocalJobr   Nfolderjob_idtasksprocessr
   returnc                    s:   t  ||| d| _| j| _|| _| jD ]}||_qd S )NF)super__init___cancel_at_deletion	_sub_jobs_process)selfr   r   r   r   sjob	__class__ H/home/ubuntu/.local/lib/python3.10/site-packages/submitit/local/local.pyr      s   
zLocalJob.__init__Fforce_checkc                 C   s   |   d }|dkS )z#Override to avoid using the watcherjobStateRUNNING)get_info)r   r   stater   r   r   done.   s   zLocalJob.donec                 C   s(   z	|   ddW S  ty   Y dS w )zState of the jobr   unknownUNKNOWN)r!   get	Exceptionr   r   r   r   r"   3   s
   zLocalJob.stateforcemodec                 C   sd   | j du rd}| jj rd}d|iS | j  }|du r"d}d|iS |dk r,d}d|iS d}d|iS )z,Returns information about the job as a dict.NzNO PROCESS AND NO RESULTFINISHEDr   r    r   INTERRUPTED)r   pathsresult_pickleexistspoll)r   r*   r"   r0   r   r   r   r!   <   s   

zLocalJob.get_infoTcheckc                 C   s    | j d ur| j tj d S d S N)r   send_signalsignalSIGINT)r   r1   r   r   r   cancelL   s   
zLocalJob.cancelc                 C   s"   | j dur| j t  dS dS )zBSends preemption / timeout signal to the job (for testing purpose)N)r   r3   LocalJobEnvironment_usr_sigr(   r   r   r   
_interruptP   s   
zLocalJob._interruptc                 C   sF   | j r|  ddks| jdd | jj r!t| j	d  d S d S )Nr   r+   F)r1   )
r   r!   r&   r6   r-   r.   r/   r   popr   r(   r   r   r   __del__U   s   zLocalJob.__del__c                 C   s,   t | j}d |d< | jd ur| jt| j< |S )Nr   )dict__dict__r   r   r   )r   outr   r   r   __getstate___   s
   

zLocalJob.__getstate__r"   c                 C   s    | j | t| jd | _d S r2   )r=   updater   r&   r   r   )r   r"   r   r   r   __setstate__f   s   zLocalJob.__setstate__)r   N)F)r)   )Tr   N)__name__
__module____qualname__tpUnionr   strSequenceintOptionalr   boolr#   propertyr"   Dictr!   r6   r9   r;   Anyr?   rA   __classcell__r   r   r   r   r      s.    


r   c                   @   s0   e Zd ZdddddddZded	d
fddZd
S )r7   SUBMITIT_LOCAL_JOB_IDSUBMITIT_LOCAL_NTASKSSUBMITIT_LOCAL_JOB_NUM_NODESSUBMITIT_LOCAL_NODEIDSUBMITIT_LOCAL_GLOBALIDSUBMITIT_LOCAL_LOCALID)r   	num_tasks	num_nodesnodeglobal_rank
local_rank	countdownr   Nc                 C   s0   | j }t d| d| d tt d S )NzRequeued job z (z remaining timeouts))r   r   
get_loggerinfosysexitLOCAL_REQUEUE_RETURN_CODE)r   r\   jidr   r   r   _requeuew   s   zLocalJobEnvironment._requeue)rC   rD   rE   _envrJ   rc   r   r   r   r   r7   m   s    	r7   c                       s  e Zd ZdZeZ			d!dejee	f de
dedeje d	df
 fd
dZed	eje fddZdejd	df fddZded	ee fddZed	efddZd	e
fddZdeded	efddZedejeef d	efddZde	d	eje fdd Z  ZS )"LocalExecutora  Local job executor
    This class is used to hold the parameters to run a job locally.
    In practice, it will create a bash file in the specified directory for each job,
    and pickle the task function and parameters. At completion, the job will also pickle
    the output. Logs are also dumped in the same directory.

    The submission file spawn several processes (one per task), with a timeout.


    Parameters
    ----------
    folder: Path/str
        folder for storing job submission/output and logs.

    Note
    ----
    - be aware that the log/output folder will be full of logs and pickled objects very fast,
      it may need cleaning.
    - use update_parameters to specify custom parameters (n_gpus etc...).
             ?Nr   max_num_timeoutmax_pickle_size_gbpythonr   c                    sL   t  j|||d |d u rttjn|| _tj	| j
}|jddd d S )N)ri   rh   T)parentsexist_ok)r   r   shlexquoter_   
executablerj   r   JobPathsget_first_id_independent_folderr   mkdir)r   r   rh   ri   rj   indep_folderr   r   r   r      s   zLocalExecutor.__init__c                 C   s   dhS )z4Parameters that can be set through update_parameterssetupr   )clsr   r   r   _valid_parameters   s   zLocalExecutor._valid_parameterskwargsc                    s   | dddkrtd| dd}| dd}t|tjs&td| d	td
d |D s7td| dt|dkrM|t|krMt| d| dt jdi | dS )ao  Update the parameters of the Executor.

        Valid parameters are:
        - timeout_min (float)
        - gpus_per_node (int)
        - visible_gpus (Sequence[int])
        - tasks_per_node (int)
        - nodes (int). Must be 1 if specified
        - signal_delay_s (int): signal (lately: USR2) delay before timeout

        Other parameters are ignored
        nodesr      z0LocalExecutor can use only one node. Use nodes=1gpus_per_nodevisible_gpusr   zProvided visible_gpus=z  is not an instance of Sequence.c                 s   s    | ]}t |tV  qd S r2   )
isinstancerJ   ).0xr   r   r   	<genexpr>   s    z<LocalExecutor._internal_update_parameters.<locals>.<genexpr>z( contains an element that is not an int.z gpus requested, but only z were specified visible.N)	r&   
ValueErrorr|   rF   rI   alllenr   _internal_update_parameters)r   rw   gpus_requestedr{   r   r   r   r      s   z)LocalExecutor._internal_update_parameterscommandc                 C   s   | j dd}| j dd}| j dd}|dkrt|n|d | }t| j||ddd	 |D | j d
d| j dd| j dd| j ddd}t| jt|j|t	t|d}|S )Ntasks_per_nodery   rz   r   r{   r   ,c                 s   s    | ]}t |V  qd S r2   )rH   r}   kr   r   r   r          z0LocalExecutor._submit_command.<locals>.<genexpr>timeout_ming       @signal_delay_s   stderr_to_stdoutFrt   )r   r   r   cuda_devicesr   r   r   rt   )r   r   r   r   )

parametersr&   rangestart_controllerr   joinr   rH   pidlist)r   r   ntasksn_gpusr{   gpusr   jobr   r   r   _submit_command   s$   
zLocalExecutor._submit_commandc                 C   s   d | jdtt| jgS )N z-u -m submitit.core._submit)r   rj   rm   rn   rH   r   r(   r   r   r   _submitit_command_str   s   z#LocalExecutor._submitit_command_strc                 C   s   d}| j dd}|| S )Nry   r   )r   r&   )r   rx   r   r   r   r   
_num_tasks   s   zLocalExecutor._num_tasksuidc                 C      dS )N r   )r   r   r   r   r   r   _make_submission_file_text   s   z(LocalExecutor._make_submission_file_textstringc                 C   r   )N0r   )r   r   r   r   #_get_job_id_from_submission_command   s   z1LocalExecutor._get_job_id_from_submission_commandsubmission_file_pathc                 C   s   g S r2   r   )r   r   r   r   r   _make_submission_command   s   z&LocalExecutor._make_submission_command)rf   rg   N)rC   rD   rE   __doc__r   	job_classrF   rG   rH   r   rJ   floatrK   r   classmethodSetrv   rO   r   r   r   rM   r   r   r   staticmethodbytesr   Listr   rP   r   r   r   r   re   }   s8     re   ry   r         @r   Fr   r   r   r   r   r   r   r   rt   r   c                 C   s   t tj}|jt||ttd| tt|dd|rdndd||r$dndd
 tjddt| g}	t|}
|
rDd		t
|t	|	g }	tj|	|
|d
}|S )zTStarts a job controller, which is expected to survive the end of the python session.<   r   1r   local)
rR   SUBMITIT_LOCAL_COMMANDSUBMITIT_LOCAL_TIMEOUT_SSUBMITIT_LOCAL_SIGNAL_DELAY_SrT   rS   SUBMITIT_STDERR_TO_STDOUTSUBMITIT_EXECUTORCUDA_VISIBLE_DEVICESSUBMITIT_LOCAL_WITH_SHELLz-mzsubmitit.local._localz && )shellenv)r<   osenvironr@   rH   rJ   r_   ro   rL   r   r   rm   
subprocessPopen)r   r   r   r   r   r   r   rt   r   proc_cmd
need_shellr   r   r   r   r      s&   



r   c                   @   s   e Zd ZdZdefddZdejdej	ddfd	d
Z
dddZdddZddedejeje  fddZddeddfddZdS )
ControllerzThis controls a job:
    - instantiate each of the tasks
    - sends timeout signal
    - stops all tasks if one of them finishes
    - cleans up the tasks/closes log files when deleted
    r   c                 C   s   t tjd | _ttjd | _t tjd | _t tjd | _t	tjd | _
g | _g | _g | _t	tjd }t|r@t nt | _t|| _ttj| j d S )NrR   r   r   r   r   r   )rJ   r   r   r   rm   splitr   	timeout_sr   rL   r   r   stdoutsstderrsrH   getppidgetpidr   r   r   r4   SIGTERM_forward_signal)r   r   
with_shellr   r   r   r     s   
zController.__init__signumargsr   Nc              	   G   s0   | j D ]}z|| W q ty   Y qw d S r2   )r   r3   r'   )r   r   r   taskr   r   r   r   -  s   
zController._forward_signalc                    s    j jdd  fddt jD }dd |D  _ jr! jndd |D  _t jD ]*}ttj	}|j
t|t| jd  jtj jd| j|  j| d	d
 q.d S )NT)rl   c                    s   g | ]}t  j j|qS r   )r   rp   r   r   r   r(   r   r   
<listcomp>6  s    z*Controller.start_tasks.<locals>.<listcomp>c                 S      g | ]}|j d qS a)stdoutopenr}   pr   r   r   r   7      c                 S   r   r   )stderrr   r   r   r   r   r   8  r   )rV   rU   rQ   Fzutf-8)r   r   r   r   encoding)r   rr   r   r   r   r   r   r<   r   r   r@   rH   r   r   appendr   r   r   )r   r-   r   r   r   r(   r   start_tasks4  s(   
zController.start_tasksc                 C   s   t jt jfD ]*}| | |t jkr0tdd | jD r0td tdd | jD r0td qg | _| j| j	 }g g | _| _	|D ]}|
  qCd S )Nc                 s       | ]	}|  d u V  qd S r2   r0   r}   tr   r   r   r   N      z(Controller.kill_tasks.<locals>.<genexpr>gMbP?c                 s   r   r2   r   r   r   r   r   r   P  r   rg   )r4   r5   SIGKILLr   anyr   timesleepr   r   close)r   sigfilesfr   r   r   
kill_tasksI  s   



zController.kill_tasks   freqc                 C   s   | j sJ d|| j }|| j| j  }t|D ])}dd | j D }tdd |D r/|  S ||kr:| t  t	d|  qdd | j D S )a  Waits for all tasks to finish or to time-out.

        Returns
        -------
        Sequence[Optional[int]]:
            Exit codes of each task.
            Some tasks might still have not exited, but they will have received the "timed-out" signal.
        zNothing to do!c                 S      g | ]}|  qS r   r   r   r   r   r   r   g      z#Controller.wait.<locals>.<listcomp>c                 s   s    | ]}|d uV  qd S r2   r   r}   er   r   r   r   h  r   z"Controller.wait.<locals>.<genexpr>rg   c                 S   r   r   r   r   r   r   r   r   o  r   )
r   r   r   r   r   r   r7   r8   r   r   )r   r   timeoutalmost_timeoutstep
exit_codesr   r   r   waitX  s   	
zController.wait   	max_retryc              	   C   s^   t |D ](}z!|   |  }tdd |D }|s"W |    d S W |   q|   w d S )Nc                 s   s    | ]}|t kV  qd S r2   )ra   r   r   r   r   r   x  r   z!Controller.run.<locals>.<genexpr>)r   r   r   r   r   )r   r   _r   requeuer   r   r   runq  s   zController.runrB   )r   )r   )rC   rD   rE   r   r   r   r4   SignalsrF   rO   r   r   r   rJ   rI   rK   r   r   r   r   r   r   r     s    	

 r   )ry   r   r   r   Fr   )!r   rm   r4   r   r_   r   typingrF   pathlibr   r   r   r   r   	core.corer   ra   r   rN   rH   __annotations__Jobr   JobEnvironmentr7   PicklingExecutorre   rJ   r   rL   rI   r   r   r   r   r   r   <module>   sR   
Px	
"