import os
import queue
from typing import TYPE_CHECKING, Any, Callable, Optional, Union

import torch.multiprocessing as mp
from typing_extensions import override

from lightning_fabric.accelerators.xla import _XLA_AVAILABLE
from lightning_fabric.strategies.launchers.xla import _rank_teardown
from lightning_fabric.utilities import move_data_to_device
from pytorch_lightning.strategies.launchers.multiprocessing import (
    _GlobalStateSnapshot,
    _MultiProcessingLauncher,
    _WorkerOutput,
)
from pytorch_lightning.trainer.states import TrainerFn
from pytorch_lightning.utilities.rank_zero import rank_zero_debug

if TYPE_CHECKING:
    import pytorch_lightning as pl


class _XLALauncher(_MultiProcessingLauncher):
ded fddZ  ZS )_XLALaunchera  Launches processes that run a given function in parallel on XLA supported hardware, and joins them all at the
    end.

    The main process in which this launcher is invoked creates N so-called worker processes (using the
    `torch_xla` :func:`xmp.spawn`) that run the given function.
    Worker processes have a rank that ranges from 0 to N - 1.

    Note:
        - This launcher requires all objects to be pickleable.
        - It is important that the entry point to the program/script is guarded by ``if __name__ == "__main__"``.

    Args:
        strategy: A reference to the strategy that is used together with this launcher
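    # Illustrative usage sketch (an assumption added for documentation, not part of this module):
    # the launcher is created and invoked internally by ``XLAStrategy``, so a user script only
    # needs the ``__main__`` guard mentioned in the docstring above, e.g.
    #
    #     import pytorch_lightning as pl
    #
    #     if __name__ == "__main__":
    #         trainer = pl.Trainer(accelerator="tpu", devices=8)
    #         trainer.fit(MyLightningModule())  # `MyLightningModule` is a hypothetical subclass
    #
    # Without the guard, every spawned worker would re-execute the module-level code on import.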

    strategypl.strategies.XLAStrategyreturnNc                    s$   t sttt t j|dd d S )Nfork)r   start_method)r   ModuleNotFoundErrorstrsuper__init__)selfr   	__class__ ^/home/ubuntu/.local/lib/python3.10/site-packages/pytorch_lightning/strategies/launchers/xla.pyr   5   s   z_XLALauncher.__init__c                 C   s   dS )NTr   )r   r   r   r   is_interactive_compatible:   s   z&_XLALauncher.is_interactive_compatible)trainerfunctionargsr    z
pl.Trainerkwargsc                O   s   | j r|dur|jjtjkrtdt  }ddl	m
  m} i }| jj}|dkr/||d< |j| jf|||||f| jdd|}	|	durT|	j| _|	 sT	 |	 rO| }
|du r^|
S |  j |jjtjkO  _ | |
| |
jS )a  Launches processes that run the given function in parallel.

        The function is allowed to have a return value. However, when all processes join, only the return value
        of worker process 0 gets returned from this `launch` method in the main process.

        Arguments:
            function: The entry point for all launched processes.
            *args: Optional positional arguments to be passed to the given function.
            trainer: Optional reference to the :class:`~pytorch_lightning.trainer.trainer.Trainer` for which
                a selected set of attributes get restored in the main process after processes join.
            **kwargs: Optional keyword arguments to be passed to the given function.
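        # Overview of the body below: guard against calling `fit()` twice with a spawn-based
        # strategy, create a manager-backed queue that can be shared with the spawned workers,
        # spawn the workers via `xmp.spawn` without joining, join them here so the process
        # handles are kept, and finally collect the rank-0 output from the queue.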

        if self._already_fit and trainer is not None and trainer.state.fn == TrainerFn.FITTING:
            raise NotImplementedError(
                "Calling `trainer.fit()` twice on the same Trainer instance using a spawn-based strategy is not"
                " supported. You can work around this by creating a new Trainer instance and passing the"
                " `fit(ckpt_path=...)` argument."
            )

        # a manager-backed queue is picklable, so it can be handed to the spawned workers
        return_queue = mp.Manager().Queue()

        import torch_xla.distributed.xla_multiprocessing as xmp

        spawn_kwargs = {}
        nprocs = self._strategy.num_processes
        if nprocs == 1:
            # otherwise, `xmp.spawn` decides how many processes to launch for the available devices
            spawn_kwargs["nprocs"] = nprocs

        process_context = xmp.spawn(
            self._wrapping_function,
            args=(trainer, function, args, kwargs, return_queue),
            start_method=self._start_method,
            join=False,  # joining is handled below to keep a reference to the processes
            **spawn_kwargs,
        )
        # XLA returns no process context when only a single process is launched
        if process_context is not None:
            self.procs = process_context.processes
            while not process_context.join():
                pass

        worker_output = return_queue.get()
        if trainer is None:
            return worker_output

        self._already_fit |= trainer.state.fn == TrainerFn.FITTING
        self._recover_results_in_main_process(worker_output, trainer)
        return worker_output.trainer_results

    @override
    def _wrapping_function(
        self,
        process_idx: int,
        trainer: Optional["pl.Trainer"],
        function: Callable,
        args: Any,
        kwargs: Any,
        return_queue: Union[mp.SimpleQueue, queue.Queue],
        global_states: Optional[_GlobalStateSnapshot] = None,
    ) -> None:
        import torch_xla.core.xla_model as xm

        # when more than one logical device shares this process, deep-copy the inputs so the
        # workers do not mutate shared objects
        if len(xm.get_xla_supported_devices()) > 1:
            import copy

            trainer, function, args, kwargs = copy.deepcopy((trainer, function, args, kwargs))

        results = function(*args, **kwargs)

        if trainer is not None:
            results = self._collect_rank_zero_results(trainer, results)

        if self._strategy.local_rank == 0:
            # move tensors off the XLA device before sending them across process boundaries
            return_queue.put(move_data_to_device(results, "cpu"))

        _rank_teardown(self._strategy.local_rank)

    @override
    def _collect_rank_zero_results(self, trainer: "pl.Trainer", results: Any) -> Optional["_WorkerOutput"]:
        rank_zero_debug("Collecting results from rank 0 process.")
        checkpoint_callback = trainer.checkpoint_callback
        best_model_path = (
            checkpoint_callback.best_model_path
            if checkpoint_callback and hasattr(checkpoint_callback, "best_model_path")
            else None
        )

        # save the last weights so they can be restored in the main process after joining
        weights_path = None
        if trainer.state.fn == TrainerFn.FITTING:
            state_dict = self._strategy.lightning_module_state_dict()
j|jd}| jj|| | jjdkr9d S | |}t|||j||S )Nz'Collecting results from rank 0 process.best_model_pathz
.temp.ckptr   )r   checkpoint_callbackhasattrrQ   r(   r)   r   r*   r2   lightning_module_state_dictospathr&   default_root_dircheckpoint_iosave_checkpointrM   get_extra_resultsr   )r   r    rP   rR   rQ   weights_path
state_dictextrar   r   r   rL      s"   

z'_XLALauncher._collect_rank_zero_results)r   r   r   N)N)__name__
__module____qualname____doc__r   propertyr   boolr   r   r   r   rA   intr   r,   SimpleQueuequeuer.   r   r5   rL   __classcell__r   r   r   r   r   $   s:    *7	
$r   )rU   rf   typingr   r   r   r   r   torch.multiprocessingmultiprocessingr,   typing_extensionsr   !lightning_fabric.accelerators.xlar   )lightning_fabric.strategies.launchers.xlar	   lightning_fabric.utilitiesr
   6pytorch_lightning.strategies.launchers.multiprocessingr   r   r    pytorch_lightning.trainer.statesr   %pytorch_lightning.utilities.rank_zeror   pytorch_lightningplr   r   r   r   r   <module>   s   
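
# A minimal, self-contained sketch (not part of the original module) of the queue-based result
# passing that `_wrapping_function` performs: every worker runs the function, but only rank 0
# puts its result on a shared queue for the main process to collect. The demo uses plain
# `torch.multiprocessing` processes and assumes a platform where the default start method is
# "fork" (e.g. Linux), mirroring this launcher's start method; it does not require XLA hardware.
if __name__ == "__main__":

    def _demo_worker(rank: int, q: Any) -> None:  # hypothetical stand-in for the user function
        result = {"rank": rank, "value": rank * rank}
        if rank == 0:
            q.put(result)

    demo_queue = mp.Manager().Queue()  # manager-backed queues are picklable, like in `launch`
    workers = [mp.Process(target=_demo_worker, args=(rank, demo_queue)) for rank in range(4)]
    for p in workers:
        p.start()
    for p in workers:
        p.join()
    print("rank 0 result:", demo_queue.get())  # -> rank 0 result: {'rank': 0, 'value': 0}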