o
    z“©i¨3  ã                   @   s<  d dl Z d dlZd dlmZmZmZmZmZmZ d dl	Z	d dl	m
Z
 d dlmZ d dlmZ d dlZd dlmZmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z- d dl.m/Z/ er”d dl0m1Z1 G dd„ de$ƒZ2dS )é    N)ÚTYPE_CHECKINGÚAnyÚDictÚListÚOptionalÚUnion)ÚTensor)ÚModule)Úoverride)Ú_XLA_AVAILABLEÚ_XLA_GREATER_EQUAL_2_1)ÚXLACheckpointIO)ÚXLAEnvironment)Ú_StrategyRegistry)Ú_optimizers_to_device)Ú_PATHÚReduceOp)ÚXLAPrecision)Ú_WrappingCheckpointIO)ÚDDPStrategy)Ú_XLALauncher)Ú
TBroadcast)Ú	TrainerFn)Úfind_shared_parametersÚset_shared_parameters)Úrank_zero_only©ÚMpDeviceLoaderc                       s0  e Zd ZdZdZ						dZded deeej  d	ee	e
ef  d
ee dedededdf‡ fdd„Zeede	e
ef fdd„ƒƒZejedee	e
ef  ddfdd„ƒƒZeedefdd„ƒƒZejed
ee ddfdd„ƒƒZeedejfdd„ƒƒZeedef‡ fdd„ƒƒZeedef‡ fdd„ƒƒZeedef‡ fdd„ƒƒZeedef‡ fd d!„ƒƒZed[d"d#„ƒZed\d&d'„ƒZed(edefd)d*„ƒZeedee ef fd+d,„ƒƒZ!ed-e"dd.fd/d0„ƒZ#ed[d1d2„ƒZ$ed[d3d4„ƒZ%ed]d5ee  d6ed7eddfd8d9„ƒZ&ed^d;e'd<ede'fd=d>„ƒZ(e	d_d?e	e)ef d@ee dAee	e*e f  de)fdBdC„ƒZ+ed[‡ fdDdE„ƒZ,ed[dFdG„ƒZ-ed[dHdI„ƒZ.e	d]dJee ef dKe/dLee ddf‡ fdMdN„ƒZ0edKe/ddfdOdP„ƒZ1ed`dQe)d@ee dRede)fdSdT„ƒZ2ed[‡ fdUdV„ƒZ3e4edWe5ddfdXdY„ƒƒZ6‡  Z7S )aÚXLAStrategyzxStrategy for training multiple TPU devices using the :func:`torch_xla.distributed.xla_multiprocessing.spawn`
    method.ÚxlaNFTÚacceleratorzpl.accelerators.AcceleratorÚparallel_devicesÚcheckpoint_ioÚprecision_pluginÚdebugÚsync_module_statesÚ_Úreturnc                    s@   t sttt ƒƒ‚tƒ j||tƒ ||dd || _d| _|| _d S )NÚfork)r    r!   Úcluster_environmentr"   r#   Ústart_methodF)	r   ÚModuleNotFoundErrorÚstrÚsuperÚ__init__r   r$   Ú	_launchedÚ_sync_module_states)Úselfr    r!   r"   r#   r$   r%   r&   ©Ú	__class__© úT/home/ubuntu/.local/lib/python3.10/site-packages/pytorch_lightning/strategies/xla.pyr.   1   s   
ú
zXLAStrategy.__init__c                 C   s*   | j }|d urt|ttfƒsJ ‚|S tƒ S ©N)Ú_checkpoint_ioÚ
isinstancer   r   ©r1   Úpluginr4   r4   r5   r"   I   s
   zXLAStrategy.checkpoint_ioÚioc                 C   s.   |d urt |ttfƒstd|› ƒ‚|| _d S )NzHThe XLA strategy can only work with the `XLACheckpointIO` plugin, found )r8   r   r   Ú	TypeErrorr7   )r1   r;   r4   r4   r5   r"   R   s   
c                 C   s&   | j }|d urt|tƒsJ ‚|S tƒ S r6   )Ú_precision_pluginr8   r   r9   r4   r4   r5   r#   Y   s
   zXLAStrategy.precision_pluginc                 C   s*   |d urt |tƒstd|› ƒ‚|| _d S )NzEThe XLA strategy can only work with the `XLAPrecision` plugin, found )r8   r   r<   r=   )r1   r#   r4   r4   r5   r#   b   s   
c                 C   s(   | j stdƒ‚dd lm  m} | ¡ S )NzFAccessing the XLA device before processes have spawned is not allowed.r   )r/   ÚRuntimeErrorÚtorch_xla.core.xla_modelÚcoreÚ	xla_modelÚ
xla_device)r1   Úxmr4   r4   r5   Úroot_devicei   s   zXLAStrategy.root_devicec                    ó   | j rtƒ jS dS ©Nr   )r/   r-   Úglobal_rank©r1   r2   r4   r5   rG   r   ó   zXLAStrategy.global_rankc                    rE   rF   )r/   r-   Ú
local_rankrH   r2   r4   r5   rJ   w   rI   zXLAStrategy.local_rankc                    rE   rF   )r/   r-   Ú	node_rankrH   r2   r4   r5   rK   |   rI   zXLAStrategy.node_rankc                    rE   )Né   )r/   r-   Ú
world_sizerH   r2   r4   r5   rM      rI   zXLAStrategy.world_sizec                 C   s   t | ƒ| _d S r6   )r   Ú	_launcherrH   r4   r4   r5   Ú_configure_launcher†   s   zXLAStrategy._configure_launcherÚtrainerú
pl.Trainerc                 C   sä   | j d usJ ‚| j  |¡ | jrdtjd< | jd usJ ‚| j | j¡ t| jƒ}|  	¡  t
| j|ƒ |  | j¡| _| jrPtrEddlm} nddlm} || jƒ |jjtjkr\|  |¡ |  ¡  |jjtjkrpt| j| jƒ d S d S )NÚ1ÚPT_XLA_DEBUGr   )Úbroadcast_master_param)r    Úsetupr$   ÚosÚenvironÚmodelr#   Úconvert_moduler   Úmodel_to_devicer   Ú_setup_modelr0   r   r?   rT   Útorch_xla.experimental.pjrtÚstateÚfnr   ÚFITTINGÚsetup_optimizersÚsetup_precision_pluginr   Ú
optimizersrD   )r1   rP   Úshared_paramsrT   r4   r4   r5   rU   Š   s*   



ÿzXLAStrategy.setuprX   c                 C   s   |S r6   r4   )r1   rX   r4   r4   r5   r[   ©   ó   zXLAStrategy._setup_modelc                 C   s   | j | jdœS )N)Únum_replicasÚrank)rM   rG   rH   r4   r4   r5   Údistributed_sampler_kwargs­   s   z&XLAStrategy.distributed_sampler_kwargsÚ
dataloaderr   c                 C   sD   ddl m} t||ƒr|S ||| jƒ}|jj|_t|jdd ƒ|_|S )Nr   r   Úbatch_sampler)Ú%torch_xla.distributed.parallel_loaderr   r8   rD   Ú_loaderÚdatasetÚgetattrri   )r1   rh   r   r4   r4   r5   Úprocess_dataloader²   s   

zXLAStrategy.process_dataloaderc                 C   ó   d S r6   r4   rH   r4   r4   r5   Úconfigure_ddpÀ   rd   zXLAStrategy.configure_ddpc                 C   s"   | j d usJ ‚| j  | j¡| _ d S r6   )rX   ÚtorD   rH   r4   r4   r5   rZ   Ä   s   zXLAStrategy.model_to_deviceÚnameÚargsÚkwargsc                 O   s6   | j sd S dd lm  m} |d u rd}| |¡ d S )Nr   Ú )r/   r?   r@   rA   Ú
rendezvous)r1   rr   rs   rt   rC   r4   r4   r5   ÚbarrierÉ   s   zXLAStrategy.barrierr   ÚobjÚsrcc                 C   sÖ   | j s|S dd lm  m} t|tƒ}|r*| ¡ dkr | d¡}|j}| 	| j
¡}nt ¡ }t ||¡ tjt| ¡ ƒ| j
tjd}|g}|j||d |d }|sdt | ¡  ¡  ¡ ¡}t |¡}|S | 	|¡}|S )Nr   )ÚdeviceÚdtype)Úroot_ordinal)r/   r?   r@   rA   r8   r   ÚdimÚ	unsqueezerz   rq   rD   r;   ÚBytesIOÚtorchÚsaveÚtensorÚ	bytearrayÚ	getbufferÚfloatÚcollective_broadcastÚcpuÚbyteÚnumpyÚload)r1   rx   ry   rC   Ú	is_tensorÚoriginal_deviceÚbufferr4   r4   r5   Ú	broadcastÕ   s.   

ÿ

þzXLAStrategy.broadcastÚoutputÚgroupÚ	reduce_opc                 C   sž   t |tƒstj|| jd}t |tƒo|tjk}t |tƒo!| ¡ dv}|s&|r-t	d|› ƒ‚dd l
m  m} | d|t¡}t |tƒrM| ¡ dv rM|| j }|S )N)rz   )ÚsumÚmeanÚavgz]Currently, the XLAStrategy only supports `sum`, `mean`, `avg` for the reduce operation, got: r   Úreduce)r”   r“   )r8   r   r€   r‚   rD   r   ÚSUMr,   ÚlowerÚ
ValueErrorr?   r@   rA   Úmesh_reducer’   rM   )r1   r   r   r‘   Úinvalid_reduce_opÚinvalid_reduce_op_strrC   r4   r4   r5   r•   ø   s   
ÿÿ
zXLAStrategy.reducec                    s   d| _ tƒ  ¡  d S )NT)r/   r-   Úsetup_environmentrH   r2   r4   r5   rœ     s   zXLAStrategy.setup_environmentc                 C   s0   | j d usJ ‚t| j ƒdkrtdƒ‚| jt_d S )NrL   z”The `XLAStrategy` does not support running on a single device with the PjRT runtime. Try using all devices or the `SingleDeviceXLAStrategy` strategy)r!   ÚlenÚNotImplementedErrorrG   r   rf   rH   r4   r4   r5   Úsetup_distributed  s   ÿzXLAStrategy.setup_distributedc                 C   ro   r6   r4   rH   r4   r4   r5   Úset_world_ranks!  s   zXLAStrategy.set_world_ranksÚ
checkpointÚfilepathÚstorage_optionsc                    s0   dd l m  m} | ¡  tƒ j|||d d S )Nr   )r£   )r?   r@   rA   Ú	mark_stepr-   Úsave_checkpoint)r1   r¡   r¢   r£   rC   r2   r4   r5   r¥   (  s   zXLAStrategy.save_checkpointc                 C   s   | j dkr| j |¡ dS dS )zqRemove checkpoint filepath from the filesystem.

        Args:
            filepath: Path to checkpoint

        r   N)rJ   r"   Úremove_checkpoint)r1   r¢   r4   r4   r5   r¦   3  s   
ÿzXLAStrategy.remove_checkpointr‚   Ú
sync_gradsc                 C   s    | j s|S t|tƒstdt| ƒj› d|› ƒ‚| ¡ dkr"| d¡}|j}| 	| j
¡}ddlm  m} ddlm  m} |rD| |¡n| |¡}| 	|¡}|S )aC  Function to gather a tensor from several distributed processes.

        Args:
            tensor: tensor to all-gather.
            group: unused.
            sync_grads: flag that allows users to synchronize gradients for the all-gather operation.
        Return:
            A tensor of shape (world_size, ...)

        ú`z4.all_gather` is only implemented for tensors. Given r   N)r/   r8   r   rž   ÚtypeÚ__name__r}   r~   rz   rq   rD   Útorch_xla.core.functionsr@   Ú	functionsr?   rA   Ú
all_gather)r1   r‚   r   r§   rŒ   ÚxfrC   r4   r4   r5   r­   >  s   
ÿ

zXLAStrategy.all_gatherc                    s"   t ƒ  ¡  d| _tj dd ¡ d S )NFrS   )r-   Úteardownr/   rV   rW   ÚpoprH   r2   r4   r5   r¯   \  s   
zXLAStrategy.teardownÚstrategy_registryc                 C   s*   |j d| ddd |j | j| | jd d S )NÚ	xla_debugz!XLA strategy with `debug` as TrueT)Údescriptionr$   )r³   )ÚregisterÚstrategy_namerª   )Úclsr±   r4   r4   r5   Úregister_strategiesb  s   
ýzXLAStrategy.register_strategies)NNNNFT)r'   N)rP   rQ   r'   Nr6   )r   )NN)NF)8rª   Ú
__module__Ú__qualname__Ú__doc__rµ   r   r   r€   rz   r   r   r   r   Úboolr   r.   Úpropertyr
   r"   Úsetterr#   rD   ÚintrG   rJ   rK   rM   rO   rU   r	   r[   r   r,   rg   Úobjectrn   rp   rZ   rw   r   rŽ   r   r   r•   rœ   rŸ   r    r   r¥   r¦   r­   r¯   Úclassmethodr   r·   Ú__classcell__r4   r4   r2   r5   r   +   sÚ    ùþýüûúùø	÷"""ÿ
ÿÿÿþÿ
ÿÿÿþ

"r   )3r;   rV   Útypingr   r   r   r   r   r   r€   r   Útorch.nnr	   Útyping_extensionsr
   Úpytorch_lightningÚplÚ!lightning_fabric.accelerators.xlar   r   Úlightning_fabric.pluginsr   Ú%lightning_fabric.plugins.environmentsr   Úlightning_fabric.strategiesr   Ú$lightning_fabric.utilities.optimizerr   Ú lightning_fabric.utilities.typesr   r   Úpytorch_lightning.pluginsr   Ú$pytorch_lightning.plugins.io.wrapperr   Ú pytorch_lightning.strategies.ddpr   Ú*pytorch_lightning.strategies.launchers.xlar   Ú%pytorch_lightning.strategies.strategyr   Ú pytorch_lightning.trainer.statesr   Úpytorch_lightning.utilitiesr   r   Ú%pytorch_lightning.utilities.rank_zeror   rj   r   r   r4   r4   r4   r5   Ú<module>   s2    