o
    oin                     @   s  d dl Z d dlmZ d dlmZmZmZmZmZm	Z	 d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZmZmZmZmZmZmZ d dl m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z( d dl)m*Z* d dl+m,Z,m-Z-m.Z.m/Z/m0Z0 d dl1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9 d dl:m;Z; d dl<m=Z=m>Z> d dl?m@Z@ d dlAmBZBmCZC d dlDmEZE d dlFmGZG eee!ef ZHG dd dZIdee, dee. fddZJdeKfddZLdS )    N)Counter)AnyDictListOptionalUnioncast)get_args)ACCELERATOR_REGISTRY)Accelerator)CUDAAccelerator)MPSAccelerator)XLAAccelerator)BitsandbytesPrecisionCheckpointIODeepSpeedPrecisionHalfPrecisionMixedPrecision	PrecisionTransformerEnginePrecisionXLAPrecision)ClusterEnvironmentLightningEnvironmentLSFEnvironmentMPIEnvironmentSLURMEnvironmentTorchElasticEnvironment)DoublePrecision)FSDPPrecision)_PRECISION_INPUT_PRECISION_INPUT_INT_PRECISION_INPUT_STR_PRECISION_INPUT_STR_ALIAS%_PRECISION_INPUT_STR_ALIAS_CONVERSION)STRATEGY_REGISTRYDeepSpeedStrategyParallelStrategySingleDeviceStrategySingleDeviceXLAStrategyStrategyXLAFSDPStrategyXLAStrategy)_DDP_FORK_ALIASES)_FSDP_ALIASESFSDPStrategy)ModelParallelStrategy)rank_zero_inforank_zero_warn)_determine_root_gpu_device)_IS_INTERACTIVEc                   @   s  e Zd ZdZ						d+deeef deeef deee	 ee	f de	d	e
e d
e
eeee f  ddfddZdeeef deeef d	e
e d
e
eeee f  ddf
ddZdeee	 ee	f de	ddfddZedefddZedefddZd,ddZd,ddZdefddZdeeef fddZd,ddZd,d d!Zdefd"d#Zd,d$d%Zed&ed'ed(edefd)d*ZdS )-
_Connectorad  The Connector parses several Fabric arguments and instantiates the Strategy including its owned components.

        A. accelerator flag could be:
            1. accelerator class
            2. accelerator str
            3. accelerator auto

        B. strategy flag could be:
            1. strategy class
            2. strategy str registered with STRATEGY_REGISTRY
            3. strategy str in _strategy_type enum which listed in each strategy as
               backend (registed these too, and _strategy_type could be deprecated)

        C. plugins flag could be:
            1. precision class (should be removed, and precision flag should allow user pass classes)
            2. checkpoint_io class
            3. cluster_environment class

    priorities which to take when:
        A. Class > str
        B. Strategy > Accelerator/precision/plugins

    auto   Nacceleratorstrategydevices	num_nodes	precisionpluginsreturnc                 C   s(  | j d|dd}| j d|dd}| j d|dd}t| j d|dd}| j d|d d}t | _t | _d| _d| _	d	| _
d | _d | _g | _d | _| j||||d
 | j||d | j	dkrd|  | _	n
| j	dkrn|  | _	|   |  | _| jdkr|  | _|   |   |  | _|   d S )Nr7   r5   )defaultr8   r9   r:   r6   r;   32-true)r8   r7   r;   r<   )r9   r:   gpu)_argument_from_envintr$   available_strategies_registered_strategiesr
   available_accelerators_registered_accelerators_strategy_flag_accelerator_flag_precision_input_precision_instance_cluster_environment_flag_parallel_devicescheckpoint_io!_check_config_and_set_final_flags(_check_device_config_and_set_final_flags_choose_auto_accelerator_choose_gpu_accelerator_backend*_set_parallel_devices_and_init_accelerator$_choose_and_init_cluster_environmentcluster_environment_choose_strategy_check_strategy_and_fallback_init_strategy_check_and_init_precisionr;   _lazy_init_strategy)selfr7   r8   r9   r:   r;   r<    r[   N/home/ubuntu/.local/lib/python3.10/site-packages/lightning/fabric/connector.py__init__b   sB   









z_Connector.__init__c                 C   s  |durt |ts|gn|}t |tr| }|| _|dkr0|| jvr0t |ts0td|d|| jvrM|dvrMt |t	sMtd|dd
| j d	t |toUd
|v }t |to^d|v }t |togd|v }t |tpr|pr|pr|}t o|dv pt |t}	|	r|rtd| d|| _t|}
|r	t }|D ]?}t |tr|| _|tj  d7  < qt |tr|| _|tj  d7  < qt |tr|| _|tj  d7  < qtd| ddd | D }|rtdd
| d|tjr	|
dur	td|
 d| j d|
du rdn|
| _t | jtr| jjr.| jdkr)td| jj| _| jjr@| jr;td| jj| _| jjrR| jrMtd| jj| _t| jddri| jrbtdt| jd| _t | jd r| jj!r| jj!d! j"d"kr| jr| jd#vrtd$| jj#j d%| j d&d"| _| jj!d! j"d'kr| jr| jd(vrtd)| jj#j d%| j d&d'| _| jj!| _$dS dS dS dS )*a  This method checks:

        1. strategy: whether the strategy name is valid, and sets the internal flags if it is.
        2. accelerator: if the value of the accelerator argument is a type of accelerator (instance or string),
            set self._accelerator_flag accordingly.
        3. precision: The final value of the precision flag may be determined either by the precision argument or
            by a plugin instance.
        4. plugins: The list of plugins may contain a Precision plugin, CheckpointIO, ClusterEnvironment and others.
            Additionally, other flags such as `precision` can populate the list with the
            corresponding plugin instances.

        Nr5   z1You selected an invalid strategy name: `strategy=z`. It must be either a string or an instance of `lightning.fabric.strategies.Strategy`. Example choices: auto, ddp, ddp_spawn, deepspeed, dp, ... Find a complete list of options in our documentation at https://lightning.ai)r5   r@   z7You selected an invalid accelerator name: `accelerator=z`. Available names are: auto, , .ddpdp	deepspeed)mpsr5   r@   NzYou set `strategy=z` but strategies from the DDP family are not supported on the MPS accelerator. Either explicitly set `accelerator='cpu'` or change the strategy.r6   zFound invalid type for plugin z>. Expected one of: Precision, CheckpointIO, ClusterEnviroment.c                 S   s   g | ]
\}}|d kr|qS )r6   r[   ).0kvr[   r[   r\   
<listcomp>   s    z@_Connector._check_config_and_set_final_flags.<locals>.<listcomp>zReceived multiple values for z> flags in `plugins`. Expected one value for each type at most.zReceived both `precision=z` and `plugins=z`. Choose one.r?   zLaccelerator set through both strategy class and accelerator flag, choose onezAprecision set through both strategy class and plugins, choose onezEcheckpoint_io set through both strategy class and plugins, choose onerT   zKcluster_environment set through both strategy class and plugins, choose oneparallel_devicesr   cpu)r5   ri   z!CPU parallel_devices set through z class, but accelerator set to z, please choose one device typecuda)r5   rj   r@   z!GPU parallel_devices set through )%
isinstanceliststrlowerrG   rD   r)   
ValueErrorrF   r   joinr&   r   is_availablerH   "_convert_precision_to_unified_argsr   r   rJ   __name__r   rM   r   rK   	TypeErroritemsgetrI   _accelerator
_precision_checkpoint_iogetattrhasattrrh   type	__class__rL   )rZ   r8   r7   r;   r<   
is_ddp_str	is_dp_stris_deepspeed_stris_parallel_strategyis_mps_acceleratorprecision_inputplugins_flags_typespluginduplicated_plugin_keyr[   r[   r\   rN      s   














z,_Connector._check_config_and_set_final_flagsc                 C   sv   t |tr	|dk rtd| d|| _|| _| jg ddfv r9t | jtr*| jjjn| j}td| jd| dd S )	Nr6   z0`num_nodes` must be a positive integer, but got r_   r   0z`Fabric(devices=z$)` value is not a valid input using z accelerator.)	rk   rB   ro   _num_nodes_flag_devices_flagrH   r   r}   __qualname__)rZ   r9   r:   accelerator_namer[   r[   r\   rO   *  s   

z3_Connector._check_device_config_and_set_final_flagsc                   C   s(   t  rdS t rdS t rdS dS )zTChoose the accelerator type (str) based on availability when ``accelerator='auto'``.tpurc   rj   ri   )r   rq   r   r   r[   r[   r[   r\   rP   <  s   z#_Connector._choose_auto_acceleratorc                   C   s    t  rdS t rdS td)Nrc   rj   zNo supported gpu backend found!)r   rq   r   RuntimeErrorr[   r[   r[   r\   rQ   G  s
   z*_Connector._choose_gpu_accelerator_backendc                 C   s   t | jtr| j| _n| jd usJ t| j| _| jj}| s5dd | jD }t	d|j
 d| d|   || j| _| jsL|| j| _d S d S )Nc                 S   s    g | ]}t | d   r|qS )r7   )r
   rq   )rd   acc_strr[   r[   r\   rg   X  s    zI_Connector._set_parallel_devices_and_init_accelerator.<locals>.<listcomp>`z` can not run on your system since the accelerator is not available. The following accelerator(s) is available and can be passed into `accelerator` argument of `Fabric`: r_   )rk   rH   r   r7   r
   rv   r}   rq   rF   r   r    _set_devices_flag_if_auto_passedparse_devicesr   rL   get_parallel_devices)rZ   accelerator_clsavailable_acceleratorr[   r[   r\   rR   O  s&   

z5_Connector._set_parallel_devices_and_init_acceleratorc                 C   sh   | j dkrd S tr,t| jtr,| j dkr,d| _ td| j  d| j  d d S | j | _ d S )Nr5   r6   zFabric will use only 1 of zm GPUs because it is running inside an interactive / notebook environment. You may try to set `Fabric(devices=z)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.)r   r3   rk   r7   r   auto_device_countr0   rZ   r[   r[   r\   r   j  s   

z+_Connector._set_devices_flag_if_auto_passedc                 C   s<   t | jtr	| jS ttttfD ]}| r|   S qt S )N)	rk   rK   r   r   r   r   r   detectr   )rZ   env_typer[   r[   r\   rS   |  s   
z/_Connector._choose_and_init_cluster_environmentc                 C   s   | j dkst| j tr| jrt| jdkrdS t| jd dS | jdkr&dS t| jdkrMt| j ttfs@t| j t	rF| j dv rFt
| j}nd}t|dS t| jdkrXtrXd	S dS )
Nr   r6   xlar   )devicer`   )rj   r@   rc   ri   ddp_fork)rH   rk   r   rL   lenr(   r   r   r   rm   r2   r'   r3   )rZ   r   r[   r[   r\   rU     s"   



z_Connector._choose_strategyc                 C   s   t | jtrdn| j}|dkr| jdkrd}|dkr(| jdkr(t|d d}|tv r;d	tj vr;t	d
| d|t
v sFt| jtu rO| jdvrOt	d|rV|| _dS dS )zChecks edge cases when the strategy selection was a string input, and we need to fall back to a different
        choice depending on other parameters or the environment. fsdpr   xla_fsdpra   ri   z: is not supported on CPUs, hence setting `strategy='ddp'`.r`   forkzYou selected `Fabric(strategy='zn')` but process forking is not supported on this platform. We recommed `Fabric(strategy='ddp_spawn')` instead.)rj   r@   zYou selected the FSDP strategy but FSDP is only available on GPU. Set `Fabric(accelerator='gpu', ...)` to continue or select a different strategy.N)rk   rG   r)   rH   r1   r,   torchmultiprocessingget_all_start_methodsro   r-   r|   r.   )rZ   strategy_flagr[   r[   r\   rV     s$   


z'_Connector._check_strategy_and_fallbackc                 C   s>   t | jttfs
J t | jtrt| j| _dS | j| _dS )zNInstantiate the Strategy given depending on the setting of ``_strategy_flag``.N)rk   rG   rm   r)   r$   rv   r8   r   r[   r[   r\   rW     s   z_Connector._init_strategyc                 C   s~  t | jtrt | jtrt | jtstd| jS t | jtt	t
fr't| jS t | jtr2t| jS t | jtr>t| jdS d}t | jtrZ| j|vrZtd| jdd| d| jdv rdt| jS | jd	krlt S | jd
krtt S | jdkrttjdS | jdkrttjdS | jdkr| jdkrtd d| _| jdv rt| jdkrdnd | jdkrdnd}t| j|dS td)Nz,Bitsandbytes is only supported on CUDA GPUs.)r;   )r?   
bf16-mixed	bf16-true16-truezDThe `ModelParallelStrategy` does not support `Fabric(..., precision=z()`. Choose a different precision among: r^   r_   )r   r   r?   z64-trueztransformer-engine)weights_dtypeztransformer-engine-float1616-mixedri   zYou passed `Fabric(accelerator='cpu', precision='16-mixed')` but AMP with fp16 is not supported on CPU. Using `precision='bf16-mixed'` instead.r   )r   r   z,Using 16-bit Automatic Mixed Precision (AMP)z.Using bfloat16 Automatic Mixed Precision (AMP)rj   )r;   r   zNo precision set)rk   rJ   r   r   r7   r   r   r8   r(   r+   r*   r   rI   r%   r   r.   r   r/   ro   rp   r   r   r   r   bfloat16float16rH   r1   r0   r   )rZ   mp_precision_supportedr   r[   r[   r\   rX     sV   










z$_Connector._check_and_init_precisionc                 C   s  | j | j_ | jr| j| j_| jr| j| j_t| jdr+| jjdu r&| j| j_| jj| _t| jdr@| jjr;| jj| _n| j| j_t| jdrK| j| j_	t| jdrV| j
  | j  tro| jjro| jjjsotd| jdt| j trt| jtttfstd| jjj d	dS dS )
zFLazily set missing attributes on the previously instantiated strategy.rT   Nrh   r:   _set_world_ranksz`Fabric(strategy=a  )` is not compatible with an interactive environment. Run your code as a script, or choose one of the compatible strategies: `Fabric(strategy='dp'|'ddp_notebook')`. In case you are spawning processes yourself, make sure to include the Fabric creation inside the worker function.zsThe `XLAAccelerator` can only be used with a `SingleDeviceXLAStrategy`, `XLAStrategy`, or `XLAFSDPStrategy`. Found r_   )r7   r8   r;   rM   r{   rT   rh   rL   r   
_num_nodesr   _configure_launcherr3   launcheris_interactive_compatibler   rG   rk   r   r(   r+   r*   ro   r}   rs   r   r[   r[   r\   rY     s>   









z_Connector._lazy_init_strategynamecurrentr>   c              
   C   sp   t jd|   }|d u r|S |d ur6|t|kr6t|t|kr6t r6td|  d|d|  d| d	|S )NLT_zYour code has `Fabric(=z+, ...)` but it conflicts with the value `--zZ` set through the CLI.  Remove it either from the CLI or from the Lightning Fabric object.)osenvironrv   upperrm   _is_using_cliro   )r   r   r>   	env_valuer[   r[   r\   rA     s   *z_Connector._argument_from_env)r5   r5   r5   r6   NN)r=   N)rs   
__module__r   __doc__r   rm   r   r)   r   rB   r   r   _PLUGIN_INPUTr]   rN   rO   staticmethodrP   rQ   rR   r   r   rS   rU   rV   rW   r   rX   rY   r   rA   r[   r[   r[   r\   r4   I   sd    


A


 $




	
0) r4   r;   r=   c                 C   s   | d u rd S t tt t t t }| |vr"tdt|  d| t| } | t tv rGt| d d dvrCtd|  dt|   d t|  } t	t| S )Nz
Precision z' is invalid. Allowed precision values:    )3264z`precision=za` is supported for historical reasons but its usage is discouraged. Please set your precision to z	 instead!)
r	   r!   r    r"   ro   reprrm   r1   r#   r   )r;   supported_precisionr[   r[   r\   rr   -  s    
rr   c                   C   s   t ttjddS )NLT_CLI_USEDr   )boolrB   r   r   rv   r[   r[   r[   r\   r   C  s   r   )Mr   collectionsr   typingr   r   r   r   r   r   r   typing_extensionsr	   lightning.fabric.acceleratorsr
   )lightning.fabric.accelerators.acceleratorr   "lightning.fabric.accelerators.cudar   !lightning.fabric.accelerators.mpsr   !lightning.fabric.accelerators.xlar   lightning.fabric.pluginsr   r   r   r   r   r   r   r   %lightning.fabric.plugins.environmentsr   r   r   r   r   r   )lightning.fabric.plugins.precision.doubler   'lightning.fabric.plugins.precision.fsdpr   ,lightning.fabric.plugins.precision.precisionr   r    r!   r"   r#   lightning.fabric.strategiesr$   r%   r&   r'   r(   r)   r*   r+   lightning.fabric.strategies.ddpr,    lightning.fabric.strategies.fsdpr-   r.   *lightning.fabric.strategies.model_parallelr/   lightning.fabric.utilitiesr0   r1   (lightning.fabric.utilities.device_parserr2   "lightning.fabric.utilities.importsr3   r   r4   rr   r   r   r[   r[   r[   r\   <module>   s:    ( 
(
   g