o
    Ti                     @   s  d dl Z d dlmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dl
mZ d dlmZmZ d d	lmZ d d
lmZ dad'ddZG dd deZejjejjeeejjeeegZzd dlZe edrxe ej!drxe"ej!j W n	 e#y   Y nw dd Z$dee% ddfddZ&edee% ddfddZ'dd Z(de)de*fddZ+d d! Z,d'd"d#Z-d$eej. deeej.e%e%f  fd%d&Z/dS )(    N)ListTuple)comm)logger)DeepSpeedCPUAdam)DeepSpeedCPUAdagrad)	FusedAdam)DeepSpeedCPULion	FusedLion)instrument_w_nvtx)get_acceleratorFc                 C   s   t t }| p	|} td||  ||  dksJ dt }d }t||  D ]}t||  |d |  }t|}||v r?|}q'|S )Nz3data_parallel_size: %s, parameter_parallel_size: %sr   z9world size should be divisible by parameter parallel size   )intdistget_world_sizer   infoget_rankrange	new_group)parameter_parallel_sizedata_parallel_sizerankmy_groupiranksgroup r   P/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/runtime/zero/utils.py%_initialize_parameter_parallel_groups   s   
r   c                   @   s   e Zd ZdS )ZeRORuntimeExceptionN)__name__
__module____qualname__r   r   r   r   r   '   s    r   
optimizersr   c                 C   s6   t  dkrtd| jj dt|   t| tv S )Nr   z$Checking ZeRO support for optimizer=z type=)r   r   r   r   	__class__r    typeZERO_SUPPORTED_OPTIMIZERS)	optimizerr   r   r   is_zero_supported_optimizer9   s   r(   lstreturnc              	   C   s`   t jt dkr
| ndgt|  tt t t	j
d dd}tj|ddd t|  S )zg
    NOTE: creates both communication and synchronization overhead so should be used
    sparingly
    r   
LOCAL_RANKF)dtypedevicerequires_grad)srcasync_op)torchtensorr   r   lenr   r.   r   device_nameosenviron	broadcastlistcpunumpy)r)   
lst_tensorr   r   r   get_lst_from_rank0?   s   r=   intsc              	   C   s<   t | }| |krtdt  d| dt  d|  dS )z
    NOTE: creates both communication and synchronization overhead so should be
    used sparingly

    takes a list of ints from each rank and ensures that they are the same
    across ranks, throwing an exception if they are not.
    z#disagreement between rank0 and rankz	: rank0: z, rankz: N)r=   RuntimeErrorr   r   )r>   
rank0_intsr   r   r   assert_ints_same_as_other_ranksO   s   	rA   c                 C   s   | j jdkp| j jdkS )N__builtin__builtins)r$   r!   objr   r   r   is_builtin_type^   s   rF   rE   c                 C   s   t | tot| dot| dS )z
    Is this an instance of namedtuple/NamedTuple?
    From: https://stackoverflow.com/a/62692640

    Args:
        obj (object): An object.

    Returns:
        bool: True if namedtuple/NamedTuple else False.
    _asdict_fields)
isinstancetuplehasattrrD   r   r   r   isinstance_namedtuplec   s   rL   c                 C   s   t | sdS t| dS )NFds_id)r2   	is_tensorrK   )	parameterr   r   r   is_zero_paramq   s   

rP   c                 C   s   t |ttfr&g }|D ]}t| |}|| qt|r!|j| S ||S t |tr=| D ]}t| || ||< q/|S t |t	j
rT| |}t|sRt|rR||_|S t|sk|rktskt dkrkt|| da|S )z
    Apply `function` to every Tensor in `value`.

    Args:
        functional: The function class to apply.
        value (Any): Target object to apply `function` to.

    Returns:
        Any: Output of `function`.
    r   T)rI   rJ   r9   apply_to_tensors_onlyappendrL   r$   dictkeysr2   TensorrP   ds_param_aliasrF   warnedr   r   r   warning)functionvaluewarning_msg_fntouched_outputselemtouched_outputkeyr   r   r   rQ   w   s,   



rQ   tensorsc                 C   s6   g }d}| D ]}|  }||||f ||7 }q|S )Nr   )numelrR   )r`   tensor_infosoffsetr3   tensor_numelr   r   r   get_mapping_to_flat_buffer   s   
re   )N)0r6   typingr   r   r2   	deepspeedr   r   deepspeed.utilsr   deepspeed.ops.adamr   deepspeed.ops.adagradr   r   deepspeed.ops.lionr	   r
   deepspeed.utils.nvtxr   deepspeed.acceleratorr   rW   r   	Exceptionr   optimAdamAdamWAdagradr&   apexrK   r#   rR   ImportErrorr(   r   r=   rA   rF   objectboolrL   rP   rQ   rU   re   r   r   r   r   <module>   sD   

,.