o
    Ti<#                     @   sP   d dl Z d dlZddlmZ zd dlZW n	 ey   Y nw G dd deZdS )    N   )DeepSpeedAcceleratorc                       s  e Zd Z fddZdd Zdd Zdd Zd	d
 ZdddZdddZ	dd Z
dd Zdd Zdd ZdddZdd ZdddZdddZd d! Zd"d# Zd$d% Zd&d' Zed(d) Zd*d+ Zdd,d-Zdd.d/Zed0d1 Zd2d3 Zdd4d5Zdd6d7Zdd8d9Zdd:d;Z dd<d=Z!dd>d?Z"dd@dAZ#ddBdCZ$ddDdEZ%ddFdGZ&ddHdIZ'ddJdKZ(dLdM Z)dNdO Z*dPdQ Z+dRdS Z,dTdU Z-dVdW Z.dXdY Z/dZd[ Z0d\d] Z1d^d_ Z2d`da Z3ddbdcZ4ddde Z5edfdg Z6edhdi Z7edjdk Z8edldm Z9edndo Z:edpdq Z;edrds Z<ddudvZ=dwdx Z>dydz Z?d{d| Z@d}d~ ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZI  ZJS )NPU_Acceleratorc                    s&   t    d| _d| _d| _d | _d S )Nnpuhcclinductor)super__init___name_communication_backend_name_compile_backend
class_dictself	__class__ Y/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/accelerator/npu_accelerator.pyr	      s
   

zNPU_Accelerator.__init__c                 C      dS NFr   r   r   r   r   is_synchronized_device      z&NPU_Accelerator.is_synchronized_devicec                 C      |   S Nr   r   r   r   r   use_host_timers       zNPU_Accelerator.use_host_timersc                 C   r   r   r   r   r   r   r   resolves_data_dependency#   r   z(NPU_Accelerator.resolves_data_dependencyc                 C   r   r   r   r   r   r   r   handles_memory_backpressure&   r   z+NPU_Accelerator.handles_memory_backpressureNc                 C   s   |d u rdS d |S )Nr   npu:{})formatr   device_indexr   r   r   device_name*   s   
zNPU_Accelerator.device_namec                 C      t j|S r   )torchr   devicer!   r   r   r   r&   /      zNPU_Accelerator.devicec                 C   s   t j| d S r   )r%   r   
set_devicer!   r   r   r   r(   2      zNPU_Accelerator.set_devicec                 C   
   t j S r   )r%   r   current_devicer   r   r   r   r+   5      
zNPU_Accelerator.current_devicec                 C   s   d tj S )Nr   )r    r%   r   r+   r   r   r   r   current_device_name8   r)   z#NPU_Accelerator.current_device_namec                 C   r*   r   )r%   r   device_countr   r   r   r   r.   ;   r,   zNPU_Accelerator.device_countc                 C   r$   r   )r%   r   synchronizer!   r   r   r   r/   >   r'   zNPU_Accelerator.synchronizec                 C   s   t jS r   )r%   randomr   r   r   r   r0   B      zNPU_Accelerator.randomc                 C   s"   |d u r
t j|S t j||S r   )r%   r   set_rng_state)r   	new_stater"   r   r   r   r2   E   s   zNPU_Accelerator.set_rng_statec                 C   s   |d u r	t j S t j|S r   )r%   r   get_rng_stater!   r   r   r   r4   K   s   
zNPU_Accelerator.get_rng_statec                 C   r$   r   )r%   r   manual_seedr   seedr   r   r   r5   Q   r'   zNPU_Accelerator.manual_seedc                 C   r$   r   )r%   r   manual_seed_allr6   r   r   r   r8   T   r'   zNPU_Accelerator.manual_seed_allc                 C   r*   r   )r%   r   initial_seedr   r   r   r   r9   W   r,   zNPU_Accelerator.initial_seedc                 C   s   t jj| S r   )r%   r   default_generatorsr!   r   r   r   default_generatorZ   r'   z!NPU_Accelerator.default_generatorc                 C      t jjS r   )r%   r   Streamr   r   r   r   r=   ^      zNPU_Accelerator.Streamc                 C   r$   r   )r%   r   stream)r   r?   r   r   r   r?   b   r'   zNPU_Accelerator.streamc                 C   r$   r   )r%   r   current_streamr!   r   r   r   r@   e   r'   zNPU_Accelerator.current_streamc                 C   r$   r   )r%   r   default_streamr!   r   r   r   rA   h   r'   zNPU_Accelerator.default_streamc                 C   r<   r   )r%   r   Eventr   r   r   r   rB   k   r>   zNPU_Accelerator.Eventc                 C   r*   r   )r%   r   empty_cacher   r   r   r   rC   p   r,   zNPU_Accelerator.empty_cachec                 C   r$   r   )r%   r   memory_allocatedr!   r   r   r   rD   s   r'   z NPU_Accelerator.memory_allocatedc                 C   r$   r   )r%   r   max_memory_allocatedr!   r   r   r   rE   v   r'   z$NPU_Accelerator.max_memory_allocatedc                 C   r$   r   )r%   r   reset_max_memory_allocatedr!   r   r   r   rF   y   r'   z*NPU_Accelerator.reset_max_memory_allocatedc                 C   r$   r   )r%   r   memory_cachedr!   r   r   r   rG   |   r'   zNPU_Accelerator.memory_cachedc                 C   r$   r   )r%   r   max_memory_cachedr!   r   r   r   rH      r'   z!NPU_Accelerator.max_memory_cachedc                 C   r$   r   )r%   r   reset_max_memory_cachedr!   r   r   r   rI      r'   z'NPU_Accelerator.reset_max_memory_cachedc                 C      t tjdrtj|S d S )Nmemory_stats)hasattrr%   r   rK   r!   r   r   r   rK         zNPU_Accelerator.memory_statsc                 C   rJ   )Nreset_peak_memory_stats)rL   r%   r   rN   r!   r   r   r   rN      rM   z'NPU_Accelerator.reset_peak_memory_statsc                 C   rJ   )Nmemory_reserved)rL   r%   r   rO   r!   r   r   r   rO      rM   zNPU_Accelerator.memory_reservedc                 C   rJ   )Nmax_memory_reserved)rL   r%   r   rP   r!   r   r   r   rP      rM   z#NPU_Accelerator.max_memory_reservedc                 C   s   t j|jS r   )r%   r   get_device_propertiestotal_memoryr!   r   r   r   rR      s   zNPU_Accelerator.total_memoryc                 C   s   |  || | S r   )rR   rD   r!   r   r   r   available_memory   s   z NPU_Accelerator.available_memoryc                 C   r*   r   )r%   r   is_bf16_supportedr   r   r   r   rT      r,   z!NPU_Accelerator.is_bf16_supportedc                 C   r   )NTr   r   r   r   r   is_fp16_supported   r   z!NPU_Accelerator.is_fp16_supportedc                 C   s   t jt jt jgS r   )r%   floathalfbfloat16r   r   r   r   supported_dtypes   r)   z NPU_Accelerator.supported_dtypesc                 C   s   t tjdr
tjjS d S )Namp)rL   r%   r   rZ   r   r   r   r   rZ      s   zNPU_Accelerator.ampc                 C   r*   r   )r%   r   is_availabler   r   r   r   r[      r,   zNPU_Accelerator.is_availablec                 C      d S r   r   )r   msgr   r   r   
range_push   r   zNPU_Accelerator.range_pushc                 C   r\   r   r   r   r   r   r   	range_pop   r   zNPU_Accelerator.range_popc                 C   r$   r   )r%   r   
_lazy_call)r   callbackr   r   r   	lazy_call   r'   zNPU_Accelerator.lazy_callc                 C      | j S r   )r   r   r   r   r   communication_backend_name   r1   z*NPU_Accelerator.communication_backend_namec                 C   r   r   r   r   r   r   r   is_triton_supported   r   z#NPU_Accelerator.is_triton_supportedc                 C   r\   r   r   r   r   r   r   create_graph   r   zNPU_Accelerator.create_graphc                 C   s   ddl m} | S )Nr   )noop_context)deepspeed.runtime.utilsrg   )r   graphpoolr?   rg   r   r   r   capture_to_graph   s   z NPU_Accelerator.capture_to_graphc                 C   r\   r   r   )r   ri   r   r   r   replay_graph   r   zNPU_Accelerator.replay_graphc                 C   r<   r   )r%   r   BFloat16Tensorr   r   r   r   rm      r>   zNPU_Accelerator.BFloat16Tensorc                 C   r<   r   )r%   r   
ByteTensorr   r   r   r   rn      r>   zNPU_Accelerator.ByteTensorc                 C   r<   r   )r%   r   DoubleTensorr   r   r   r   ro      r>   zNPU_Accelerator.DoubleTensorc                 C   r<   r   )r%   r   FloatTensorr   r   r   r   rp      r>   zNPU_Accelerator.FloatTensorc                 C   r<   r   )r%   r   
HalfTensorr   r   r   r   rq      r>   zNPU_Accelerator.HalfTensorc                 C   r<   r   )r%   r   	IntTensorr   r   r   r   rr      r>   zNPU_Accelerator.IntTensorc                 C   r<   r   )r%   r   
LongTensorr   r   r   r   rs      r>   zNPU_Accelerator.LongTensorr   c                 C      |  S r   )
pin_memory)r   tensoralign_bytesr   r   r   ru      r   zNPU_Accelerator.pin_memoryc                 C   rt   r   )	is_pinned)r   rv   r   r   r   rx      r   zNPU_Accelerator.is_pinnedc                 C   s   t |j}|drdS dS )Nznpu:TF)strr&   
startswith)r   rv   
device_strr   r   r   on_accelerator   s   

zNPU_Accelerator.on_acceleratorc                 C   s(   z	ddl m} W dS  ty   Y dS w )Nr   )__deepspeed__zop_builder.npuzdeepspeed.ops.op_builder.npu)
op_builderr}   ImportError)r   r}   r   r   r   op_builder_dir   s   zNPU_Accelerator.op_builder_dirc                 C   sD   | j rd S t|  }i | _ t|tjD ]	\}}|| j |< qd S r   )r   	importlibimport_moduler   inspect
getmembersisclass)r   op_builder_module
class_name	class_objr   r   r   _lazy_init_class_dict   s   z%NPU_Accelerator._lazy_init_class_dictc                 C   s   |  |}|d u rd S | S r   )get_op_builder)r   r   builder_classr   r   r   create_op_builder  s   
z!NPU_Accelerator.create_op_builderc                 C   s4   |    || jv r| j| S d| jv r| jd S d S )NNotImplementedBuilder)r   r   )r   r   r   r   r   r     s   

zNPU_Accelerator.get_op_builderc                 C   s   ddl m} |S )Nr   )BuildExtension)torch.utils.cpp_extensionr   )r   r   r   r   r   build_extension  s   zNPU_Accelerator.build_extensionc                 C   s   g dS )N)ASCENDHCCL
LD_LIBRARYPATHr   r   r   r   r   export_envs  r   zNPU_Accelerator.export_envsc                 C   s   dgS )NASCEND_RT_VISIBLE_DEVICESr   r   r   r   r   visible_devices_envs  r1   z$NPU_Accelerator.visible_devices_envsc                 C   s&   |   D ]}dtt|||< qd S )N,)r   joinmapry   )r   current_envlocal_accelerator_idsenvr   r   r   set_visible_devices_envs  s   z(NPU_Accelerator.set_visible_devices_envsc                 C   rc   r   )r   r   r   r   r   get_compile_backend"  r1   z#NPU_Accelerator.get_compile_backendc                 C   s<   t jjdd}||v r|| _d S t| d|   d| )Nr   )exclude_tagsz not supported by z. Supported Backends are )r%   _dynamolist_backendsr   
ValueErrorr#   )r   backendsupported_backendsr   r   r   set_compile_backend%  s   
z#NPU_Accelerator.set_compile_backendr   )NN)r   )K__name__
__module____qualname__r	   r   r   r   r   r#   r&   r(   r+   r-   r.   r/   r0   r2   r4   r5   r8   r9   r;   propertyr=   r?   r@   rA   rB   rC   rD   rE   rF   rG   rH   rI   rK   rN   rO   rP   rR   rS   rT   rU   rY   rZ   r[   r^   r_   rb   rd   re   rf   rk   rl   rm   rn   ro   rp   rq   rr   rs   ru   rx   r|   r   r   r   r   r   r   r   r   r   r   __classcell__r   r   r   r   r      s    






























	r   )r   r   abstract_acceleratorr   	torch.npur%   r   r   r   r   r   r   <module>   s   