o
    Ti=*                     @   sX   d dl Z d dlZd dlZddlmZ zd dlZW n	 ey!   Y nw G dd deZdS )    N   )DeepSpeedAcceleratorc                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdddZdddZ	dd Z
dd Zdd Zdd ZdddZdd ZdddZdddZd d! Zd"d# Zd$d% Zd&d' Zed(d) Zd*d+ Zdd,d-Zdd.d/Zed0d1 Zd2d3 Zdd4d5Zdd6d7Zdd8d9Zdd:d;Z dd<d=Z!dd>d?Z"dd@dAZ#ddBdCZ$ddDdEZ%ddFdGZ&ddHdIZ'ddJdKZ(dLdM Z)dNdO Z*dPdQ Z+dRdS Z,dTdU Z-dVdW Z.dXdY Z/dZd[ Z0d\d] Z1d^d_ Z2d`da Z3ddbdcZ4ddde Z5edfdg Z6edhdi Z7edjdk Z8edldm Z9edndo Z:edpdq Z;edrds Z<ddudvZ=dwdx Z>dydz Z?d{d| Z@d}d~ ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdS )SDAA_Acceleratorc                 C   s   d| _ d| _d| _d | _d S )Nsdaatcclinductor)_name_communication_backend_name_compile_backend
class_dictself r   Z/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/accelerator/sdaa_accelerator.py__init__/   s   
zSDAA_Accelerator.__init__c                 C      dS NFr   r   r   r   r   is_synchronized_device5      z'SDAA_Accelerator.is_synchronized_devicec                 C      |   S Nr   r   r   r   r   use_host_timers8      z SDAA_Accelerator.use_host_timersc                 C   r   r   r   r   r   r   r   resolves_data_dependency;   r   z)SDAA_Accelerator.resolves_data_dependencyc                 C   r   r   r   r   r   r   r   handles_memory_backpressure>   r   z,SDAA_Accelerator.handles_memory_backpressureNc                 C   s   |d u rdS d |S )Nr   sdaa:{})formatr   device_indexr   r   r   device_nameB   s   
zSDAA_Accelerator.device_namec                 C      t j|S r   )torchr   devicer   r   r   r   r#   G      zSDAA_Accelerator.devicec                 C   s   t j| d S r   )r"   r   
set_devicer   r   r   r   r%   J      zSDAA_Accelerator.set_devicec                 C   
   t j S r   )r"   r   current_devicer   r   r   r   r(   M      
zSDAA_Accelerator.current_devicec                 C   s   d tj S )Nr   )r   r"   r   r(   r   r   r   r   current_device_nameP   r&   z$SDAA_Accelerator.current_device_namec                 C   r'   r   )r"   r   device_countr   r   r   r   r+   S   r)   zSDAA_Accelerator.device_countc                 C   r!   r   )r"   r   synchronizer   r   r   r   r,   V   r$   zSDAA_Accelerator.synchronizec                 C   s   t jS r   )r"   randomr   r   r   r   r-   Z      zSDAA_Accelerator.randomc                 C   s"   |d u r
t j|S t j||S r   )r"   r   set_rng_state)r   	new_stater   r   r   r   r/   ]   s   zSDAA_Accelerator.set_rng_statec                 C   s   |d u r	t j S t j|S r   )r"   r   get_rng_stater   r   r   r   r1   c   s   
zSDAA_Accelerator.get_rng_statec                 C   r!   r   )r"   r   manual_seedr   seedr   r   r   r2   i   r$   zSDAA_Accelerator.manual_seedc                 C   r!   r   )r"   r   manual_seed_allr3   r   r   r   r5   l   r$   z SDAA_Accelerator.manual_seed_allc                 C   r'   r   )r"   r   initial_seedr   r   r   r   r6   o   r)   zSDAA_Accelerator.initial_seedc                 C   s   t jj| S r   )r"   r   default_generatorsr   r   r   r   default_generatorr   r$   z"SDAA_Accelerator.default_generatorc                 C      t jjS r   )r"   r   Streamr   r   r   r   r:   v      zSDAA_Accelerator.Streamc                 C   r!   r   )r"   r   stream)r   r<   r   r   r   r<   z   r$   zSDAA_Accelerator.streamc                 C   r!   r   )r"   r   current_streamr   r   r   r   r=   }   r$   zSDAA_Accelerator.current_streamc                 C   r!   r   )r"   r   default_streamr   r   r   r   r>      r$   zSDAA_Accelerator.default_streamc                 C   r9   r   )r"   r   Eventr   r   r   r   r?      r;   zSDAA_Accelerator.Eventc                 C   r'   r   )r"   r   empty_cacher   r   r   r   r@      r)   zSDAA_Accelerator.empty_cachec                 C   r!   r   )r"   r   memory_allocatedr   r   r   r   rA      r$   z!SDAA_Accelerator.memory_allocatedc                 C   r!   r   )r"   r   max_memory_allocatedr   r   r   r   rB      r$   z%SDAA_Accelerator.max_memory_allocatedc                 C   r!   r   )r"   r   reset_max_memory_allocatedr   r   r   r   rC      r$   z+SDAA_Accelerator.reset_max_memory_allocatedc                 C   r!   r   )r"   r   memory_cachedr   r   r   r   rD      r$   zSDAA_Accelerator.memory_cachedc                 C   r!   r   )r"   r   max_memory_cachedr   r   r   r   rE      r$   z"SDAA_Accelerator.max_memory_cachedc                 C   r!   r   )r"   r   reset_max_memory_cachedr   r   r   r   rF      r$   z(SDAA_Accelerator.reset_max_memory_cachedc                 C      t tjdrtj|S d S )Nmemory_stats)hasattrr"   r   rH   r   r   r   r   rH         zSDAA_Accelerator.memory_statsc                 C   rG   )Nreset_peak_memory_stats)rI   r"   r   rK   r   r   r   r   rK      rJ   z(SDAA_Accelerator.reset_peak_memory_statsc                 C   rG   )Nmemory_reserved)rI   r"   r   rL   r   r   r   r   rL      rJ   z SDAA_Accelerator.memory_reservedc                 C   rG   )Nmax_memory_reserved)rI   r"   r   rM   r   r   r   r   rM      rJ   z$SDAA_Accelerator.max_memory_reservedc                 C   s   t j|jS r   )r"   r   get_device_propertiestotal_memoryr   r   r   r   rO      s   zSDAA_Accelerator.total_memoryc                 C   s   |  || | S r   )rO   rA   r   r   r   r   available_memory   s   z!SDAA_Accelerator.available_memoryc                 C   r'   r   )r"   r   is_bf16_supportedr   r   r   r   rQ      r)   z"SDAA_Accelerator.is_bf16_supportedc                 C   r   )NTr   r   r   r   r   is_fp16_supported   r   z"SDAA_Accelerator.is_fp16_supportedc                 C   s4   t jg}|  r|t j |  r|t j |S r   )r"   floatrR   appendhalfrQ   bfloat16)r   supported_dtypesr   r   r   rW      s   z!SDAA_Accelerator.supported_dtypesc                 C   s   t tjdr
tjjS d S )Namp)rI   r"   r   rX   r   r   r   r   rX      s   zSDAA_Accelerator.ampc                 C   r'   r   )r"   r   is_availabler   r   r   r   rY      r)   zSDAA_Accelerator.is_availablec                 C      d S r   r   )r   msgr   r   r   
range_push   r   zSDAA_Accelerator.range_pushc                 C   rZ   r   r   r   r   r   r   	range_pop   r   zSDAA_Accelerator.range_popc                 C   r!   r   )r"   r   
_lazy_call)r   callbackr   r   r   	lazy_call   r$   zSDAA_Accelerator.lazy_callc                 C      | j S r   )r	   r   r   r   r   communication_backend_name   r.   z+SDAA_Accelerator.communication_backend_namec                 C   r   r   r   r   r   r   r   is_triton_supported   r   z$SDAA_Accelerator.is_triton_supportedc                 C   rZ   r   r   r   r   r   r   create_graph   r   zSDAA_Accelerator.create_graphc                 C   s   ddl m} | S )Nr   )noop_context)deepspeed.runtime.utilsre   )r   graphpoolr<   re   r   r   r   capture_to_graph   s   z!SDAA_Accelerator.capture_to_graphc                 C   rZ   r   r   )r   rg   r   r   r   replay_graph   r   zSDAA_Accelerator.replay_graphc                 C      t jtjtjddS Nr   )dtyper#   )	functoolspartialr"   tensorrV   r   r   r   r   BFloat16Tensor      zSDAA_Accelerator.BFloat16Tensorc                 C   rk   rl   )rn   ro   r"   rp   uint8r   r   r   r   
ByteTensor   rr   zSDAA_Accelerator.ByteTensorc                 C   rk   rl   )rn   ro   r"   rp   doubler   r   r   r   DoubleTensor   rr   zSDAA_Accelerator.DoubleTensorc                 C   rk   rl   )rn   ro   r"   rp   rS   r   r   r   r   FloatTensor   rr   zSDAA_Accelerator.FloatTensorc                 C   rk   rl   )rn   ro   r"   rp   rU   r   r   r   r   
HalfTensor   rr   zSDAA_Accelerator.HalfTensorc                 C   rk   rl   )rn   ro   r"   rp   intr   r   r   r   	IntTensor   rr   zSDAA_Accelerator.IntTensorc                 C   rk   rl   )rn   ro   r"   rp   longr   r   r   r   
LongTensor   rr   zSDAA_Accelerator.LongTensorr   c                 C      |  S r   )
pin_memory)r   rp   align_bytesr   r   r   r~     r   zSDAA_Accelerator.pin_memoryc                 C   r}   r   )	is_pinned)r   rp   r   r   r   r     r   zSDAA_Accelerator.is_pinnedc                 C   s   t |j}|drdS dS )Nzsdaa:TF)strr#   
startswith)r   rp   
device_strr   r   r   on_accelerator	  s   

zSDAA_Accelerator.on_acceleratorc                 C   s(   z	ddl m} W dS  ty   Y dS w )Nr   )__deepspeed__zop_builder.sdaazdeepspeed.ops.op_builder.sdaa)
op_builderr   ImportError)r   r   r   r   r   op_builder_dir  s   zSDAA_Accelerator.op_builder_dirc                 C   sD   | j rd S t|  }i | _ t|tjD ]	\}}|| j |< qd S r   )r   	importlibimport_moduler   inspect
getmembersisclass)r   op_builder_module
class_name	class_objr   r   r   _lazy_init_class_dict  s   z&SDAA_Accelerator._lazy_init_class_dictc                 C   s   |  |}| S r   )get_op_builder)r   r   builder_classr   r   r   create_op_builder%  s   
z"SDAA_Accelerator.create_op_builderc                 C   s&   |    || jv r| j| S | jd S )NNotImplementedBuilder)r   r   )r   r   r   r   r   r   *  s   


zSDAA_Accelerator.get_op_builderc                 C   s   ddl m} |S )Nr   )BuildExtension)torch.utils.cpp_extensionr   )r   r   r   r   r   build_extension1  s   z SDAA_Accelerator.build_extensionc                 C   s   g dS )N)NCCL
LD_LIBRARYPATHr   r   r   r   r   export_envs5  r   zSDAA_Accelerator.export_envsc                 C   s   dgS )NSDAA_VISIBLE_DEVICESr   r   r   r   r   visible_devices_envs8  r.   z%SDAA_Accelerator.visible_devices_envsc                 C   s&   |   D ]}dtt|||< qd S )N,)r   joinmapr   )r   current_envlocal_accelerator_idsenvr   r   r   set_visible_devices_envs;  s   z)SDAA_Accelerator.set_visible_devices_envsc                 C   ra   r   )r
   r   r   r   r   get_compile_backend?  r.   z$SDAA_Accelerator.get_compile_backendc                 C   s<   t jjdd}||v r|| _d S t| d|   d| )Nr   )exclude_tagsz not supported by z. Supported Backends are )r"   _dynamolist_backendsr
   
ValueErrorr    )r   backendsupported_backendsr   r   r   set_compile_backendB  s   
z$SDAA_Accelerator.set_compile_backendr   )NN)r   )J__name__
__module____qualname__r   r   r   r   r   r    r#   r%   r(   r*   r+   r,   r-   r/   r1   r2   r5   r6   r8   propertyr:   r<   r=   r>   r?   r@   rA   rB   rC   rD   rE   rF   rH   rK   rL   rM   rO   rP   rQ   rR   rW   rX   rY   r\   r]   r`   rb   rc   rd   ri   rj   rq   rt   rv   rw   rx   rz   r|   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   -   s    




















	








	r   )	r   r   rn   abstract_acceleratorr   
torch.sdaar"   r   r   r   r   r   r   <module>   s   