o
    Ti*                     @   sH   d dl Z d dlZd dlZd dlZd dlZddlmZ G dd deZdS )    N   )DeepSpeedAcceleratorc                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdddZ	dddZ
dd Zdd Zdd Zdd ZdddZdd ZdddZdd d!Zd"d# Zd$d% Zd&d' Zd(d) Zed*d+ Zd,d- Zdd.d/Zdd0d1Zed2d3 Zd4d5 Zdd6d7Zdd8d9Zdd:d;Z dd<d=Z!dd>d?Z"dd@dAZ#ddBdCZ$ddDdEZ%ddFdGZ&ddHdIZ'ddJdKZ(ddLdMZ)dNdO Z*dPdQ Z+dRdS Z,dTdU Z-dVdW Z.dXdY Z/dZd[ Z0d\d] Z1d^d_ Z2d`da Z3dbdc Z4ddddeZ5dfdg Z6edhdi Z7edjdk Z8edldm Z9edndo Z:edpdq Z;edrds Z<edtdu Z=ddwdxZ>dydz Z?d{d| Z@d}d~ ZAdZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJdd ZKdS )HPU_Acceleratorc              
   C   sv   d| _ d| _d| _|   zdd lm  m} || _td dtjj	_
W n ty5 } ztdd }~ww d | _d S )Nhpuhcclhpu_backendr   TFz\HPU_Accelerator requires habana_frameworks.torch.hpu, which is not installed on this system.)_name_communication_backend_name_compile_backendapply_hpu_workaroundshabana_frameworks.torch.hputorchr   use_deterministic_algorithmsutilsdeterministicfill_uninitialized_memoryImportError
ValueErrorfp16_supported)selfr   e r   Y/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/accelerator/hpu_accelerator.py__init__   s    

zHPU_Accelerator.__init__c                 C   s    dd }|dd |dd d S )Nc                 S   s    | t j vr|t j| < d S d S N)osenvironkeys)keyvaluer   r   r   update_wa_env_var$   s   z@HPU_Accelerator.apply_hpu_workarounds.<locals>.update_wa_env_varPT_HPU_LAZY_ACC_PAR_MODE0#PT_HPU_ENABLE_REFINE_DYNAMIC_SHAPESr   )r   r    r   r   r   r   "   s   
z%HPU_Accelerator.apply_hpu_workaroundsc                 C      dS NFr   r   r   r   r   is_synchronized_device,      z&HPU_Accelerator.is_synchronized_devicec                 C   r$   r%   r   r&   r   r   r   use_host_timers/   r(   zHPU_Accelerator.use_host_timersc                 C   r$   NTr   r&   r   r   r   resolves_data_dependency2   r(   z(HPU_Accelerator.resolves_data_dependencyc                 C   r$   r*   r   r&   r   r   r   handles_memory_backpressure5   r(   z+HPU_Accelerator.handles_memory_backpressureNc                 C   r$   )Nr   r   r   device_indexr   r   r   device_name8   s   zHPU_Accelerator.device_namec                 C   s   t | |S r   )r   devicer/   r-   r   r   r   r0   <      zHPU_Accelerator.devicec                 C      | j | d S r   )r   
set_devicer-   r   r   r   r3   ?   r1   zHPU_Accelerator.set_devicec                 C   
   | j  S r   )r   current_devicer&   r   r   r   r5   B      
zHPU_Accelerator.current_devicec                 C   s   d |  S )Nzhpu:{})formatr5   r&   r   r   r   current_device_nameE      z#HPU_Accelerator.current_device_namec                 C   r4   r   )r   device_countr&   r   r   r   r:   H   r6   zHPU_Accelerator.device_countc                 C   r4   r   )r   synchronizer-   r   r   r   r;   K   r6   zHPU_Accelerator.synchronizec                 C   s   t jS r   )r   randomr&   r   r   r   r<   O      zHPU_Accelerator.randomc                 C      | j j| d S r   )r   r<   set_rng_state)r   	new_stater.   r   r   r   r?   R      zHPU_Accelerator.set_rng_statec                 C      | j j S r   )r   r<   get_rng_stater-   r   r   r   rC   U      zHPU_Accelerator.get_rng_statec                 C   s   | j j|S r   )r   r<   manual_seedr   seedr   r   r   rE   X   r9   zHPU_Accelerator.manual_seedc                 C   r>   r   )r   r<   manual_seed_allrF   r   r   r   rH   [   rA   zHPU_Accelerator.manual_seed_allc                 C   rB   r   )r   r<   initial_seedr&   r   r   r   rI   ^   rD   zHPU_Accelerator.initial_seedc                 C   s   | j jj| S r   )r   r<   default_generatorsr-   r   r   r   default_generatora   r9   z!HPU_Accelerator.default_generatorc                 C   s   | j jS r   )r   Streamr&   r   r   r   rL   e   s   zHPU_Accelerator.Streamc                 C      | j |S r   )r   stream)r   rN   r   r   r   rN   i   rD   zHPU_Accelerator.streamc                 C   r4   r   )r   current_streamr-   r   r   r   rO   l   r6   zHPU_Accelerator.current_streamc                 C   r4   r   )r   default_streamr-   r   r   r   rP   o   r6   zHPU_Accelerator.default_streamc                 C   s   dd l m  m} |jjS Nr   )habana_frameworks.torch.corer   corer   Event)r   htcorer   r   r   rT   r   s   zHPU_Accelerator.Eventc                 C      d S r   r   r&   r   r   r   empty_cachex   r(   zHPU_Accelerator.empty_cachec                 C   r4   r   )r   memory_allocatedr-   r   r   r   rX   {   r6   z HPU_Accelerator.memory_allocatedc                 C   r4   r   )r   max_memory_allocatedr-   r   r   r   rY   ~   r6   z$HPU_Accelerator.max_memory_allocatedc                 C   r4   r   )r   reset_max_memory_allocatedr-   r   r   r   rZ      r6   z*HPU_Accelerator.reset_max_memory_allocatedc                 C   rM   r   )r   memory_cachedr-   r   r   r   r[      rD   zHPU_Accelerator.memory_cachedc                 C   rM   r   )r   max_memory_cachedr-   r   r   r   r\      rD   z!HPU_Accelerator.max_memory_cachedc                 C   rV   r   r   r-   r   r   r   reset_max_memory_cached   r(   z'HPU_Accelerator.reset_max_memory_cachedc                 C   rM   r   )r   memory_statsr-   r   r   r   r^      rD   zHPU_Accelerator.memory_statsc                 C   r2   r   )r   reset_peak_memory_statsr-   r   r   r   r_      r1   z'HPU_Accelerator.reset_peak_memory_statsc                 C   rM   r   )r   memory_reservedr-   r   r   r   r`      rD   zHPU_Accelerator.memory_reservedc                 C   rM   r   )r   max_memory_reservedr-   r   r   r   ra      rD   z#HPU_Accelerator.max_memory_reservedc                 C   s   |  |d S )NLimit)r^   r-   r   r   r   total_memory   r9   zHPU_Accelerator.total_memoryc                 C   s   |  || | S r   )rc   rX   r-   r   r   r   available_memory   s   z HPU_Accelerator.available_memoryc                 C   r$   r*   r   r&   r   r   r   is_bf16_supported   r(   z!HPU_Accelerator.is_bf16_supportedc                 C   s2   | j d u rdd lm  m  m} | | _ | j S rQ   )r   *habana_frameworks.torch.utils.experimentalr   r   experimental_is_fp16_supported)r   htexpr   r   r   is_fp16_supported   s   

z!HPU_Accelerator.is_fp16_supportedc                 C   s$   t jt jg}|  r|t j |S r   )r   floatbfloat16rj   appendhalf)r   supported_dtypesr   r   r   ro      s   z HPU_Accelerator.supported_dtypesc                 C   rV   r   r   r&   r   r   r   amp   r(   zHPU_Accelerator.ampc                 C   r4   r   )r   is_availabler&   r   r   r   rq      r6   zHPU_Accelerator.is_availablec                 C   rV   r   r   )r   msgr   r   r   
range_push   r(   zHPU_Accelerator.range_pushc                 C   rV   r   r   r&   r   r   r   	range_pop   r(   zHPU_Accelerator.range_popc                 C   s
   |  d S r   r   )r   callbackr   r   r   	lazy_call   r6   zHPU_Accelerator.lazy_callc                 C      | j S r   )r	   r&   r   r   r   communication_backend_name   r=   z*HPU_Accelerator.communication_backend_namec                 C   r$   r%   r   r&   r   r   r   is_triton_supported   r(   z#HPU_Accelerator.is_triton_supportedc                 C   r4   r   )r   HPUGraphr&   r   r   r   create_graph   r6   zHPU_Accelerator.create_graphc                 C   s   | j j||dS )N)rN   )r   graph)r   r|   poolrN   r   r   r   capture_to_graph   r1   z HPU_Accelerator.capture_to_graphc                 C   s   |   d S r   )replay)r   r|   r   r   r   replay_graph   s   zHPU_Accelerator.replay_graphc                 C      t jtjtjddS Nr   )dtyper0   )	functoolspartialr   tensorrl   r&   r   r   r   BFloat16Tensor      zHPU_Accelerator.BFloat16Tensorc                 C   r   r   )r   r   r   r   uint8r&   r   r   r   
ByteTensor   r   zHPU_Accelerator.ByteTensorc                 C   r   r   )r   r   r   r   doubler&   r   r   r   DoubleTensor   r   zHPU_Accelerator.DoubleTensorc                 C   r   r   )r   r   r   r   rk   r&   r   r   r   FloatTensor   r   zHPU_Accelerator.FloatTensorc                 C   r   r   )r   r   r   r   rn   r&   r   r   r   
HalfTensor   r   zHPU_Accelerator.HalfTensorc                 C   r   r   )r   r   r   r   intr&   r   r   r   	IntTensor   r   zHPU_Accelerator.IntTensorc                 C   r   r   )r   r   r   r   longr&   r   r   r   
LongTensor   r   zHPU_Accelerator.LongTensorr   c                 C   s   | |  S r   )
pin_memoryr0   )r   r   align_bytesr   r   r   r      r9   zHPU_Accelerator.pin_memoryc                 C   s   |  S r   )	is_pinned)r   r   r   r   r   r      s   zHPU_Accelerator.is_pinnedc                 C   s   t |j}|drdS dS )Nzhpu:TF)strr0   
startswith)r   r   
device_strr   r   r   on_accelerator   s   

zHPU_Accelerator.on_acceleratorc                 C   s(   z	ddl m} W dS  ty   Y dS w )Nr   )__deepspeed__zop_builder.hpuzdeepspeed.ops.op_builder.hpu)
op_builderr   r   )r   r   r   r   r   op_builder_dir   s   zHPU_Accelerator.op_builder_dirc                 C   s   | j d urd S i | _ |  }t|}tj|j}t	|gD ]F\}}}|dkrf|dkrftj
tj||sftd||}| D ] }|dre|dkre|dkre|dkre|| j vret||| j |< qEq d S )Nall_opsbuilderz{}.{}Builder	OpBuilderCPUOpBuilderTorchCPUOpBuilder)
class_dictr   	importlibimport_moduler   pathdirname__file__pkgutiliter_modulesisdirjoinr7   __dir__endswithgetattr)r   r   op_builder_moduleop_builder_absolute_path_module_namemodulemember_namer   r   r   _lazy_init_class_dict  s(   


z%HPU_Accelerator._lazy_init_class_dictc                 C   s"   |    || jv r| j|  S d S r   r   r   r   
class_namer   r   r   create_op_builder!  s   
z!HPU_Accelerator.create_op_builderc                 C   s4   |    || jv r| j| S d| jv r| jd S d S )NNotImplementedBuilderr   r   r   r   r   get_op_builder)  s   

zHPU_Accelerator.get_op_builderc                 C   s   ddl m} |S )Nr   )BuildExtension)torch.utils.cpp_extensionr   )r   r   r   r   r   build_extension0  s   zHPU_Accelerator.build_extensionc                 C   s   g S r   r   r&   r   r   r   export_envs4  r(   zHPU_Accelerator.export_envsc                 C   s   dgS )NCUDA_VISIBLE_DEVICESr   r&   r   r   r   visible_devices_envs7  s   z$HPU_Accelerator.visible_devices_envsc                 C   s&   |   D ]}dtt|||< qd S )N,)r   r   mapr   )r   current_envlocal_accelerator_idsenvr   r   r   set_visible_devices_envs>  s   z(HPU_Accelerator.set_visible_devices_envsc                 C   rw   r   )r
   r&   r   r   r   get_compile_backendB  r=   z#HPU_Accelerator.get_compile_backendc                 C   s<   t jjdd}||v r|| _d S t| d|   d| )Nr   )exclude_tagsz not supported by z. Supported Backends are )r   _dynamolist_backendsr
   r   r/   )r   backendsupported_backendsr   r   r   set_compile_backendE  s   
z#HPU_Accelerator.set_compile_backendr   )NN)r   )L__name__
__module____qualname__r   r   r'   r)   r+   r,   r/   r0   r3   r5   r8   r:   r;   r<   r?   rC   rE   rH   rI   rK   propertyrL   rN   rO   rP   rT   rW   rX   rY   rZ   r[   r\   r]   r^   r_   r`   ra   rc   rd   re   rj   ro   rp   rq   rs   rt   rv   rx   ry   r{   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      s    






























r   )r   r   r   r   r   abstract_acceleratorr   r   r   r   r   r   <module>   s   