o
    Ti*                  
   @   s   d dl Z d dlmZ d dlZd dlZd dlZzd dlZdZW n ey2 Z	 zdZW Y dZ	[	ndZ	[	ww zd dl
ZdZW n eyO Z	 zdZW Y dZ	[	ndZ	[	ww G dd deZdS )    N)DeepSpeedAcceleratorTFc                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdddZdddZ	dd Z
dd Zdd Zdd ZdddZdd ZdddZdddZd d! Zd"d# Zd$d% Zd&d' Zed(d) Zd*d+ Zdd,d-Zdd.d/Zed0d1 Zd2d3 Zdd4d5Zdd6d7Zdd8d9Zdd:d;Z dd<d=Z!dd>d?Z"dd@dAZ#ddBdCZ$ddDdEZ%ddFdGZ&ddHdIZ'ddJdKZ(dLdM Z)dNdO Z*dPdQ Z+dRdS Z,dTdU Z-dVdW Z.dXdY Z/dZd[ Z0dd\d]Z1d^d_ Z2d`da Z3dbdc Z4ddde Z5edfdg Z6edhdi Z7edjdk Z8edldm Z9edndo Z:edpdq Z;edrds Z<ddudvZ=dwdx Z>dydz Z?d{d| Z@d}d~ ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdS )XPU_Acceleratorc                 C   s.   d| _ tr	d| _nd| _d| _g | _d | _d S )Nxpucclxcclinductor)_nameoneccl_imported_p_communication_backend_name_compile_backendaligned_tensors
class_dictself r   Y/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/accelerator/xpu_accelerator.py__init__   s   
zXPU_Accelerator.__init__c                 C      dS NFr   r   r   r   r   is_synchronized_device&      z&XPU_Accelerator.is_synchronized_devicec                 C   s"   t s|  S tjdk rdS |  S )Nz2.6T)ipex_imported_pr   ipex__version__r   r   r   r   use_host_timers)   s
   
zXPU_Accelerator.use_host_timersc                 C      |   S Nr   r   r   r   r   resolves_data_dependency3      z(XPU_Accelerator.resolves_data_dependencyc                 C   r   r   r   r   r   r   r   handles_memory_backpressure6   r   z+XPU_Accelerator.handles_memory_backpressureNc                 C   s   |d krdS d |S )Nr   xpu:{})formatr   device_indexr   r   r   device_name:   s   
zXPU_Accelerator.device_namec                 C      t j|S r   )torchr   devicer#   r   r   r   r(   ?      zXPU_Accelerator.devicec                 C   s   t j| d S r   )r'   r   
set_devicer#   r   r   r   r*   B      zXPU_Accelerator.set_devicec                 C   
   t j S r   )r'   r   current_devicer   r   r   r   r-   E      
zXPU_Accelerator.current_devicec                 C   s   d tj S )Nr!   )r"   r'   r   r-   r   r   r   r   current_device_nameH   r+   z#XPU_Accelerator.current_device_namec                 C   r,   r   )r'   r   device_countr   r   r   r   r0   K   r.   zXPU_Accelerator.device_countc                 C   r&   r   )r'   r   synchronizer#   r   r   r   r1   N   r)   zXPU_Accelerator.synchronizec                 C      t jjS r   )r'   r   randomr   r   r   r   r3   R   r   zXPU_Accelerator.randomc                 C   s"   |d kr
t j|S t j||S r   )r'   r   set_rng_state)r   	new_stater$   r   r   r   r4   U   s   zXPU_Accelerator.set_rng_statec                 C   s   |d kr	t j S t j|S r   )r'   r   get_rng_stater#   r   r   r   r6   Z   s   
zXPU_Accelerator.get_rng_statec                 C   r&   r   )r'   r   manual_seedr   seedr   r   r   r7   _   r)   zXPU_Accelerator.manual_seedc                 C   r&   r   )r'   r   manual_seed_allr8   r   r   r   r:   b   r)   zXPU_Accelerator.manual_seed_allc                 C   r,   r   )r'   r   initial_seedr   r   r   r   r;   e   r.   zXPU_Accelerator.initial_seedc                 C   s   t jj| S r   )r'   r   default_generatorsr#   r   r   r   default_generatorh   r)   z!XPU_Accelerator.default_generatorc                 C   r2   r   )r'   r   Streamr   r   r   r   r>   l      zXPU_Accelerator.Streamc                 C   r&   r   )r'   r   stream)r   r@   r   r   r   r@   p   r)   zXPU_Accelerator.streamc                 C   r&   r   r'   r   current_streamr#   r   r   r   rB   s   r)   zXPU_Accelerator.current_streamc                 C   r&   r   rA   r#   r   r   r   default_streamv   s   zXPU_Accelerator.default_streamc                 C   r2   r   )r'   r   Eventr   r   r   r   rD   |   r?   zXPU_Accelerator.Eventc                 C   r,   r   )r'   r   empty_cacher   r   r   r   rE      r.   zXPU_Accelerator.empty_cachec                 C   r&   r   )r'   r   memory_allocatedr#   r   r   r   rF      r)   z XPU_Accelerator.memory_allocatedc                 C   r&   r   )r'   r   max_memory_allocatedr#   r   r   r   rG      r)   z$XPU_Accelerator.max_memory_allocatedc                 C   r&   r   )r'   r   reset_max_memory_allocatedr#   r   r   r   rH      r)   z*XPU_Accelerator.reset_max_memory_allocatedc                 C   r&   r   r'   r   memory_reservedr#   r   r   r   memory_cached   r)   zXPU_Accelerator.memory_cachedc                 C   r&   r   r'   r   max_memory_reservedr#   r   r   r   max_memory_cached   r)   z!XPU_Accelerator.max_memory_cachedc                 C   r&   r   )r'   r   reset_max_memory_reservedr#   r   r   r   reset_max_memory_cached   r)   z'XPU_Accelerator.reset_max_memory_cachedc                 C   r&   r   )r'   r   memory_statsr#   r   r   r   rQ      r)   zXPU_Accelerator.memory_statsc                 C   r&   r   )r'   r   reset_peak_memory_statsr#   r   r   r   rR      r)   z'XPU_Accelerator.reset_peak_memory_statsc                 C   r&   r   rI   r#   r   r   r   rJ      r)   zXPU_Accelerator.memory_reservedc                 C   r&   r   rL   r#   r   r   r   rM      r)   z#XPU_Accelerator.max_memory_reservedc                 C   s   t j|jS r   )r'   r   get_device_propertiestotal_memoryr#   r   r   r   rT      s   zXPU_Accelerator.total_memoryc                 C   s   |  || | S r   )rT   rF   r#   r   r   r   available_memory   s   z XPU_Accelerator.available_memoryc                 C   r2   r   )r'   r   ampr   r   r   r   rV      r   zXPU_Accelerator.ampc                 C   r,   r   )r'   r   is_availabler   r   r   r   rW      r.   zXPU_Accelerator.is_availablec                 C      d S r   r   )r   msgr   r   r   
range_push      zXPU_Accelerator.range_pushc                 C   rX   r   r   r   r   r   r   	range_pop   r[   zXPU_Accelerator.range_popc                 C   s&   t tjdrtj|S tjj|S )N
_lazy_call)hasattrr'   r   r]   	lazy_init)r   callbackr   r   r   	lazy_call   s   zXPU_Accelerator.lazy_callc                 C      | j S r   )r
   r   r   r   r   communication_backend_name      z*XPU_Accelerator.communication_backend_namec                 C   r   r   r   r   r   r   r   is_triton_supported   r   z#XPU_Accelerator.is_triton_supportedc                 C   rX   r   r   r   r   r   r   create_graph   r   zXPU_Accelerator.create_graphc                 C   s   ddl m} | S )Nr   )noop_context)deepspeed.runtime.utilsrg   )r   graphpoolr@   rg   r   r   r   capture_to_graph   s   z XPU_Accelerator.capture_to_graphc                 C   rX   r   r   )r   ri   r   r   r   replay_graph   r   zXPU_Accelerator.replay_graphc                 C   r   NTr   r   r   r   r   is_bf16_supported   r   z!XPU_Accelerator.is_bf16_supportedc                 C   r   rm   r   r   r   r   r   is_fp16_supported   r   z!XPU_Accelerator.is_fp16_supportedc                 C   s   t jt jt jgS r   )r'   floathalfbfloat16r   r   r   r   supported_dtypes   r+   z XPU_Accelerator.supported_dtypesc                 C      t jtjtj| jdS N)dtyper(   )	functoolspartialr'   tensorrr   r   r   r   r   r   BFloat16Tensor      zXPU_Accelerator.BFloat16Tensorc                 C   rt   ru   )rw   rx   r'   ry   uint8r   r   r   r   r   
ByteTensor   r{   zXPU_Accelerator.ByteTensorc                 C   rt   ru   )rw   rx   r'   ry   doubler   r   r   r   r   DoubleTensor   r{   zXPU_Accelerator.DoubleTensorc                 C   rt   ru   )rw   rx   r'   ry   rp   r   r   r   r   r   FloatTensor   r{   zXPU_Accelerator.FloatTensorc                 C   rt   ru   )rw   rx   r'   ry   rq   r   r   r   r   r   
HalfTensor   r{   zXPU_Accelerator.HalfTensorc                 C   rt   ru   )rw   rx   r'   ry   intr   r   r   r   r   	IntTensor   r{   zXPU_Accelerator.IntTensorc                 C   rt   ru   )rw   rx   r'   ry   longr   r   r   r   r   
LongTensor   r{   zXPU_Accelerator.LongTensor   c                 C   s   |dkr|j |  dS |dkrGddlm} |  ddddd| _| j| |}|d |  |}| j	
| |d  g |S d S )	Nr   r(   r   )AsyncIOBuilderi      F)
pin_memoryr/   deepspeed.ops.op_builder.xpur   load
aio_handlenew_cpu_locked_tensornumelcopy_r   appenddata_ptr)r   ry   align_bytesr   	aligned_tr   r   r   r      s   zXPU_Accelerator.pin_memoryc                 C   sF   |j |  dr
dS | jD ]\}}|| kr | |kr  dS qdS )Nr   TF)	is_pinnedr/   r   r   )r   ry   beginendr   r   r   r     s   zXPU_Accelerator.is_pinnedc                 C   s(   z	ddl m} W dS  ty   Y dS w )Nr   )__deepspeed__zop_builder.xpuzdeepspeed.ops.op_builder.xpu)
op_builderr   ImportError)r   r   r   r   r   op_builder_dir  s   zXPU_Accelerator.op_builder_dirc                 C   s   t |j}|drdS dS )Nzxpu:TF)strr(   
startswith)r   ry   
device_strr   r   r   on_accelerator  s   

zXPU_Accelerator.on_acceleratorc                 C   sD   | j rd S t|  }i | _ t|tjD ]	\}}|| j |< qd S r   )r   	importlibimport_moduler   inspect
getmembersisclass)r   op_builder_module
class_name	class_objr   r   r   _lazy_init_class_dict  s   z%XPU_Accelerator._lazy_init_class_dictc                 C   s   |  |}| S r   )get_op_builder)r   r   builder_classr   r   r   create_op_builder(  s   
z!XPU_Accelerator.create_op_builderc                 C   s&   |    || jv r| j| S | jd S )NNotImplementedBuilder)r   r   )r   r   r   r   r   r   -  s   


zXPU_Accelerator.get_op_builderc                 C   sH   t rz	ddlm} W |S  ty   ddlm} Y |S w ddlm} |S )Nr   )DpcppBuildExtension)r   -intel_extension_for_pytorch.xpu.cpp_extensionr   r   %intel_extension_for_pytorch.xpu.utilstorch.utils.cpp_extension)r   r   r   r   r   build_extension4  s   zXPU_Accelerator.build_extensionc                 C   s   g S r   r   r   r   r   r   export_envs>  r   zXPU_Accelerator.export_envsc                 C   s   dgS )NZE_AFFINITY_MASKr   r   r   r   r   visible_devices_envsA  rd   z$XPU_Accelerator.visible_devices_envsc                 C   s&   |   D ]}dtt|||< qd S )N,)r   joinmapr   )r   current_envlocal_accelerator_idsenvr   r   r   set_visible_devices_envsD  s   z(XPU_Accelerator.set_visible_devices_envsc                 C   rb   r   )r   r   r   r   r   get_compile_backendH  rd   z#XPU_Accelerator.get_compile_backendc                 C   s<   t jjdd}||v r|| _d S t| d|   d| )Nr   )exclude_tagsz not supported by z. Supported Backends are )r'   _dynamolist_backendsr   
ValueErrorr%   )r   backendsupported_backendsr   r   r   set_compile_backendK  s   
z#XPU_Accelerator.set_compile_backendr   )NN)r   )J__name__
__module____qualname__r   r   r   r   r    r%   r(   r*   r-   r/   r0   r1   r3   r4   r6   r7   r:   r;   r=   propertyr>   r@   rB   rC   rD   rE   rF   rG   rH   rK   rN   rP   rQ   rR   rJ   rM   rT   rU   rV   rW   rZ   r\   ra   rc   re   rf   rk   rl   rn   ro   rs   rz   r}   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      s    






























		
r   )r'   *deepspeed.accelerator.abstract_acceleratorr   rw   r   r   oneccl_bindings_for_pytorchr	   r   eintel_extension_for_pytorchr   r   r   r   r   r   r   <module>   s(   