o
    Ti9)                  
   @   s   d dl mZ zddlZW n ey Z zW Y dZ[ndZ[ww zddlZdZW n ey; Z zdZW Y dZ[ndZ[ww ddlZG dd deZdS )   )DeepSpeedAccelerator    NTFc                   @   s  e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdddZdddZ	dd Z
dd Zdd Zdd ZdddZdd ZdddZdddZd d! Zd"d# Zd$d% Zd&d' Zed(d) Zd*d+ Zdd,d-Zdd.d/Zed0d1 Zd2d3 Zd4d5 Zd6d7 Zdd8d9Zdd:d;Z dd<d=Z!dd>d?Z"dd@dAZ#ddBdCZ$ddDdEZ%ddFdGZ&ddHdIZ'ddJdKZ(ddLdMZ)ddNdOZ*dPdQ Z+dRdS Z,dTdU Z-dVdW Z.dXdY Z/dZd[ Z0d\d] Z1d^d_ Z2d`da Z3dbdc Z4ddde Z5ddfdgZ6dhdi Z7edjdk Z8edldm Z9edndo Z:edpdq Z;edrds Z<edtdu Z=edvdw Z>ddydzZ?d{d| Z@d}d~ ZAdd ZBdd ZCdd ZDdd ZEdd ZFdd ZGdd ZHdd ZIdd ZJdS )CPU_Acceleratorc              
   C   sn   d| _ d| _trd| _nd| _zdd l}|  j}|| _W d S  t	y6 } z
d| _W Y d }~d S d }~ww )Ncpuinductorcclgloor   )
_name_compile_backendoneccl_imported_p_communication_backend_namepsutilProcessmemory_inforssmax_memImportError)selfr   meme r   Y/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/accelerator/cpu_accelerator.py__init__   s   zCPU_Accelerator.__init__c                 C      dS NTr   r   r   r   r   is_synchronized_device*      z&CPU_Accelerator.is_synchronized_devicec                 C      |   S Nr   r   r   r   r   use_host_timers-      zCPU_Accelerator.use_host_timersc                 C   r   r   r    r   r   r   r   resolves_data_dependency0   r"   z(CPU_Accelerator.resolves_data_dependencyc                 C   r   r   r    r   r   r   r   handles_memory_backpressure3   r"   z+CPU_Accelerator.handles_memory_backpressureNc                 C   r   Nr   r   r   device_indexr   r   r   device_name7   r   zCPU_Accelerator.device_namec                 C      d S r   r   r&   r   r   r   device:   r   zCPU_Accelerator.devicec                 C   r)   r   r   r&   r   r   r   
set_device=   r   zCPU_Accelerator.set_devicec                 C   s   t jddS )N
LOCAL_RANKr   )osenvirongetr   r   r   r   current_device@   s   zCPU_Accelerator.current_devicec                 C   r   r%   r   r   r   r   r   current_device_nameC   r   z#CPU_Accelerator.current_device_namec                 C   sn   t tjdd}|dkr|S ddlm} | }|sdS d}g }|D ]}t|dkr4||kr4|d7 }|}q"|S )N
LOCAL_SIZEr   )get_numa_coresr   )intr-   r.   r/   deepspeed.utils.numar3   len)r   device_countr3   numa_core_lists
numa_countprev_core_list	core_listr   r   r   r7   F   s   zCPU_Accelerator.device_countc                 C   r)   r   r   r&   r   r   r   synchronizeZ   r   zCPU_Accelerator.synchronizec                 C      t jS r   )torchrandomr   r   r   r   r?   ^      zCPU_Accelerator.randomc                 C   s   |d u r	t |S t ||S r   )r>   set_rng_state)r   	new_stater'   r   r   r   rA   a   s   
zCPU_Accelerator.set_rng_statec                 C      t  S r   )r>   get_rng_stater&   r   r   r   rD   f   r"   zCPU_Accelerator.get_rng_statec                 C   
   t |S r   r>   manual_seedr   seedr   r   r   rG   i      
zCPU_Accelerator.manual_seedc                 C   rE   r   rF   rH   r   r   r   manual_seed_alll   rJ   zCPU_Accelerator.manual_seed_allc                 C   rC   r   )r>   initial_seedr   r   r   r   rL   o   r"   zCPU_Accelerator.initial_seedc                 C   r=   r   )r>   default_generatorr&   r   r   r   rM   r   r@   z!CPU_Accelerator.default_generatorc                 C   r)   r   r   r   r   r   r   Streamv      zCPU_Accelerator.Streamc                 C   s   ddl m} | S Nr   )noop_contextdeepspeed.runtime.utilsrQ   )r   streamrQ   r   r   r   rT   z      zCPU_Accelerator.streamc                 C   r)   r   r   r&   r   r   r   current_stream~   r   zCPU_Accelerator.current_streamc                 C   r)   r   r   r&   r   r   r   default_stream   r   zCPU_Accelerator.default_streamc                 C   r)   r   r   r   r   r   r   Event   rO   zCPU_Accelerator.Eventc                 C   r)   r   r   r   r   r   r   empty_cache   r   zCPU_Accelerator.empty_cachec                 C   s*   dd l }|  j}|| jkr|| _|S Nr   r   r   r   r   r   r   r   r   r   r   r   get_rss   s
   
zCPU_Accelerator.get_rssc                 C   s    dd l }|  j}|| _|S rZ   r[   r\   r   r   r   	reset_rss   s   zCPU_Accelerator.reset_rssc                 C   r   r   r]   r&   r   r   r   memory_allocated   r"   z CPU_Accelerator.memory_allocatedc                 C      |    | jS r   r]   r   r&   r   r   r   max_memory_allocated      z$CPU_Accelerator.max_memory_allocatedc                 C      |    d S r   r^   r&   r   r   r   reset_max_memory_allocated      z*CPU_Accelerator.reset_max_memory_allocatedc                 C   r   r   r_   r&   r   r   r   memory_cached   r"   zCPU_Accelerator.memory_cachedc                 C   ra   r   rb   r&   r   r   r   max_memory_cached   rd   z!CPU_Accelerator.max_memory_cachedc                 C   re   r   rf   r&   r   r   r   reset_max_memory_cached   rh   z'CPU_Accelerator.reset_max_memory_cachedc                 C   s"   |   }i }||d< | j|d< |S )Nzallocated_bytes.all.currentzallocated_bytes.all.peakrb   )r   r'   r   mem_statr   r   r   memory_stats   s
   
zCPU_Accelerator.memory_statsc                 C   re   r   rf   r&   r   r   r   reset_peak_memory_stats   rh   z'CPU_Accelerator.reset_peak_memory_statsc                 C   r   r   r_   r&   r   r   r   memory_reserved   r"   zCPU_Accelerator.memory_reservedc                 C   ra   r   rb   r&   r   r   r   max_memory_reserved   rd   z#CPU_Accelerator.max_memory_reservedc                 C      dd l }| jS rZ   )r   virtual_memorytotalr   r'   r   r   r   r   total_memory      
zCPU_Accelerator.total_memoryc                 C   rq   rZ   )r   rr   	availablert   r   r   r   available_memory   rv   z CPU_Accelerator.available_memoryc                 C   s   t jjS r   )r>   r   ampr   r   r   r   ry      r"   zCPU_Accelerator.ampc                 C   r   r   r   r   r   r   r   is_available   r   zCPU_Accelerator.is_availablec                 C   r)   r   r   )r   msgr   r   r   
range_push      zCPU_Accelerator.range_pushc                 C   r)   r   r   r   r   r   r   	range_pop   r}   zCPU_Accelerator.range_popc                 C   s   | S r   r   )r   callbackr   r   r   	lazy_call   r@   zCPU_Accelerator.lazy_callc                 C      | j S r   )r   r   r   r   r   communication_backend_name   r@   z*CPU_Accelerator.communication_backend_namec                 C   r   )NFr   r   r   r   r   is_triton_supported   r   z#CPU_Accelerator.is_triton_supportedc                 C   r   r   r   r   r   r   r   is_bf16_supported   r   z!CPU_Accelerator.is_bf16_supportedc                 C   s&   zt jj r
W dS W d S    Y dS )NTF)r>   opsmkldnn_is_mkldnn_fp16_supportedr   r   r   r   is_fp16_supported   s   z!CPU_Accelerator.is_fp16_supportedc                 C   s$   t jt jg}|  r|t j |S r   )r>   floatbfloat16r   appendfloat16)r   supported_dtypesr   r   r   r      s   z CPU_Accelerator.supported_dtypesc                 C   r)   r   r   r   r   r   r   create_graph   r   zCPU_Accelerator.create_graphc                 C   s   ddl m} | S rP   rR   )r   graphpoolrT   rQ   r   r   r   capture_to_graph   rU   z CPU_Accelerator.capture_to_graphc                 C   r)   r   r   )r   r   r   r   r   replay_graph   r   zCPU_Accelerator.replay_graphc                 C   r=   r   )r>   BFloat16Tensorr   r   r   r   r         zCPU_Accelerator.BFloat16Tensorc                 C   r=   r   )r>   
ByteTensorr   r   r   r   r     r   zCPU_Accelerator.ByteTensorc                 C   r=   r   )r>   DoubleTensorr   r   r   r   r     r   zCPU_Accelerator.DoubleTensorc                 C   r=   r   )r>   FloatTensorr   r   r   r   r     r   zCPU_Accelerator.FloatTensorc                 C   r=   r   )r>   
HalfTensorr   r   r   r   r     r   zCPU_Accelerator.HalfTensorc                 C   r=   r   )r>   	IntTensorr   r   r   r   r     r   zCPU_Accelerator.IntTensorc                 C   r=   r   )r>   
LongTensorr   r   r   r   r     r   zCPU_Accelerator.LongTensorr   c                 C   s   |S r   r   )r   tensoralign_bytesr   r   r   
pin_memory  r   zCPU_Accelerator.pin_memoryc                 C   s   |  S r   )	is_pinned)r   r   r   r   r   r     r"   zCPU_Accelerator.is_pinnedc                 C   s(   z	ddl m} W dS  ty   Y dS w )Nr   __deepspeed__zop_builder.cpuzdeepspeed.ops.op_builder.cpu)
op_builderr   r   )r   r   r   r   r   op_builder_dir"  s   zCPU_Accelerator.op_builder_dirc                 C   s   t |j}|drdS dS )Nr   TF)strr*   
startswith)r   r   
device_strr   r   r   on_accelerator+  s   

zCPU_Accelerator.on_acceleratorc                 C   s   |  |}|d ur| S d S r   )get_op_builder)r   op_namebuilder_classr   r   r   create_op_builder3  s   
z!CPU_Accelerator.create_op_builderc           	      C   s   zddl m} ddlm}m}m}m}m}m} W n t	y1   ddl
m}m}m}m}m}m} Y nw |dkr8|S |dkr>|S |dkrD|S |dkrJ|S |dkrP|S |S )	Nr   r   )AsyncIOBuilderCCLCommBuilderShareMemCommBuilderFusedAdamBuilderCPUAdamBuilderNotImplementedBuilderr   r   r   r   r   )r   r   op_builder.cpur   r   r   r   r   r   r   deepspeed.ops.op_builder.cpu)	r   
class_namer   r   r   r   r   r   r   r   r   r   r   :  s"   $$zCPU_Accelerator.get_op_builderc                 C   s   ddl m} |S )Nr   )BuildExtension)torch.utils.cpp_extensionr   )r   r   r   r   r   build_extensionQ  s   zCPU_Accelerator.build_extensionc                 C   s   g S r   r   r   r   r   r   export_envsU  r   zCPU_Accelerator.export_envsc                 C   s   dgS )NCUDA_VISIBLE_DEVICESr   r   r   r   r   visible_devices_envsY  r@   z$CPU_Accelerator.visible_devices_envsc                 C   s&   |   D ]}dtt|||< qd S )N,)r   joinmapr   )r   current_envlocal_accelerator_idsenvr   r   r   set_visible_devices_envs\  s   z(CPU_Accelerator.set_visible_devices_envsc                 C   r   r   )r
   r   r   r   r   get_compile_backend`  r@   z#CPU_Accelerator.get_compile_backendc                 C   s<   t jjdd}||v r|| _d S t| d|   d| )Nr   )exclude_tagsz not supported by z. Supported Backends are )r>   _dynamolist_backendsr
   
ValueErrorr(   )r   backendsupported_backendsr   r   r   set_compile_backendc  s   
z#CPU_Accelerator.set_compile_backendr   )NN)r   )K__name__
__module____qualname__r   r   r!   r#   r$   r(   r*   r+   r0   r1   r7   r<   r?   rA   rD   rG   rK   rL   rM   propertyrN   rT   rV   rW   rX   rY   r]   r^   r`   rc   rg   ri   rj   rk   rm   rn   ro   rp   ru   rx   ry   rz   r|   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      s    





























	r   )	abstract_acceleratorr   r>   r   r   oneccl_bindings_for_pytorchr   r-   r   r   r   r   r   <module>   s    