o
    Ti)                     @   s   d dl Z d dlmZ d dlmZ d dlmZ dZdZdZ	dZ
d	Zd
ZdZdZdZdZdZzd dlZdZW n ey?   dZY nw G dd deZG dd dZG dd dZG dd dZdd ZdS )    N)mean)log_dist)get_acceleratorfwd_microstepfwdbwd_microstepbwdbwd_inner_microstep	bwd_innerbwd_allreduce_microstepbwd_allreducestep_microstepstepgư>TFc                   @   s.   e Zd Zde jde jfddZdd ZdS )CudaEventTimerstart_event	end_eventc                 C   s   || _ || _d S N)r   r   )selfr   r    r   I/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/utils/timer.py__init__"   s   
zCudaEventTimer.__init__c                 C   s*   t   | j | j  | j| jS r   )r   current_stream
wait_eventr   synchronizer   elapsed_timer   r   r   r   get_elapsed_msec&   s   
zCudaEventTimer.get_elapsed_msecN)__name__
__module____qualname__r   Eventr   r   r   r   r   r   r       s    r   c                   @   sV   e Zd ZdZG dd dZdd Zdd Zdd	 Zed
d Z	dddZ
dddZdS )SynchronizedWallClockTimerz3Group of timers. Borrowed from Nvidia Megatron codec                   @   sL   e Zd ZdZdd Zdd ZdddZd	d
 Zdd ZdddZ	dd Z
dS )z SynchronizedWallClockTimer.TimerzTimer.c                 C   s:   || _ d| _g | _t  | _d | _d | _d| _d| _	d S )NF        )
name_started_event_timersr   use_host_timersuse_host_timerr   elapsed_records
start_timeend_timer   namer   r   r   r   2   s   
z)SynchronizedWallClockTimer.Timer.__init__c                 C   sP   | j rJ | j d| jrt | _nt j}|dd| _| j  d| _ dS )zStart the timer.z timer has already been startedTenable_timingN)	r$   r#   r'   timer)   r   r    r   record)r   event_classr   r   r   start<   s   

z&SynchronizedWallClockTimer.Timer.startFc                 C   sz   | j sJ dt j}| jrt | _| j| j| j  nt j}|dd}|	  | jt
| j| d| _d| _ dS )zStop the timer.ztimer is not startedTr-   NF)r$   r   r    r'   r/   r*   r%   appendr)   r0   r   r   )r   resetr0   r1   r   r   r   r   stopG   s   


z%SynchronizedWallClockTimer.Timer.stopc                 C   s@   | j rdd | jD | _n	dd | jD | _| j  t| jS )Nc                 S   s   g | ]}|d  qS )     @@r   .0etr   r   r   
<listcomp>X       zFSynchronizedWallClockTimer.Timer._get_elapsed_msec.<locals>.<listcomp>c                 S   s   g | ]}|  qS r   )r   r7   r   r   r   r:   Z   r;   )r'   r%   r(   clearsumr   r   r   r   _get_elapsed_msecV   s
   

z2SynchronizedWallClockTimer.Timer._get_elapsed_msecc                 C   s    d| _ d| _d| _| j  dS )zReset timer.FN)r$   r   r(   r%   r<   r   r   r   r   r4   ^   s   z&SynchronizedWallClockTimer.Timer.resetTc                 C   s8   | j }| j r
|   |  }|r|   |r|   |S )zCalculate the elapsed time.)r$   r5   r>   r4   r2   )r   r4   r$   elapsed_r   r   r   elapsede   s   z(SynchronizedWallClockTimer.Timer.elapsedc                 C   s   | j dd t| jdS )NFr4   g?)r@   	trim_meanr(   r   r   r   r   r   u   s   z%SynchronizedWallClockTimer.Timer.meanN)FF)T)r   r   r   __doc__r   r2   r5   r>   r4   r@   r   r   r   r   r   Timer/   s    


rD   c                 C   s
   i | _ d S r   timersr   r   r   r   r   y      
z#SynchronizedWallClockTimer.__init__c                 C      | j S r   rE   r   r   r   r   
get_timers|      z%SynchronizedWallClockTimer.get_timersc                 C   s$   || j vr| || j |< | j | S r   )rF   rD   r+   r   r   r   __call__   s   

z#SynchronizedWallClockTimer.__call__c                  C   s`   d t  d } d t  d }d t  d }d t  d }d | |||S )Nzmem_allocated: {:.4f} GB   @zmax_mem_allocated: {:.4f} GBzcache_allocated: {:.4f} GBzmax_cache_allocated: {:.4f} GBz | {} | {} | {} | {})formatr   memory_allocatedmax_memory_allocatedmemory_cachedmax_memory_cached)alloc	max_alloccache	max_cacher   r   r   memory_usage   s   z'SynchronizedWallClockTimer.memory_usage      ?TFNc           	      C   s`   |dksJ d}|D ]}|| j v r$| j | j|d| }|d||7 }q
t||p+dgd dS )zLog a group of timers.r"   z	time (ms)rA   z | {}: {:.2f}r   )ranksN)rF   r@   rM   r   )	r   names
normalizerr4   memory_breakdownrX   stringr,   r   r   r   r   log   s   
zSynchronizedWallClockTimer.logc                 C   sF   |dksJ i }|D ]}|| j v r | j |  d | }|||< q
|S )z"Get the mean of a group of timers.r"   r6   )rF   r   )r   rY   rZ   r4   meansr,   r   r   r   r   get_mean   s   
z#SynchronizedWallClockTimer.get_meanrW   TFNrW   T)r   r   r   rC   rD   r   rI   rK   staticmethodrV   r]   r_   r   r   r   r   r!   ,   s    J

	r!   c                   @   sF   e Zd ZG dd dZdd Zdd Zdd ZdddZdddZdS )	NoopTimerc                   @   s4   e Zd Zdd Zdd Zdd Zdd Zd	d
 ZdS )zNoopTimer.Timerc                 C      d S r   r   r   r   r   r   r2         zNoopTimer.Timer.startc                 C   rd   r   r   r   r   r   r   r4      re   zNoopTimer.Timer.resetc                 K   rd   r   r   r   kwargsr   r   r   r5      re   zNoopTimer.Timer.stopc                 K      dS Nr   r   rf   r   r   r   r@      re   zNoopTimer.Timer.elapsedc                 C   rh   ri   r   r   r   r   r   r      re   zNoopTimer.Timer.meanN)r   r   r   r2   r4   r5   r@   r   r   r   r   r   rD      s    rD   c                 C   s   |   | _d S r   )rD   timerr   r   r   r   r      s   zNoopTimer.__init__c                 C   rH   r   )rj   r+   r   r   r   rK      rJ   zNoopTimer.__call__c                 C   s   i S r   r   r   r   r   r   rI      re   zNoopTimer.get_timersrW   TFNc                 C   rd   r   r   )r   rY   rZ   r4   r[   rX   r   r   r   r]      re   zNoopTimer.logc                 C   rd   r   r   )r   rY   rZ   r4   r   r   r   r_      re   zNoopTimer.get_meanr`   ra   )	r   r   r   rD   r   rK   rI   r]   r_   r   r   r   r   rc      s    
rc   c                   @   sH   e Zd ZdddZdd Zdd	 Zd
d Zdd ZdddZdd Z	dS )ThroughputTimer   NFc                 C   s   ddl m} || _d| _d| _d| _|d u rdn|| _|| _d| _d| _	d| _
d| _d| _|| _|| _|| _| jd u r?|j| _d| _| jrKtsMtdd S d S )Nr   )loggerF   z2Unable to import 'psutils', please install package)deepspeed.utilsrm   configr)   r*   started
batch_size
start_stepepoch_countmicro_step_countglobal_step_counttotal_elapsed_timestep_elapsed_timesteps_per_outputmonitor_memorylogginginfoinitializedPSUTILS_INSTALLEDImportError)r   rp   rr   rs   ry   rz   
logging_fnrm   r   r   r   r      s*   

zThroughputTimer.__init__c                 C   s   |  j d7  _ d| _d S )Nrn   r   )rt   ru   r   r   r   r   update_epoch_count   s   
z"ThroughputTimer.update_epoch_countc                 C   s
   d| _ d S NT)r}   r   r   r   r   _init_timer   rG   zThroughputTimer._init_timerc                 C   sJ   | j jsd S |   d| _| j| jkr#| j jrt   t		 | _
d S d S r   )rp   enabledr   rq   rv   rs   synchronizedr   r   r/   r)   r   r   r   r   r2      s   
zThroughputTimer.startc                 C   s   | j d u rdS | j| j  dkS )NFr   )ry   rv   r   r   r   r   _is_report_boundary   s   
z#ThroughputTimer._is_report_boundaryTc                 C   s2  | j jr| js	d S d| _|  jd7  _|r|  jd7  _| jdkr| j jr*t   t		 | _
| j
| j }|  j|7  _|  j|7  _|r|r|  r| d| j| j| j|  | j| jt  tt  d dtt  d d | jrt }t }| d| j| j| j|j|j d| _d S d S d S )NFrn   r   zepoch={}/micro_step={}/global_step={}, RunningAvgSamplesPerSec={}, CurrSamplesPerSec={}, MemAllocated={}GB, MaxMemAllocated={}GBrL   rl   z;epoch={}/micro_step={}/global_step={}, vm %: {}, swap %: {})rp   r   rq   ru   rv   r)   r   r   r   r/   r*   rw   rx   r   r{   rM   rt   avg_samples_per_secrr   TIME_EPSILONroundrN   rO   rz   psutilvirtual_memoryswap_memorypercent)r   global_stepreport_speeddurationvirt_memswapr   r   r   r5      sP   



zThroughputTimer.stopc                 C   s2   | j dkr| j | j }| j| }| j| S tdS )Nr   z-inf)rv   rs   rw   rr   float)r   total_step_offsetavg_time_per_stepr   r   r   r     s
   


z#ThroughputTimer.avg_samples_per_sec)rl   NFN)FT)
r   r   r   r   r   r   r2   r   r5   r   r   r   r   r   rk      s    


)rk   c                 C   s`   d|  krdksJ  J t | }t | dkrdS |   tt|| }t| |||  S )zCompute the trimmed mean of a list of numbers.

    Args:
        data (list): List of numbers.
        trim_percent (float): Percentage of data to trim.

    Returns:
        float: Trimmed mean.
    r"   rW   r   )lensortintr   r   )datatrim_percentnkr   r   r   rB   (  s   
rB   )r/   numpyr   deepspeed.utils.loggingr   deepspeed.acceleratorr   FORWARD_MICRO_TIMERFORWARD_GLOBAL_TIMERBACKWARD_MICRO_TIMERBACKWARD_GLOBAL_TIMERBACKWARD_INNER_MICRO_TIMERBACKWARD_INNER_GLOBAL_TIMERBACKWARD_REDUCE_MICRO_TIMERBACKWARD_REDUCE_GLOBAL_TIMERSTEP_MICRO_TIMERSTEP_GLOBAL_TIMERr   r   r~   r   objectr   r!   rc   rk   rB   r   r   r   r   <module>   s6   x#a