o
    Ni4                     @  s   d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	m
Z
 d dlmZ dd	 Zd
d ZdddZdd ZddddddZddddddZG dd deZdS )    )annotations)Callable)deepcopy)partialN)nnTensor)Modulec                 C  s   | d uS N )valr
   r
   K/home/ubuntu/.local/lib/python3.10/site-packages/ema_pytorch/ema_pytorch.pyexists   s   r   c                 C  s   | | dkS Nr   r
   )numdenr
   r
   r   divisible_by      r   mr   c                 C  s   t |  jS r	   )next
parametersdevice)r   r
   r
   r   get_module_device   s   r   c                 C  s   | j |kr| S | |S r	   )dtypeto)tr   r
   r
   r   maybe_coerce_dtype   s   

r   Fauto_move_devicecoerce_dtypetgtr   srcc                C  s.   |r| | j}|rt|| j}| | d S r	   )r   r   r   r   copy_)r   r    r   r   r
   r
   r   inplace_copy   s
   r"   c                C  s0   |r| | j}|rt|| j}| || d S r	   )r   r   r   r   lerp_)r   r    weightr   r   r
   r
   r   inplace_lerp#   s
   r%   c                      s   e Zd ZdZddddddde e e d	d
d
dddd
d
d
fd7 fddZ	d8d9ddZdd Zedd Z	dd  Z
e d!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd8d-d.Zd/d0 Zd1d2 Ze d8d3d4Zd5d6 Z  ZS ):EMAag  
    Implements exponential moving average shadowing for your model.

    Utilizes an inverse decay schedule to manage longer term training runs.
    By adjusting the power, you can control how fast EMA will ramp up to your specified beta.

    @crowsonkb's notes on EMA Warmup:

    If gamma=1 and power=1, implements a simple average. gamma=1, power=2/3 are
    good values for models you plan to train for a million or more steps (reaches decay
    factor 0.999 at 31.6K steps, 0.9999 at 1M steps), gamma=1, power=3/4 for models
    you plan to train for less (reaches decay factor 0.999 at 10K steps, 0.9999 at
    215.4k steps).

    Args:
        inv_gamma (float): Inverse multiplicative factor of EMA warmup. Default: 1.
        power (float): Exponential factor of EMA warmup. Default: 2/3.
        min_value (float): The minimum EMA decay rate. Default: 0.
    NgH.?d   
         ?gUUUUUU?        TFr
   modelr   	ema_model$Module | Callable[[], Module] | Noneparam_or_buffer_names_no_emaset[str]ignore_namesignore_startswith_namesforward_method_namestuple[str, ...]c                   sB  t    || _|dk| _|| _|r|| _n|g| _t|ts&t|r&| }d | _	|| _
|s4| | nt|r:J tt||d| _tt||d| _|| _|| _|| _|| _|| _t|	ttfsbJ |	| _|
| _|| _|| _|| _|| _|| _|| _|rtt drtt dsJ d|| _!| "dt #d | "dt #d	 d S )
Nr)   r   _foreach_lerp__foreach_copy_zFyour version of torch does not have the prerequisite foreach functionsinittedFstepr   )$super__init__beta	is_frozeninclude_online_modelonline_model
isinstancer   callabler,   r2   init_emar   r   r"   r%   update_everyupdate_after_step	inv_gammapower	min_valuesetlistr.   r0   r1   update_model_with_ema_everyupdate_model_with_ema_betaallow_different_devicesr   move_ema_to_online_devicehasattrtorchuse_foreachregister_buffertensor)selfr+   r,   r:   rB   rA   rC   rD   rE   r.   r0   r1   r<   rJ   rN   rH   rI   r2   rK   r   lazy_init_ema	__class__r
   r   r9   A   sF   

zEMA.__init__Module | Nonec              
   C  s   || _ t| j s1zt| j| _ W n  ty0 } ztd|  td t  W Y d }~nd }~ww | j  D ]}|  q6| j	D ]}t
| j |}t| || q@dd | j  D | _dd | j  D | _d S )Nz'Error: While trying to deepcopy model: zNYour model was not copyable. Please make sure you are not using any LazyLinearc                 S  (   h | ]\}}t |st |r|qS r
   rM   is_floating_point
is_complex).0nameparamr
   r
   r   	<setcomp>      ( zEMA.init_ema.<locals>.<setcomp>c                 S  rV   r
   rW   )rZ   r[   bufferr
   r
   r   r]      r^   )r,   r   r   r+   	Exceptionprintexitr   detach_r2   getattrsetattrnamed_parametersparameter_namesnamed_buffersbuffer_names)rQ   r,   epforward_method_namefnr
   r
   r   r@      s"   


zEMA.init_emac                   s$   t |dsJ  fdd}||S )Nregister_step_post_hookc                    s       d S r	   )update)_rQ   r
   r   hook   r   z1EMA.add_to_optimizer_post_step_hook.<locals>.hook)rL   rn   )rQ   	optimizerrr   r
   rq   r   add_to_optimizer_post_step_hook   s   
z#EMA.add_to_optimizer_post_step_hookc                 C  s   | j r| jS | jd S r   )r<   r=   rq   r
   r
   r   r+      s   z	EMA.modelc                 C  s
   | j  S r	   )r,   evalrq   r
   r
   r   ru      s   
zEMA.evalc                 O  s(   | j j}| j |i |}| j | |S r	   )r,   trainingtrain)rQ   argskwargsrv   outr
   r
   r   forward_eval   s   zEMA.forward_evalc                 C  s   | j j}| j| d S r	   )r6   r   r,   r   )rQ   r   r
   r
   r   restore_ema_model_device   s   zEMA.restore_ema_model_devicec                 c  .    |  D ]\}}|| jvrq||fV  qd S r	   )rf   rg   )rQ   r+   r[   r\   r
   r
   r   get_params_iter      
zEMA.get_params_iterc                 c  r}   r	   )rh   ri   )rQ   r+   r[   r_   r
   r
   r   get_buffers_iter   r   zEMA.get_buffers_iterc                 C  s~   | j }t| | j| | jD ]\\}}\}}||j|j qt| | j| | jD ]\\}}\}}||j|j q-d S r	   r"   zipr~   r,   r+   datar   rQ   copyrp   	ma_paramscurrent_params
ma_bufferscurrent_buffersr
   r
   r   copy_params_from_model_to_ema      **z!EMA.copy_params_from_model_to_emac                 C  s~   | j }t| | j| | jD ]\\}}\}}||j|j qt| | j| | jD ]\\}}\}}||j|j q-d S r	   r   r   r
   r
   r   copy_params_from_ema_to_model   r   z!EMA.copy_params_from_ema_to_modelc                 C  s4   t |s| j}|dkr|  S | | j| j| d S )Nr*   )r   rI   r   update_moving_averager+   r,   )rQ   decayr
   r
   r   update_model_with_ema   s
   zEMA.update_model_with_emac                 C  sX   | j | j d jdd}dd|| j  | j   }| dkr!dS |j| j| jd S )N   r*   )minr   )r   max)r7   rB   clamprC   rD   itemrE   r:   )rQ   epochvaluer
   r
   r   get_current_decay  s
   zEMA.get_current_decayc                 C  s   | j  }|  j d7  _ | j s*t| js|   |   | jjt	
d d S t|| j}|r=|| jkr=|   d S |rG| | j| j t| jrXt|| jrZ|   d S d S d S )Nr   T)r7   r   r6   r   r,   r@   r   r   r!   rM   rP   r   rA   rB   r   r+   rH   r   )rQ   r7   should_updater
   r
   r   ro     s"   


z
EMA.updatec                   s"  | j rd S | jrt|t|kr|t| t|s|  }g }g }t| || |D ]3\\ }\}} | jv r<q.t	 fdd| j
D rIq. | jv rX||j|jf q.||j|jf q.t| || |D ]3\\ }	\}}
 | jv r{qmt	 fdd| j
D rqm | jv r||
j|	jf qm||
j|	jf qm| js|D ]
\}}| || q|D ]\}}| ||d|  qd S | jrdd |D }dd |D }| jrdd |D }dd |D }t|d	krt| \}}t|| t|d	krt| \}}t||d|  d S d S )
Nc                      g | ]}  |qS r
   
startswithrZ   prefixr[   r
   r   
<listcomp>?      z-EMA.update_moving_average.<locals>.<listcomp>c                   r   r
   r   r   r   r
   r   r   N  r   r)   c                 S      g | ]\}}|| |jfqS r
   r   r   rZ   r   r    r
   r
   r   r   e       c                 S  r   r
   r   r   r
   r
   r   r   f  r   c                 S      g | ]\}}|t ||jfqS r
   r   r   r   r
   r
   r   r   i  r   c                 S  r   r
   r   r   r
   r
   r   r   j  r   r   )r;   rK   r   r   r   r   r   r~   r0   anyr1   r.   appendr   r   rN   r"   r%   rJ   r   lenrM   r5   r4   )rQ   ma_modelcurrent_modelcurrent_decaytensors_to_copytensors_to_lerpr   rp   r   current_buffer	ma_bufferr   r    tgt_copysrc_copytgt_lerpsrc_lerpr
   r   r   r   %  sZ   &

&

zEMA.update_moving_averagec                 O  s   | j |i |S r	   )r,   )rQ   rx   ry   r
   r
   r   __call__t  s   zEMA.__call__)r+   r   r,   r-   r.   r/   r0   r/   r1   r/   r2   r3   r	   )r,   rU   )__name__
__module____qualname____doc__rF   r9   r@   rt   propertyr+   ru   rM   no_gradr{   r|   r~   r   r   r   r   r   ro   r   r   __classcell__r
   r
   rS   r   r&   ,   sR    g

	
			Nr&   )r   r   )r   r   r    r   )
__future__r   typingr   r   r   	functoolsr   rM   r   r   torch.nnr   r   r   r   r   r"   r%   r&   r
   r
   r
   r   <module>   s    
		