o
    پi                  $   @   s   d Z ddlZddlmZmZ ddlZddlmZ ddlm	Z	 dej
dej
d	ej
d
ej
dededededej
dedededededededeeef def$ddZG dd deZdS )a   PyTorch MARS Optimizer

Code simplified from https://github.com/AGI-Arena/MARS

Paper: MARS: Unleashing the Power of Variance Reduction for Training Large Models - https://arxiv.org/abs/2411.10438

@article{yuan2024mars,
  title={MARS: Unleashing the Power of Variance Reduction for Training Large Models},
  author={Yuan, Huizhuo and Liu, Yifeng and Wu, Shuang and Zhou, Xun and Gu, Quanquan},
  journal={arXiv preprint arXiv:2411.10438},
  year={2024}
}
    N)OptionalTuple)	Optimizer   )ParamsTpgradexp_avg
exp_avg_sqlrweight_decaybeta1beta2	last_gradepsstepgamma	mars_type
is_grad_2doptimize_1dlr_1d_factorbetas_1dcautionc                 C   s  |s|rd| }|
dkr|}n||  |||  |}t|}|dkr*|| }| |j||d |rO|| dk|j}|| jdd || }|dkr| |j	||d| d d||
  }d||
  }|
 t
| |	}| | || | }n|d	kr| | |  }nJ | j|| d ||fS |\}}| |j|d| d | |j	||d| d d||
  }d||
  }|
 t
| |	}|r|| dk|j}|| jdd || }| | || | }| j|||  d ||fS )
N      ?r   )alphar   gMbP?)minadamw)valuelion)mul_add_torchnormtodtypediv_meanclamp_addcmul_sqrtmathsign)r   r   r	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   one_minus_beta1c_tc_t_normmaskbias_correction1bias_correction2denomupdatebeta1_1dbeta2_1d r6   C/home/ubuntu/.local/lib/python3.10/site-packages/timm/optim/mars.py_mars_single_tensor_step   sJ   
r8   c                       s   e Zd ZdZ										
	ddededeeef dedededededede	eeef  def fddZ
 fddZe dddZ  ZS )Marsz MARS Optimizer

    Paper: MARS: Unleashing the Power of Variance Reduction for Training Large Models
        https://arxiv.org/abs/2411.10438

    ~jth?g?gGz?:0yE>        皙?r   Fr   Nparamsr   betasr   r   r   r   r   r   r   r   c                    s   d|kst d|d|kst d|d|d   kr"dk s,n t d|d d|d   kr8dk sBn t d|d |d	v sJJ d
t||||||||	|
pV||d
}tt| || d S )Nr=   zInvalid learning rate: {}zInvalid epsilon value: {}r   r   z%Invalid beta parameter at index 0: {}r   z%Invalid beta parameter at index 1: {})r   r   zMARS type not supported)
r   r@   r   r   r   r   r   r   r   r   )
ValueErrorformatdictsuperr9   __init__)selfr?   r   r@   r   r   r   r   r   r   r   r   defaults	__class__r6   r7   rE   b   s,   zMars.__init__c                    s,   t t| | | jD ]}|dd qd S )Nr   F)rD   r9   __setstate__param_groups
setdefault)rF   stategrouprH   r6   r7   rJ      s   
zMars.__setstate__c                 C   sf  d}|durt   | }W d   n1 sw   Y  | jD ]}|d D ]}|jdu r.q&|j}|jr8td| j| }t|dkr\d|d< t ||d< t ||d< t ||d	< |d  d7  < |d }|d }|d	 }	|d }
|d
 }|d }|d \}}|j	dk}t
||||	|||||
|d ||d |d ||d |d |d |d d ||d< q&q |S )zPerforms a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        Nr?   zJAdam does not support sparse gradients, please consider SparseAdam insteadr   r   r   r	   r   r
   r   r   r@      r   r   r   r   r   r   r   )r   r   r   r   r   r   )r!   enable_gradrK   r   	is_sparseRuntimeErrorrM   len
zeros_likendimr8   )rF   closurelossrN   r   r   rM   r   r	   r
   r   r   wdr   r   r   r6   r6   r7   r      sd   





4z	Mars.step)
r:   r;   r<   r=   r>   r   Fr   NF)N)__name__
__module____qualname____doc__r   floatr   strboolr   rE   rJ   r!   no_gradr   __classcell__r6   r6   rH   r7   r9   [   sL    	
	
&r9   )r\   r*   typingr   r   r!   torch.optim.optimizerr   _typesr   Tensorr]   intr^   r_   r8   r9   r6   r6   r6   r7   <module>   sX    	


B