o
    viB                     @  sL   d dl mZ d dlmZmZ d dlZd dlmZ dd ZG dd deZ	dS )	    )annotations)TupleCallableN)	Optimizerc                 C  s   | d uS N )valr   r   N/home/ubuntu/.local/lib/python3.10/site-packages/lion_pytorch/cautious_lion.pyexists	   s   r
   c                      sB   e Zd Z						dd fddZe 	ddddZ  ZS )Lion-C6?g?gGz?        FlrfloatbetasTuple[float, float]weight_decaycautious_factorcautious_wdbooldecoupled_weight_decayc           	        sn   |dksJ t dd |D sJ d|  krdksJ  J || _|| _t|||||d}t || d S )Nr   c                 S  s$   g | ]}d |  kodkn  qS )r         ?r   ).0betar   r   r	   
<listcomp>   s   $ z!Lion.__init__.<locals>.<listcomp>r   )r   r   r   r   r   )all_init_lrdecoupled_wddictsuper__init__)	selfparamsr   r   r   r   r   r   defaults	__class__r   r	   r!      s   
zLion.__init__NclosureCallable | Nonec                 C  s  d }t |rt  | }W d    n1 sw   Y  | jD ]}tdd |d D ]}|j|d |d |d |d g|d | j| | j| jR \
}}}}}	}
}}}}|r]|| }t	|d	krjt
||d
< |d
 }| |
j|d|
 d }|dk r|| d	k}t|t||}|| jdd }|| |dkr|	r|| d	k nd}|jd|| |   |j|| d ||j|d| d q+q |S )Nc                 S  s
   t | jS r   )r
   grad)pr   r   r	   <lambda>6   s   
 zLion.step.<locals>.<lambda>r#   r   r   r   r   r   r   exp_avgr   )alphagh㈵>)minr   )r
   torchenable_gradparam_groupsfilterr)   stater   r   len
zeros_likeclonemul_addsign_where	ones_likemeanclampr   dataadd_)r"   r'   lossgroupr*   r)   r   wdr   r   beta1beta2r3   r   init_lrr,   update
align_maskscalewd_maskr   r   r	   step*   s4   

T 
+z	Lion.step)r   r   r   r   FF)r   r   r   r   r   r   r   r   r   r   r   r   r   )r'   r(   )__name__
__module____qualname__r!   r/   no_gradrJ   __classcell__r   r   r%   r	   r      s    r   )

__future__r   typingr   r   r/   torch.optim.optimizerr   r
   r   r   r   r   r	   <module>   s    