o
    vi	                     @  sT   d dl mZ d dlmZmZ d dlZd dlmZ dd Zdd Z	G d	d
 d
eZ
dS )    )annotations)TupleCallableN)	Optimizerc                 C  s   | d uS N )valr   r   M/home/ubuntu/.local/lib/python3.10/site-packages/lion_pytorch/lion_pytorch.pyexists	   s   r
   c                 C  s`   | j d||   | |j|d| d }| j|| d ||j|d| d d S )N      ?)alpha)datamul_cloneaddsign_add_)pgradexp_avglrwdbeta1beta2updater   r   r	   	update_fn   s    r   c                      s@   e Zd Z					dd fddZe 	ddddZ  ZS )Lion-C6?g?gGz?        Fr   floatbetasTuple[float, float]weight_decay
use_tritonbooldecoupled_weight_decayc           	        sn   |dksJ t dd |D sJ || _|| _t|||d}t || t| _|r5ddlm} || _d S d S )Nr   c                 S  s$   g | ]}d |  kodkn  qS )r   r   r   ).0betar   r   r	   
<listcomp>)   s   $ z!Lion.__init__.<locals>.<listcomp>)r   r!   r#   r   )r   )all_init_lrdecoupled_wddictsuper__init__r   lion_pytorch.triton)	selfparamsr   r!   r#   r$   r&   defaultstriton_update_fn	__class__r   r	   r/      s   	
zLion.__init__NclosureCallable | Nonec                 C  s   d }t |rt  | }W d    n1 sw   Y  | jD ]R}tdd |d D ]F}|j|d |d g|d | j| | j| jR \}}}}}	}
}}|rU|| }t	|
dkrbt
||
d< |
d }| |||||||	 q+q |S )	Nc                 S  s
   t | jS r   )r
   r   )r   r   r   r	   <lambda>H   s   
 zLion.step.<locals>.<lambda>r2   r   r#   r!   r   r   )r
   torchenable_gradparam_groupsfilterr   stater,   r+   len
zeros_liker   )r1   r7   lossgroupr   r   r   r   r   r   r>   r,   init_lrr   r   r   r	   step<   s0   

Dz	Lion.step)r   r   r   FF)
r   r    r!   r"   r#   r    r$   r%   r&   r%   r   )r7   r8   )__name__
__module____qualname__r/   r:   no_gradrD   __classcell__r   r   r5   r	   r      s    r   )
__future__r   typingr   r   r:   torch.optim.optimizerr   r
   r   r   r   r   r   r	   <module>   s    