o
    vi.                     @   s   d dl Z zd dlZd dlmZ W n ey) Z zed e  W Y dZ[ndZ[ww ej	ej
ddiddej
ddiddgd	gd
dgdejdejfddZde jde jde jdedededefddZdS )    NzNtriton is not installed, please install by running `pip install triton>=2.2.0`
BLOCK_SIZE      )	num_warpsi      
n_elementsp_ptrexp_avg_ptr)configskeyrestore_valuec	                 C   s   t jdd}	|	| }
|
t d| }||k }| | }|| }|| }t j||d}t j||d}t j||d}|d||   }|| }|| | }|dk}t |dk| |}|||  }|| | }t j|||d t j|||d d S )Nr   )axis)mask   )tl
program_idarangeloadwherestore)r   grad_ptrr	   lrwdbeta1beta2r   r   pidblock_startoffsetsr   offset_p_ptroffset_grad_ptroffset_exp_avg_ptrpgradexp_avgdiffupdate
can_updateupdate_sign r(   G/home/ubuntu/.local/lib/python3.10/site-packages/lion_pytorch/triton.pyupdate_fn_kernel   s&   r*   r!   r"   r#   r   r   r   r   c              	      sN   t dd | ||fD sJ |    fdd}t| | ||||||  d S )Nc                 S   s   g | ]}|j qS r(   )is_cuda).0tr(   r(   r)   
<listcomp>T       zupdate_fn.<locals>.<listcomp>c                    s   t  | d fS )Nr   )tritoncdiv)metar   r(   r)   <lambda>W   r/   zupdate_fn.<locals>.<lambda>)allnumelr*   )r!   r"   r#   r   r   r   r   gridr(   r3   r)   	update_fnK   s   	r8   )torchr0   triton.languagelanguager   ImportErroreprintexitautotuneConfigjit	constexprr*   Tensorfloatr8   r(   r(   r(   r)   <module>   sD    
	: