o
    ߗi>                     @   sv   U d dl mZmZmZmZ d dlZd dlm  mZ	 d dlm
Z
 d dlmZ g Zee ed< ejjG dd dZdS )    )DictListOptionalTupleN)Tensor)2_scripted_functional_optimizer_deprecation_warning__all__c                   @   sh   e Zd Z							ddee dedeeef d	ed
edededefddZdee	e  fddZ
dS )_FunctionalAdamaxMbP?g?g+?:0yE>        Fparamslrbetasepsweight_decayforeachmaximize_allow_empty_param_listc	           	      C   s  t dd d|kstd| d|kstd| d|d   kr'dk s1n td|d  d|d	   kr=dk sGn td
|d	  d|ksRtd| |||d |d	 |d| _|| _|| _tjttj	tt
tj	f f i | _t|dkr|stdd|i| _d S )N   )
stacklevelr   zInvalid learning rate: zInvalid epsilon value: r   g      ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid weight_decay value: )r   r   beta1beta2r   z%optimizer got an empty parameter listr   )r   
ValueErrordefaultsr   r   torchjitannotater   r   strstatelenparam_group)	selfr   r   r   r   r   r   r   r    r%   g/home/ubuntu/transcripts/venv/lib/python3.10/site-packages/torch/distributed/optim/functional_adamax.py__init__   s.   
$z_FunctionalAdamax.__init__	gradientsc                 C   s  | j d }g }g }g }g }g }t|t|kr*tddt| d dt|  d}t| j d |D ]]\}	}
|
d ur|t|	O }||	 ||
 |	| jvrwi | j|	< | j|	 }td|d< tj	|	tj
d	|d
< tj	|	tj
d	|d< | j|	 }||d
  ||d  ||d  q4t , tj|||||| jd | jd | jd | jd | jd | j| j|d W d    d S 1 sw   Y  d S )Nr   zEthe gradients passed in does not equal to the size of the parameters!zParams length: z. zGradients length: Fr   step)memory_formatexp_avgexp_infr   r   r   r   r   )r   r   r   r   r   r   r   has_complex)r#   r"   r   zipr   
is_complexappendr!   tensor
zeros_likepreserve_formatno_gradFadamaxr   r   r   )r$   r(   r   params_with_gradgradsexp_avgsexp_infsstate_stepsr-   paramgradientr!   r%   r%   r&   r)   C   sh   









"z_FunctionalAdamax.stepN)r
   r   r   r   FFF)__name__
__module____qualname__r   r   floatr   boolr'   r   r)   r%   r%   r%   r&   r	      s4    
	
)r	   )typingr   r   r   r   r   torch.optim._functionaloptim_functionalr5   r   ,torch.distributed.optim._deprecation_warningr   r   r    __annotations__r   scriptr	   r%   r%   r%   r&   <module>   s   