o
    #iU                     @   sj   U d dl mZmZmZmZ d dlZd dlm  mZ	 d dlm
Z
 g Zee ed< ejjG dd dZdS )    )DictListOptionalTupleN)Tensor__all__c                   @   s   e Zd Z									ddee dedeeef d	ed
edededededefddZdede	e fddZ
dee	e  fddZdS )_FunctionalAdamMbP?g?g+?:0yE>        Fparamslrbetasepsweight_decayamsgradmaximizeforeachfused_allow_empty_param_listc                 C   s  d|kst d| d|kst d| d|d   kr"dk s,n t d|d  d|d   kr8dk sBn t d|d  d|ksMt d	| |||d |d |d
| _|| _|| _|| _|	| _tjt	tj
t	ttj
f f i | _t|dkr|
st dd|i| _d S )Nr   zInvalid learning rate: zInvalid epsilon value: r   g      ?z#Invalid beta parameter at index 0:    z#Invalid beta parameter at index 1: zInvalid weight_decay value: )r   r   beta1beta2r   z%optimizer got an empty parameter listr   )
ValueErrordefaultsr   r   r   r   torchjitannotater   r   strstatelenparam_group)selfr   r   r   r   r   r   r   r   r   r    r$   e/home/ubuntu/SoloSpeech/.venv/lib/python3.10/site-packages/torch/distributed/optim/functional_adam.py__init__   s0   $z_FunctionalAdam.__init__paramgradc                 C   sv  g }g }g }g }g }g }t |}	|dur|| || || jvrVi | j|< | j| }
t d|
d< t j|t jd|
d< t j|t jd|
d< | jrVt j|t jd|
d< | j| }
||
d  ||
d  | jrs||
d  ||
d  t  3 t	j
||||||| j|	| j| jd | jd	 | jd
 | jd | jd | j| jddd W d   dS 1 sw   Y  dS )zo
        Similar to step, but operates on a single parameter and optionally a
        gradient tensor.
        Nr   stepmemory_formatexp_avg
exp_avg_sqmax_exp_avg_sqr   r   r   r   r   r   has_complexr   r   r   r   r   r   r   r   
grad_scale	found_inf)r   
is_complexappendr    tensor
zeros_likepreserve_formatr   no_gradFadamr   r   r   r   )r#   r'   r(   params_with_gradgradsexp_avgsexp_avg_sqsmax_exp_avg_sqsstate_stepsr0   r    r$   r$   r%   
step_paramB   sh   










"z_FunctionalAdam.step_param	gradientsc                 C   s  | j d }g }g }g }g }g }g }d}	t|t|kr.tddt| d dt|  t| j d |D ]t\}
}|d ur|	t|
O }	||
 || |
| jvri | j|
< | j|
 }td|d< tj	|
tj
d	|d
< tj	|
tj
d	|d< | jrtj	|
tj
d	|d< | j|
 }||d
  ||d  | jr||d  ||d  q6t 3 tj||||||| j|	| j| jd | jd | jd | jd | jd | j| jd d d W d    d S 1 sw   Y  d S )Nr   FzEthe gradients passed in does not equal to the size of the parameters!zParams length: z. zGradients length: r   r)   r*   r,   r-   r.   r   r   r   r   r   r/   )r"   r!   r   zipr   r3   r4   r    r5   r6   r7   r   r8   r9   r:   r   r   r   r   )r#   rB   r   r;   r<   r=   r>   r?   r@   r0   r'   gradientr    r$   r$   r%   r)   ~   s   










"z_FunctionalAdam.stepN)	r	   r
   r   r   FFFFF)__name__
__module____qualname__r   r   floatr   boolr&   r   rA   r)   r$   r$   r$   r%   r      sB    
	

,<r   )typingr   r   r   r   r   torch.optim._functionaloptim_functionalr9   r   r   r   __annotations__r   scriptr   r$   r$   r$   r%   <module>   s   