import math
from typing import Callable, Iterable, Tuple

import numpy as np
import torch
from torch.distributions.bernoulli import Bernoulli
from torch.optim import Optimizer

from modelscope.utils.logger import get_logger

logger = get_logger()

__all__ = ['calculate_fisher', 'ChildTuningAdamW']


def calculate_fisher(model: torch.nn.Module,
                     data_loader,
                     forward_step,
                     reserve_p,
                     grad_clip=None):
    """Build a binary gradient mask from empirical Fisher information.

    Squared gradients of every parameter whose name contains 'layer' are
    averaged over one pass of ``data_loader``; entries above the
    ``(1 - reserve_p)``-quantile are kept, so roughly a ``reserve_p``
    fraction of the child network survives.
    """
    gradient_mask = dict()
    model.train()
    for name, params in model.named_parameters():
        if 'layer' in name:
            gradient_mask[params] = params.new_zeros(params.size())

    iters = len(data_loader)
    for inputs in data_loader:
        loss = forward_step(model, inputs)
        loss.backward()
        for name, params in model.named_parameters():
            if 'layer' in name:
                if grad_clip is not None:
                    torch.nn.utils.clip_grad_norm_(params, **grad_clip)
                # Empirical Fisher: running mean of squared gradients.
                gradient_mask[params] += (params.grad**2) / iters
        model.zero_grad()

    logger.info('Calculate Fisher Information...')

    # Flatten all Fisher values into one array to find the global threshold.
    r = None
    for k, v in gradient_mask.items():
        v = v.view(-1).cpu().numpy()
        if r is None:
            r = v
        else:
            r = np.append(r, v)
    polar = np.percentile(r, (1 - reserve_p) * 100)
    for k in gradient_mask:
        gradient_mask[k] = gradient_mask[k] >= polar
    print('Polar => {}'.format(polar))

    return gradient_mask


class ChildTuningAdamW(Optimizer):
    """AdamW variant that only updates a task-relevant child network.

    In 'ChildTuning-D' mode the gradient is filtered by a fixed Fisher
    mask (see ``calculate_fisher``); any other non-None mode behaves as
    ChildTuning-F, sampling a fresh Bernoulli(reserve_p) mask per step.
    """

    def __init__(self,
                 params: Iterable[torch.nn.parameter.Parameter],
                 lr: float = 1e-3,
                 betas: Tuple[float, float] = (0.9, 0.999),
                 eps: float = 1e-6,
                 weight_decay: float = 0.0,
                 correct_bias: bool = True,
                 reserve_p: float = 1.0,
                 mode=None):
        if lr < 0.0:
            raise ValueError(
                'Invalid learning rate: {} - should be >= 0.0'.format(lr))
        if not 0.0 <= betas[0] < 1.0:
            raise ValueError(
                'Invalid beta parameter: {} - should be in [0.0, 1.0['.format(
                    betas[0]))
        if not 0.0 <= betas[1] < 1.0:
            raise ValueError(
                'Invalid beta parameter: {} - should be in [0.0, 1.0['.format(
                    betas[1]))
        if not 0.0 <= eps:
            raise ValueError(
                'Invalid epsilon value: {} - should be >= 0.0'.format(eps))
        defaults = dict(
            lr=lr,
            betas=betas,
            eps=eps,
            weight_decay=weight_decay,
            correct_bias=correct_bias)
        super().__init__(params, defaults)

        self.gradient_mask = None
        self.reserve_p = reserve_p
        self.mode = mode

    def set_gradient_mask(self, gradient_mask):
        self.gradient_mask = gradient_mask

    def step(self, closure: Callable = None):
        """
        Performs a single optimization step.

        Arguments:
            closure (:obj:`Callable`, `optional`): A closure that reevaluates
                the model and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()
        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                if grad.is_sparse:
                    raise RuntimeError(
                        'Adam does not support sparse gradients, please '
                        'consider SparseAdam instead')

                # Mask the gradient so only the child network is updated.
                if self.mode is not None:
                    if self.mode == 'ChildTuning-D':
                        # Task-driven: fixed mask from Fisher information.
                        if p in self.gradient_mask:
                            grad *= self.gradient_mask[p]
                    else:
                        # Task-free (ChildTuning-F): resample a Bernoulli mask
                        # each step; dividing by reserve_p keeps the gradient
                        # unbiased in expectation.
                        grad_mask = Bernoulli(
                            grad.new_full(
                                size=grad.size(), fill_value=self.reserve_p))
                        grad *= grad_mask.sample() / self.reserve_p

                state = self.state[p]

                # Lazy state initialization.
                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p.data)
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
                beta1, beta2 = group['betas']

                state['step'] += 1

                # Exponential moving averages of the gradient and its square.
                exp_avg.mul_(beta1).add_(grad, alpha=1.0 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1.0 - beta2)
                denom = exp_avg_sq.sqrt().add_(group['eps'])

                step_size = group['lr']
                if group['correct_bias']:
                    bias_correction1 = 1.0 - beta1**state['step']
                    bias_correction2 = 1.0 - beta2**state['step']
                    step_size = step_size * math.sqrt(
                        bias_correction2) / bias_correction1

                p.data.addcdiv_(exp_avg, denom, value=-step_size)

                # Decoupled (AdamW-style) weight decay, applied to the weights
                # directly rather than through the gradient.
                p.data.add_(
                    p.data, alpha=-group['lr'] * group['weight_decay'])

        return loss
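
# ---------------------------------------------------------------------------
# Minimal usage sketch (not part of the original module): shows how
# calculate_fisher and ChildTuningAdamW fit together in ChildTuning-D mode.
# ToyModel, the random data, and all hyperparameters below are illustrative
# assumptions, not modelscope APIs.
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    import torch.nn as nn

    class ToyModel(nn.Module):

        def __init__(self):
            super().__init__()
            # calculate_fisher only masks parameters whose name contains
            # 'layer', so the attribute name matters here.
            self.layer = nn.Linear(4, 1)

        def forward(self, x):
            return self.layer(x)

    model = ToyModel()
    data = [(torch.randn(8, 4), torch.randn(8, 1)) for _ in range(10)]

    def forward_step(model, inputs):
        x, y = inputs
        return nn.functional.mse_loss(model(x), y)

    # 1. Estimate the Fisher mask: keep roughly the top 30% of entries.
    reserve_p = 0.3
    mask = calculate_fisher(model, data, forward_step, reserve_p)

    # 2. Hand the mask to the optimizer; in ChildTuning-D mode only the
    #    masked (child) entries receive non-zero Adam updates.
    optimizer = ChildTuningAdamW(
        model.parameters(),
        lr=1e-3,
        reserve_p=reserve_p,
        mode='ChildTuning-D')
    optimizer.set_gradient_mask(mask)

    # 3. Train as with any torch optimizer.
    for inputs in data:
        loss = forward_step(model, inputs)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()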