o
    Ti                     @   sP   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ G dd de j	j
ZdS )    N)get_cpu_info)logger)should_log_le)CPULionBuilderc                       sH   e Zd ZdZd fdd	Zdd Z fd	d
Ze dddZ	  Z
S )DeepSpeedCPULionr   MbP?g?g+?Tc              	      s   t |||d}tt| || t }d|v r|d  nd| _d| jv rHt| jD ]\}}	t|	d D ]\}
}|j	t
jkrEtd  nq4q* tj| _tjd t_|| _t  | _| j| j||d |d |td	 d
S )a&  Fast vectorized implementation of Lion optimizer on CPU:

        See Symbolic Discovery of Optimization Algorithms (https://doi.org/10.48550/arXiv.2302.06675).

        .. note::
                We recommend using our `config
                <https://www.deepspeed.ai/docs/config-json/#optimizer-parameters>`_
                to allow :meth:`deepspeed.initialize` to build this optimizer
                for you.


        Arguments:
            model_params (iterable): iterable of parameters to optimize or dicts defining
                parameter groups.
            lr (float, optional): learning rate. (default: 1e-3)
            betas (Tuple[float, float], optional): coefficients used for computing
                running averages of gradient and its square. (default: (0.9, 0.999))
            weight_decay (float, optional): weight decay (L2 penalty) (default: 0)
            full_precision_optimizer_states: creates momentum and variance in full precision regardless of
                        the precision of the parameters (default: True)
        )lrbetasweight_decayvendor_id_rawunknownamdparamsz0FP16 params for CPULion may not work on AMD CPUs   r   infoN)dictsuperr   __init__r   lower
cpu_vendor	enumerateparam_groupsdtypetorchhalfr   warningoptimizer_idopt_idfp32_optimizer_statesr   loadds_opt_lioncreate_lionr   )selfmodel_paramsr	   r
   r   r   default_argscpu_infogroup_idgroupparam_idp	__class__ O/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/ops/lion/cpu_lion.pyr      s$   

(zDeepSpeedCPULion.__init__c                 C   s   | j | j d S N)r!   destroy_lionr   )r#   r-   r-   r.   __del__=   s   zDeepSpeedCPULion.__del__c                    s,   t t| | | jD ]}|dd qd S )NamsgradF)r   r   __setstate__r   
setdefault)r#   stater(   r+   r-   r.   r3   B   s   
zDeepSpeedCPULion.__setstate__Nc                 C   sF  d}|durt   | }W d   n1 sw   Y  t d}t| jD ]y\}}t|d D ]n\}}|jdu r;q1|j|ksIJ d|j d| j| }t|dkrwd|d< | jr^t j	n|j
}	t j|j|	|d|d	< t j|j|	|d|d
< |d  d7  < |d \}
}| j| j|d |d |
||d |j|jj|d	 	 q1q'|S )a  Update the model parameters.

        .. note::
            This method will be called internally by ZeRO-Offload. DeepSpeed
            users should still use ``engine.step()`` as shown in the
            `Getting Started
            <https://www.deepspeed.ai/getting-started/#training>`_ guide.

        Args:
            closure (callable, optional): closure to compute the loss.
                Defaults to ``None``.

        Returns:
            loss: if ``closure`` is provided. Otherwise ``None``.
        Ncpur   zCPULion param is on zY and must be 'cpu', make sure you enabled 'offload_optimizer': 'cpu' in your ZeRO config.r   step)r   deviceexp_avg
exp_avg_sqr   r
   r	   r   )r   enable_gradr8   r   r   gradr5   lenr   floatr   
zeros_likedatar!   lion_updater   )r#   closurelossr8   r'   r(   r)   r*   r5   state_dtypebeta1beta2r-   r-   r.   r7   G   s0   



zDeepSpeedCPULion.step)r   r   r   Tr/   )__name__
__module____qualname__r   r   r1   r3   r   no_gradr7   __classcell__r-   r-   r+   r.   r      s    -r   )r   cpuinfor   deepspeed.utilsr   deepspeed.utils.loggingr   deepspeed.ops.op_builderr   optim	Optimizerr   r-   r-   r-   r.   <module>   s   