o
    TÃi¶   ã                   @   s<   d dl mZmZ d dlmZmZ d dlZG dd„ deƒZdS )é    )ÚABCÚabstractmethod)ÚListÚTupleNc                       sÔ   e Zd ZdZd*‡ fdd„	Zdd„ Zdd„ Zed	d
„ ƒZedd„ ƒZ	dd„ Z
dd„ Zd*dd„Zdeeejejf  fdd„Zdd„ Zdd„ Zdd„ Zdd„ Zdd„ Zd d!„ Zd"d#„ Zd*d$d%„Zd&d'„ Zd(d)„ Z‡  ZS )+ÚHybridEngineContainera¢  
    This container identifies which methods need to be overridden in addition to
    the base container to enable use in the RLHF pipeline. These methods are not
    necessary for inference alone.

    NOTE: If you are using this feature with a container that
    also inherits from `MetaTensorContainer`, ensure that `MetaTensorContainer`
    is inherited before `HybridEngineContainer` in the class definition.
    Fc                    s   t ƒ j|d |  ¡  dS )ah  
        Same purposes as the base container, but also grabs the hooks for any LoRA
        parameters. If it's necessary to override specific sub-components of the model,
        it's best to augment the specific `set_[component]` itself rather than modifying
        the `initialize_tensors` method. See the `HybridSplitQKVContainer` for an example.
        )Úenable_trainingN)ÚsuperÚinitialize_tensorsÚset_lora_params)Úselfr   ©Ú	__class__© úm/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/module_inject/containers/features/hybrid_engine.pyr	      s   z(HybridEngineContainer.initialize_tensorsc                 C   ó   dS )a  
        If the views on certain parameters are largely incompatible, it may be necessary to do
        more substantial transformations to the parameters. This method should be overridden to
        transform the inference format to what is necessary for training.
        Nr   ©r   r   r   r   Útransform_for_training!   ó   z,HybridEngineContainer.transform_for_trainingc                 C   r   )a  
        If the views on certain parameters are largely incompatible, it may be necessary to do
        more substantial transformations to the parameters. This method should be overridden to
        transform the training format to what is necessary for inference.
        Nr   r   r   r   r   Útransform_for_inference)   r   z-HybridEngineContainer.transform_for_inferencec                 C   ó   t dƒ‚)a
  
        If available, set the LoRA parameters for the module.  An implementation
        for this would iterate over all parameters of the model and use the `maybe_get_lora` helper
        method to check if the parameter does in fact have any LoRA params.
        zIA set_lora_params() function must be defined for the relevant parameters.©ÚNotImplementedErrorr   r   r   r   r
   1   s   z%HybridEngineContainer.set_lora_paramsc                 C   r   )z=Get the pair of lora params and its matched model parameters.zDget_lora_matched_pair() must be defined for the relevant parameters.r   r   r   r   r   Úget_lora_matched_pair:   s   z+HybridEngineContainer.get_lora_matched_pairc              	   C   sN   |   ¡ D ] \}}t|ƒdkr$|\}}}| j|t | ¡ | ¡ ¡ 7  _qdS )z0Fuse the LoRA parameters for the inference mode.é   N©r   ÚlenÚdataÚtorchÚmatmulÚt©r   Úmaybe_lora_paramÚparamÚlora_right_weightÚlora_left_weightÚlora_scalingr   r   r   Ú	fuse_lora?   ó   þ"€ûzHybridEngineContainer.fuse_lorac              	   C   sN   |   ¡ D ] \}}t|ƒdkr$|\}}}| j|t | ¡ | ¡ ¡ 8  _qdS )z1Unfuse the LoRA parameters for the training mode.r   Nr   r    r   r   r   Úunfuse_loraH   r'   z!HybridEngineContainer.unfuse_lorac                 C   s<   | j ||d | j||d | j||d | j||d dS )a?  
        Add support for reversed dim in tensor parallelism. If necessary, override
        the called methods to handle partitioned weights (i.e. if qkv is split, override
        the `attention_qkv_mp` method). If the model component is not split, it should
        be safe to use the default implementation.
        )Úreversed_dimN)Úattention_qkv_mpÚattention_o_mpÚmlp_inter_mpÚmlp_output_mp)r   Ú
mp_replacer)   r   r   r   Úapply_tensor_parallelismQ   s   z.HybridEngineContainer.apply_tensor_parallelismÚparam_pairsc                 C   s    |D ]\}}|dur~|}qdS )zá
        Helper for `release_[component]` methods. Accepts a list of tuples where the first
        element is the module param that needs to be deleted, and the second is the reassignment
        from the container.
        Nr   )r   r0   Úmodule_paramÚcontainer_paramr   r   r   Ú_release_paramsd   s
   ýz%HybridEngineContainer._release_paramsc                 C   sr   | j jj| jf| j jj| jf| j jj| jf| j jj| jf| j j	| j
f| j j| jfg}|  |¡ |  ¡  |  ¡  dS )a  
        Delete module parameters if they exist and point them back to the container. The primary
        purpose of this is for TP-inference with ZeRO-3. In this scenario, we need to delete the
        parameters we've created for inference to free their memory.
        N)ÚmoduleÚ	attentionÚattn_owÚdense_wÚattn_obÚdense_bÚmlpÚattn_nwÚattn_nbÚnorm_wÚinput_nwÚnorm_bÚinput_nbr3   Úrelease_qkvÚrelease_mlp)r   Úgeneral_paramsr   r   r   Úrelease_memoryo   s   ú
	z$HybridEngineContainer.release_memoryc                 C   s.   | j jj| jf| j jj| jfg}|  |¡ dS )zF
        Release for QKV parameters (as well as any aliases).
        N)r4   r5   Ú	attn_qkvwÚqkvwÚ	attn_qkvbÚqkvbr3   )r   Ú
qkv_paramsr   r   r   rA   ƒ   s   þz!HybridEngineContainer.release_qkvc                 C   sJ   | j jj| jf| j jj| jf| j jj| jf| j jj| j	fg}|  
|¡ dS )zF
        Release for MLP parameters (as well as any aliases).
        N)r4   r:   Úinter_wÚ_h4h_wÚinter_bÚ_h4h_bÚoutput_wÚ_4hh_wÚoutput_bÚ_4hh_br3   )r   Ú
mlp_paramsr   r   r   rB   Ž   s   üz!HybridEngineContainer.release_mlpc                 C   s   |   ¡  |  ¡  dS )a  
        The purpose of reset params is to get the weights from the FP16 training
        copy of the model and copy to them to contiguous inference view. This only needs
        to be performed when the container parameters cannot be used directly for inference.
        N)Ú	reset_qkvÚ	reset_mlpr   r   r   r   Úreset_params›   s   z"HybridEngineContainer.reset_paramsc                 C   r   )z^
        Perform any necessary resets of the model parameters for the QKV components.
        Nr   r   r   r   r   rS   ¤   ó   zHybridEngineContainer.reset_qkvc                 C   r   )z^
        Perform any necessary resets of the model parameters for the MLP components.
        Nr   r   r   r   r   rT   ª   rV   zHybridEngineContainer.reset_mlpc                 C   s   t | dƒs	|  ¡  | jS )zV
        Return a list of all parameters that would have LoRA for the module.
        Úlora_params)Úhasattrr
   rW   r   r   r   r   Úget_lora_params°   s   
z%HybridEngineContainer.get_lora_paramsc                 C   sH   | j | jj_ | j| jj_| j| j_| j| j_| j|d | j	|d dS )z¶
        Rather than copying into, set the parameters directly. This is necessary to provide
        an inexpensive (low-memory-overhead) view onto the FP16 forward weights.
        )Ú
Z3_enabledN)
r;   r4   r:   r<   r>   r=   r@   r?   Úset_attn_params_wo_copyÚset_mlp_params_wo_copy)r   rZ   r   r   r   Úset_params_wo_copy¸   s   

z(HybridEngineContainer.set_params_wo_copyc                 K   ó4   | j | jj_| j| jj_| j| jj_| j| jj_	dS ©zC
        Narrower sub-method for finer grained overriding.
        N)
r7   r4   r5   r6   r9   r8   rF   rE   rH   rG   ©r   Úkwargsr   r   r   r[   Ä   ó   z-HybridEngineContainer.set_attn_params_wo_copyc                 K   r^   r_   )
rK   r4   r:   rJ   rM   rL   rO   rN   rQ   rP   r`   r   r   r   r\   Í   rb   z,HybridEngineContainer.set_mlp_params_wo_copy)F)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r	   r   r   r   r
   r   r&   r(   r/   r   r   r   ÚTensorr3   rD   rA   rB   rU   rS   rT   rY   r]   r[   r\   Ú__classcell__r   r   r   r   r      s.    



	
		
	r   )Úabcr   r   Útypingr   r   r   r   r   r   r   r   Ú<module>   s   