o
    TÃiö  ã                   @   sX   d dl mZmZ ddlZzddlZW n ey   dZY nw dd„ ZG dd„ deƒZdS )é   )ÚLoRAOptimizedLinearÚOptimizedLinearé    Nc                 C   s.   |   d¡ |  ¡ D ]}t|tƒr| ¡  q	d S ©NF)Úrequires_grad_ÚmodulesÚ
isinstancer   Ú	init_lora)ÚmodelÚm© r   úT/home/ubuntu/.local/lib/python3.10/site-packages/deepspeed/linear/context_manager.pyr	      s   

€þr	   c                   @   s*   e Zd ZdZd	dd„Zdd„ Zdd„ ZdS )
ÚInitad  
    Init context wrapper similar in style to zero.Init. Allows for injecting OptimizedLinear during model
    construction which will shard base weights and reduce overall memory usage during model init. Primarily
    useful when initializing a model via transformers.AutoModelForCausalLM.

    Example usage:
        lora_config = deepspeed.linear.LoRAConfig(..)
        quant_config = deepspeed.linear.QuantizationConfig(..)
        with deepspeed.linear.Init(lora_config=lora_config, quant_config=quant_config):
            model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3.1-405B")

    Nc                 C   sB   t jj| _d | _td krtjj| _tjj| _	|| _
|| _d| _d S r   )ÚtorchÚnnÚLinearÚ_orig_nn_linearÚ_orig_causallm_pretrainedÚtransformersÚAutoModelForCausalLMÚfrom_pretrainedÚfrom_configÚ_orig_causallm_configÚlora_configÚquant_configÚ_post_init_complete)Úselfr   r   r   r   r   Ú__init__%   s   



zInit.__init__c                    sb   G ‡fdd„dƒ}‡fdd„‰ ‡ ‡fdd„}‡ ‡fdd„}|t j_td kr/|tj_|tj_d S d S )	Nc                       s&   e Zd Z” jZ” jZ” jZdd„ ZdS )z(Init.__enter__.<locals>.OptLinearWrapperc                 _   s4   d| j _| j |d< | j|d< | j|d< t|i |¤ŽS )NTr   Úquantization_configÚ
linear_cls)Ú_lora_configÚdelay_lora_initÚ_quant_configr   r   ©r   ÚargsÚkwargsr   r   r   Ú__new__6   s
   


z0Init.__enter__.<locals>.OptLinearWrapper.__new__N)	Ú__name__Ú
__module__Ú__qualname__r   r   r    r   r"   r&   r   ©r   r   r   ÚOptLinearWrapper1   s
    r+   c                    s   ˆ j d kr	t| ƒ dˆ _| S )NT)r   r	   r   )r
   r*   r   r   Ú_model_init=   s   
z#Init.__enter__.<locals>._model_initc                     ó   ˆj | i |¤Ž}ˆ |ƒS ©N)r   ©r$   r%   r
   ©r,   r   r   r   r   D   ó   z'Init.__enter__.<locals>.from_pretrainedc                     r-   r.   )r   r/   r0   r   r   r   H   r1   z#Init.__enter__.<locals>.from_config)r   r   r   r   r   r   r   )r   r+   r   r   r   r0   r   Ú	__enter__/   s   þzInit.__enter__c                 O   s4   | j tj_| jstdƒ d S | jtj_	| j
tj_d S )Na!  WARNING: For some reason LoRA modules are not initialized, this is usually done automatically if using transformers via (AutoModelForCausalLM from_pretrained/from_config). You must call `init_lora` on each module in order to use DeepSpeed LoRA, otherwise you will error out during runtime.)r   r   r   r   r   Úprintr   r   r   r   r   r   r#   r   r   r   Ú__exit__Q   s
   

zInit.__exit__)NN)r'   r(   r)   Ú__doc__r   r2   r4   r   r   r   r   r      s
    

"r   )	Úoptimized_linearr   r   r   r   ÚImportErrorr	   Úobjectr   r   r   r   r   Ú<module>   s   ÿ