o
    ziQ                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dl mZ d dlm	Z	 d dl
mZmZmZmZmZmZmZmZmZmZ d dlZd dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlm Z m!Z! d dl"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z) e*e+Z,edZ-G dd de#Z.deejgdf dededdfddZ/dej0j1deddfddZ2	d+dej0j3dejdee dej0j3fdd Z4e j5d!d"de	fd#d$Z6d,dej0j1d&ed'ee7 d(e7ddf
d)d*Z8dS )-    N)	ExitStack)partial)
ModuleType)
AnyCallableContextManagerLiteralOptionalOrderedDictSetTupleTypecast)apply_to_collection)RequirementCache)Tensor)init)_IncompatibleKeys)Selfoverride)	Precision)_ClassReplacementContextManager_convert_fp_tensor_DtypeContextManager)_DEVICEzbitsandbytes>=0.42.0c                	   @   s   e Zd ZdZ		dded deej deee	  ddfdd	Z
ed
ejjdejjfddZedefddZedefddZedefddZededefddZededefddZdS )BitsandbytesPrecisiona  Plugin for quantizing weights with `bitsandbytes <https://github.com/TimDettmers/bitsandbytes>`__.

    .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.

    .. note::
        The optimizer is not automatically replaced with ``bitsandbytes.optim.Adam8bit`` or equivalent 8-bit optimizers.

    Args:
        mode: The quantization mode to use.
        dtype: The compute dtype to use.
        ignore_modules: The submodules whose Linear layers should not be replaced, for example. ``{"lm_head"}``.
            This might be desirable for numerical stability. The string will be checked in as a prefix, so a value like
            "transformer.blocks" will ignore all linear layers in all of the transformer blocks.
    Nmode)nf4nf4-dqfp4fp4-dqint8int8-trainingdtypeignore_modulesreturnc                 C   s   t   |d u r |drtj}ntj rtj rtjntj}|dr4|tjur4t|d| dt	 }|d |d |d |d |d |d	 d
}|| | _
|| _|pXt | _d S )Nr!   z7 only works with `dtype=torch.float16`, but you chose ``
_NF4Linear_NF4DQLinear
_FP4Linear_FP4DQLinear_Linear8bitLt_Int8LinearInference)r   r   r   r    r"   r!   )_import_bitsandbytes
startswithtorchfloat16cudais_availableis_bf16_supportedbfloat16
ValueErrorglobals_linear_clsr#   setr$   )selfr   r#   r$   globals_mode_to_cls r<   c/home/ubuntu/.local/lib/python3.10/site-packages/lightning_fabric/plugins/precision/bitsandbytes.py__init__E   s&   

zBitsandbytesPrecision.__init__modulec                    s|   t dd | D stdt  t  fdd| D s't|| j| j | D ]}t| jj	r;| j
|_d|_q+|S )Nc                 s   s    | ]
}t |tjjV  qd S N)
isinstancer/   nnLinear.0mr<   r<   r=   	<genexpr>i       z7BitsandbytesPrecision.convert_module.<locals>.<genexpr>z|You are using the bitsandbytes precision plugin, but your model has no Linear layers. This plugin won't work for your model.c                 3   s&    | ]}t | jj jjfV  qd S r@   )rA   rB   Linear8bitLt
Linear4bitrD   bnbr<   r=   rG   q   s   $ F)anymodules	TypeErrorr-   _convert_layersr7   r$   rA   rB   rJ   r#   compute_dtypecompute_type_is_set)r9   r?   rF   r<   rK   r=   convert_modulef   s   z$BitsandbytesPrecision.convert_modulec                 C   
   t | jS r@   r   r#   r9   r<   r<   r=   tensor_init_context|      
z)BitsandbytesPrecision.tensor_init_contextc                 C   sL   | j rtd| j  d|  }td| ji}t }|| || |S )NzInstantiating your model under the `init_module` context manager is not supported when used with `BitsandbytesPrecision(..., ignore_modules=z)` as this may initialize the layers on-device, defeating the purpose of quantization. You can remove `ignore_modules` or remove the `init_module` context manager.ztorch.nn.Linear)r$   RuntimeErrorrW   r   r7   r   enter_context)r9   	dtype_ctxcontext_managerstackr<   r<   r=   module_init_context   s   

z)BitsandbytesPrecision.module_init_contextc                 C   rT   r@   rU   rV   r<   r<   r=   forward_context   rX   z%BitsandbytesPrecision.forward_contextdatac                 C   s   t |tt| jdS N)functionr#   dst_type)r   r   r   r#   r9   r`   r<   r<   r=   convert_input   s   z#BitsandbytesPrecision.convert_inputc                 C   s   t |ttt dS ra   )r   r   r   r/   get_default_dtyperd   r<   r<   r=   convert_output   s   z$BitsandbytesPrecision.convert_outputNN)__name__
__module____qualname____doc__r   r	   r/   r#   r   strr>   r   rB   ModulerS   r   rW   r^   r_   r   re   rg   r<   r<   r<   r=   r   -   s2    

!r   quantize_fn
state_dict_r%   c                 G   s6   t dd |D d }|d u rd S ||}| | d S )Nc                 s   s    | ]
}| d r|V  qdS )weightN)endswith)rE   namer<   r<   r=   rG      rH   z)_quantize_on_load_hook.<locals>.<genexpr>)nextpop)ro   rp   rq   
weight_keyrr   r<   r<   r=   _quantize_on_load_hook   s
   
rx   r?   incompatible_keysc                 C   s*   t |jD ]}|dr|j| qd S )Nrr   )reversedmissing_keysrs   remove)r?   ry   keyr<   r<   r=   _ignore_missing_weights_hook   s
   
r~   paramr`   quant_statec                 C   sl   t  }| jjdkr't| |jjr|jj||j|| j| jdS t	jj
||jdS || _t| |jjr4|| _| S )Nmeta)requires_gradr   compress_statistics
quant_type)r   )r-   devicetyperA   rB   
Params4bitr   r   r   r/   	Parameterr`   r   )r   r`   r   rL   r<   r<   r=   _replace_param   s   r      )maxsizec            	         s   t sttt dtjv } | sdtjd< tjddd tjddd dd l | s,tjd= G  fdd	d	 jj	}G  fd
dd jj
}G dd d|}G dd d|}G dd d|}G dd d|}G dd d|}|||||||d}t |  S )NBITSANDBYTES_NOWELCOME1ignorez1.*bitsandbytes was compiled without GPU support.*)messagezHMatMul8bitLt: inputs will be cast from .* to float16 during quantizationr   c                       s   e Zd ZdZddddedee deded	df
 fd
dZddee	j
 dee	j d	dffddZedjjde	j
dee	j d	jjffddZdddeded	efddZdfddZ  ZS )z+_import_bitsandbytes.<locals>._Linear8bitLtzWraps `bnb.nn.Linear8bitLt` and enables instantiation directly on the device and re-quantizaton when loading
        the state dict.Ng      @r   	thresholdargsr   r   kwargsr%   c                   s~   t  j|||d| tjj| j| _tttjj | j	| _	tj
d|djjdkr/|   | tt| j | t d S )Nr   r   r   r1   )superr>   r   rB   
Int8Paramsrr   r	   r/   r   biastensorr   r   	quantize_"_register_load_state_dict_pre_hookr   rx   "register_load_state_dict_post_hookr~   )r9   r   r   r   r   	__class__rL   r<   r=   r>      s   z4_import_bitsandbytes.<locals>._Linear8bitLt.__init__rr   c                    L   |du r| j j}|jjtjkrdS t| j  jjsJ | | j ||| _ dS zInplace quantize.N)	rr   r`   r#   r/   r!   rA   rB   r   quantizer9   rr   r   rK   r<   r=   r         z5_import_bitsandbytes.<locals>._Linear8bitLt.quantize_
int8paramsc           	         s   |pt d}|jdkrtd|j | j|t jd}| jr&|| _| S  j	
|\}}}}}~~|| _t| d| t| d| | S )Nr1   Unexpected device type: r   r#   CBSCB)r/   r   r   rY   
contiguoustor0   has_fp16_weightsr`   
functionaldouble_quantsetattr)	r   rr   r   Br   CBtr   SCBtrq   rK   r<   r=   r      s   
z4_import_bitsandbytes.<locals>._Linear8bitLt.quantizeTrecurser   c                S   s   | j jjdkr	t| j jtjkrtt|}tj| j j|d}|jdkr,| 	|| nt
| j || _ | jd urEt
| jtj| j|d| _| S )Nr   r   r1   )rr   r   r   NotImplementedErrorr#   r/   uint8
empty_liker`   r   r   r   r9   r   r   rr   r<   r<   r=   to_empty  s   


z4_import_bitsandbytes.<locals>._Linear8bitLt.to_emptyc                    s   | j d ur%tjj| j\}}|dkrdt| nd}t| j | | t	| j jj
}|r8| jjtjkr8t| jj}tjjj|tdd |rj| jjjdkrSt| jjjdkra| | d S t| j|| _d S d S )Nr   r      ar   r1   )r   r/   rB   r   _calculate_fan_in_and_fan_outrr   mathsqrtuniform_rA   r   r#   r   r   r`   kaiming_uniform_r   r   r   r   r9   fan_inrq   boundlinear_init_finishedrr   rK   r<   r=   reset_parameters  s    
z<_import_bitsandbytes.<locals>._Linear8bitLt.reset_parametersrh   r%   N)ri   rj   rk   rl   r   r	   r   floatr>   r/   r   r   r   staticmethodrB   r   r   boolr   r   r   __classcell__r<   rK   r   r=   r+      s     0(
r+   c                	       s   e Zd ZdZdddedee deddf fdd	Zdd
eej	 deej
 ddffddZedjjd
ej	deej
 djjffddZdddededefddZdfddZ  ZS )z)_import_bitsandbytes.<locals>._Linear4bitzWraps `bnb.nn.Linear4bit` to enable: instantiation directly on the device, re-quantizaton when loading the
        state dict, meta-device initialization, and materialization.Nr   r   r   r   r%   c                   s|   t  j|d|i| tjj| j| _tttjj | j	| _	tj
d|djjdkr.|   | tt| j | t d S )Nr   r   r   r1   )r   r>   r   rB   r   rr   r	   r/   r   r   r   r   r   r   r   r   rx   r   r~   )r9   r   r   r   r   r<   r=   r>   1  s   z2_import_bitsandbytes.<locals>._Linear4bit.__init__rr   c                    r   r   )	rr   r`   r#   r/   r   rA   rB   r   r   r   rK   r<   r=   r   <  r   z3_import_bitsandbytes.<locals>._Linear4bit.quantize_
params4bitc                    sf   |pt d}|jdkrtd|j | j|t jd} jj|| j	| j
| jd\}}t| ||S )Nr1   r   r   )	blocksizer   r   )r/   r   r   rY   r   r   halfr   quantize_4bitr   r   r   r   )r   rr   r   ww_4bitr   rK   r<   r=   r   F  s   

z2_import_bitsandbytes.<locals>._Linear4bit.quantizeTr   r   c                S   s   | j jtjkrtj| j jj|tjd}n	tj| j j	|d}t
|}|jdkr.| || nt| j || _ | jd urGt| jtj| j|d| _| S )Nr   r   r1   )rr   r#   r/   r   emptyr   shaper   r   r`   r   r   r   r   r   r   r<   r<   r=   r   W  s   


z2_import_bitsandbytes.<locals>._Linear4bit.to_emptyc                    s   | j d ur%tjj| j\}}|dkrdt| nd}t| j | | t	| j jj
}|rE| jjtjkrEtj| jjj| jjtjd}n| jj}tjjj|tdd |rn| jjjdkre| | d S t| j|| _d S d S )Nr   r   r   r   r   r1   )r   r/   rB   r   r   rr   r   r   r   rA   r   r#   r   r   r   r   r   r   r`   r   r   r   r   r   rK   r<   r=   r   f  s   
z:_import_bitsandbytes.<locals>._Linear4bit.reset_parametersrh   r   )ri   rj   rk   rl   r   r	   r   r>   r/   r   r   r   r   rB   r   r   r   r   r   r   r   r<   rK   r   r=   _Linear4bit-  s     *(
r   c                       *   e Zd Zdededdf fddZ  ZS )z2_import_bitsandbytes.<locals>._Int8LinearInferencer   r   r%   Nc                    s   t  j|ddi| d S )Nr   Fr   r>   r9   r   r   r   r<   r=   r>   |  s   z;_import_bitsandbytes.<locals>._Int8LinearInference.__init__ri   rj   rk   r   r>   r   r<   r<   r   r=   r,   {      "r,   c                       r   )z(_import_bitsandbytes.<locals>._FP4Linearr   r   r%   Nc                       t  j|ddd| d S )Nr   Fr   r   r   r   r   r<   r=   r>        z1_import_bitsandbytes.<locals>._FP4Linear.__init__r   r<   r<   r   r=   r)     r   r)   c                       r   )z*_import_bitsandbytes.<locals>._FP4DQLinearr   r   r%   Nc                    r   )Nr   Tr   r   r   r   r<   r=   r>     r   z3_import_bitsandbytes.<locals>._FP4DQLinear.__init__r   r<   r<   r   r=   r*     r   r*   c                       r   )z(_import_bitsandbytes.<locals>._NF4Linearr   r   r%   Nc                    r   )Nr   Fr   r   r   r   r<   r=   r>     r   z1_import_bitsandbytes.<locals>._NF4Linear.__init__r   r<   r<   r   r=   r'     r   r'   c                       r   )z*_import_bitsandbytes.<locals>._NF4DQLinearr   r   r%   Nc                    r   )Nr   Tr   r   r   r   r<   r=   r>     r   z3_import_bitsandbytes.<locals>._NF4DQLinear.__init__r   r<   r<   r   r=   r(     r   r(   )r+   r   r,   r)   r*   r'   r(   )_BITSANDBYTES_AVAILABLEModuleNotFoundErrorrm   osenvironwarningsfilterwarningsbitsandbytesrB   rI   rJ   r6   update)	nowelcome_setr+   r   r,   r)   r*   r'   r(   classesr<   rK   r=   r-      s:   

TN	r-    
linear_clsr$   prefixc                    s  |   D ]\}}|r| d| n| t|tjjr~t fdd|D s~td d |jd u}t	 d }t
t||rAdnd}||j|j||d	}	|r[t|	j|jj |	_d
t||re|	jjnd i}
t|	j|jj fi |
|	_| ||	 qt||| d qd S )N.c                 3   s    | ]}  |V  qd S r@   )r.   )rE   sfullnamer<   r=   rG     s    z"_convert_layers.<locals>.<genexpr>zReplacing layer z with bitsandbytes equivalentr   r   cpu)r   r   r   )r   )named_childrenrA   r/   rB   rC   rM   logdebugr   r6   r   
issubclassin_featuresout_featuresr   r`   clonerr   r   __setattr__rP   )r?   r   r$   r   rt   childhas_biasr   r   replacementstater<   r   r=   rP     s(   $

rP   r@   )r   )9	functoolsloggingr   r   r   
contextlibr   r   typesr   typingr   r   r   r   r	   r
   r   r   r   r   r/   lightning_utilitiesr    lightning_utilities.core.importsr   r   torch.nnr   torch.nn.modules.moduler   typing_extensionsr   r   ,lightning_fabric.plugins.precision.precisionr   (lightning_fabric.plugins.precision.utilsr   r   r    lightning_fabric.utilities.typesr   	getLoggerri   r   r   r   rx   rB   rn   r~   r   r   	lru_cacher-   rm   rP   r<   r<   r<   r=   <module>   sL   0
&r
	

 ,W