o
    8wizS                     @   s  d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	 d dl m
Z
 d dlmZ d dlmZmZmZmZmZ d dlZd dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dl m!Z! d dl"m#Z#m$Z$m%Z% d dl&m'Z' e(e)Z*edZ+G dd de!Z,deejgdf dededdfddZ-dej.j/deddfddZ0	d,dej.j1dejdee2 dej.j1fd d!Z3e j4d"d#defd$d%Z5d-dej.j/d'e6d(e7e8 d)e8ddf
d*d+Z9dS ).    N)OrderedDict)AbstractContextManager	ExitStack)partial)
ModuleType)AnyCallableLiteralOptionalcast)apply_to_collection)RequirementCache)Tensor)init)_IncompatibleKeys)Selfoverride)	Precision)_ClassReplacementContextManager_convert_fp_tensor_DtypeContextManager)_DEVICEbitsandbytesc                	   @   s   e Zd ZdZ		dded deej deee	  ddfdd	Z
ed
ejjdejjfddZedefddZedefddZedefddZededefddZededefddZdS )BitsandbytesPrecisiona  Plugin for quantizing weights with `bitsandbytes <https://github.com/bitsandbytes-foundation/bitsandbytes>`__.

    .. warning::  This is an :ref:`experimental <versioning:Experimental API>` feature.

    .. note::
        The optimizer is not automatically replaced with ``bitsandbytes.optim.Adam8bit`` or equivalent 8-bit optimizers.

    Args:
        mode: The quantization mode to use.
        dtype: The compute dtype to use.
        ignore_modules: The submodules whose Linear layers should not be replaced, for example. ``{"lm_head"}``.
            This might be desirable for numerical stability. The string will be checked in as a prefix, so a value like
            "transformer.blocks" will ignore all linear layers in all of the transformer blocks.
    Nmode)nf4nf4-dqfp4fp4-dqint8int8-trainingdtypeignore_modulesreturnc                 C   s   t   |d u r |drtj}ntj rtj rtjntj}|dr4|tjur4t|d| dt	 }|d |d |d |d |d |d	 d
}|| | _
|| _|pXt | _d S )Nr   z7 only works with `dtype=torch.float16`, but you chose ``
_NF4Linear_NF4DQLinear
_FP4Linear_FP4DQLinear_Linear8bitLt_Int8LinearInference)r   r   r   r   r    r   )_import_bitsandbytes
startswithtorchfloat16cudais_availableis_bf16_supportedbfloat16
ValueErrorglobals_linear_clsr!   setr"   )selfr   r!   r"   globals_mode_to_cls r:   l/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/lightning_fabric/plugins/precision/bitsandbytes.py__init__F   s&   

zBitsandbytesPrecision.__init__modulec                    s|   t dd | D stdt  t  fdd| D s't|| j| j | D ]}t| jj	r;| j
|_d|_q+|S )Nc                 s   s    | ]
}t |tjjV  qd S N)
isinstancer-   nnLinear.0mr:   r:   r;   	<genexpr>j       z7BitsandbytesPrecision.convert_module.<locals>.<genexpr>z|You are using the bitsandbytes precision plugin, but your model has no Linear layers. This plugin won't work for your model.c                 3   s&    | ]}t | jj jjfV  qd S r>   )r?   r@   Linear8bitLt
Linear4bitrB   bnbr:   r;   rE   r   s   $ F)anymodules	TypeErrorr+   _convert_layersr5   r"   r?   r@   rH   r!   compute_dtypecompute_type_is_set)r7   r=   rD   r:   rI   r;   convert_moduleg   s   z$BitsandbytesPrecision.convert_modulec                 C   
   t | jS r>   r   r!   r7   r:   r:   r;   tensor_init_context}      
z)BitsandbytesPrecision.tensor_init_contextc                 C   sL   | j rtd| j  d|  }td| ji}t }|| || |S )NzInstantiating your model under the `init_module` context manager is not supported when used with `BitsandbytesPrecision(..., ignore_modules=z)` as this may initialize the layers on-device, defeating the purpose of quantization. You can remove `ignore_modules` or remove the `init_module` context manager.ztorch.nn.Linear)r"   RuntimeErrorrU   r   r5   r   enter_context)r7   	dtype_ctxcontext_managerstackr:   r:   r;   module_init_context   s   

z)BitsandbytesPrecision.module_init_contextc                 C   rR   r>   rS   rT   r:   r:   r;   forward_context   rV   z%BitsandbytesPrecision.forward_contextdatac                 C   s   t |tt| jdS N)functionr!   dst_type)r   r   r   r!   r7   r^   r:   r:   r;   convert_input   s   z#BitsandbytesPrecision.convert_inputc                 C   s   t |ttt dS r_   )r   r   r   r-   get_default_dtyperb   r:   r:   r;   convert_output   s   z$BitsandbytesPrecision.convert_outputNN)__name__
__module____qualname____doc__r	   r
   r-   r!   r6   strr<   r   r@   ModulerQ   r   rU   r\   r]   r   rc   re   r:   r:   r:   r;   r   .   s2    

!r   quantize_fn
state_dict_r#   c                 G   s6   t dd |D d }|d u rd S ||}| | d S )Nc                 s   s    | ]
}| d r|V  qdS )weightN)endswith)rC   namer:   r:   r;   rE      rF   z)_quantize_on_load_hook.<locals>.<genexpr>)nextpop)rm   rn   ro   
weight_keyrp   r:   r:   r;   _quantize_on_load_hook   s
   
rv   r=   incompatible_keysc                 C   s*   t |jD ]}|dr|j| qd S )Nrp   )reversedmissing_keysrq   remove)r=   rw   keyr:   r:   r;   _ignore_missing_weights_hook   s
   
r|   paramr^   quant_statec                 C   s|   t  }| jjdkr/t| |jjr&|jj||j|| j| j| j	| j
| j| jd	S tjj||jdS || _t| |jjr<|| _| S )Nmeta)	r^   requires_gradr~   	blocksizecompress_statistics
quant_typequant_storager=   bnb_quantized)r   )r+   devicetyper?   r@   
Params4bitr   r   r   r   r   r=   r   r-   	Parameterr^   r~   )r}   r^   r~   rJ   r:   r:   r;   _replace_param   s&   r      )maxsizec            	         s   t sttt dtjv } | sdtjd< tjddd tjddd dd l | s,tjd= G  fdd	d	 jj	}G  fd
dd jj
}G dd d|}G dd d|}G dd d|}G dd d|}G dd d|}|||||||d}t |  S )NBITSANDBYTES_NOWELCOME1ignorez1.*bitsandbytes was compiled without GPU support.*)messagezHMatMul8bitLt: inputs will be cast from .* to float16 during quantizationr   c                       s   e Zd ZdZddddedee deded	df
 fd
dZddee	j
 dee	j d	dffddZedjjde	j
dee	j d	jjffddZdddeded	efddZdfddZ  ZS )z+_import_bitsandbytes.<locals>._Linear8bitLtzWraps `bnb.nn.Linear8bitLt` and enables instantiation directly on the device and re-quantizaton when loading
        the state dict.Ng      @r   	thresholdargsr   r   kwargsr#   c                   sp   t  j|||d| tjj| j| _| j| _tjd|dj	j
dkr(|   | tt| j | t d S )Nr   r   r   r/   )superr<   r   r@   
Int8Paramsrp   biasr-   tensorr   r   	quantize_"_register_load_state_dict_pre_hookr   rv   "register_load_state_dict_post_hookr|   )r7   r   r   r   r   	__class__rJ   r:   r;   r<      s   z4_import_bitsandbytes.<locals>._Linear8bitLt.__init__rp   c                    sL   |du r| j j}|jjtjkrdS t| j  jjsJ | | j ||| _ dS )Inplace quantize.N)	rp   r^   r!   r-   r   r?   r@   r   quantizer7   rp   r   rI   r:   r;   r      s   z5_import_bitsandbytes.<locals>._Linear8bitLt.quantize_
int8paramsc                    s   |pt d}|jdkrtd|j | j|t jd}| jr&|| _| S t	 j
dr8 j
|\}}}}}n j
|\}}|| _t| d| t| d| | S )Nr/   Unexpected device type: r   r!   double_quantCBSCB)r-   r   r   rW   
contiguoustor.   has_fp16_weightsr^   hasattr
functionalr   int8_double_quantsetattr)r   rp   r   Br   ro   r   rI   r:   r;   r      s   
	z4_import_bitsandbytes.<locals>._Linear8bitLt.quantizeTrecurser   c                S   s   | j jjdkr	t| j jtjkrtt|}tj| j j|d}|jdkr,| 	|| nt
| j || _ | jd urEt
| jtj| j|d| _| S )Nr   r   r/   )rp   r   r   NotImplementedErrorr!   r-   uint8
empty_liker^   r   r   r   r7   r   r   rp   r:   r:   r;   to_empty  s   


z4_import_bitsandbytes.<locals>._Linear8bitLt.to_emptyc                    s   | j d ur%tjj| j\}}|dkrdt| nd}t| j | | t	| j jj
}|r8| jjtjkr8t| jj}tjjj|tdd |rj| jjjdkrSt| jjjdkra| | d S t| j|| _d S d S )Nr   r      ar   r/   )r   r-   r@   r   _calculate_fan_in_and_fan_outrp   mathsqrtuniform_r?   r   r!   r   r   r^   kaiming_uniform_r   r   r   r   r7   fan_inro   boundlinear_init_finishedrp   rI   r:   r;   reset_parameters  s    
z<_import_bitsandbytes.<locals>._Linear8bitLt.reset_parametersrf   r#   N)rg   rh   ri   rj   r   r
   r   floatr<   r-   r   r   r   staticmethodr@   r   r   boolr   r   r   __classcell__r:   rI   r   r;   r)      s     0(
r)   c                	       s   e Zd ZdZdddedee deddf fdd	Zdd
eej	 deej
 ddffddZedjjd
ej	deej
 djjffddZdddededefddZdfddZ  ZS )z)_import_bitsandbytes.<locals>._Linear4bitzWraps `bnb.nn.Linear4bit` to enable: instantiation directly on the device, re-quantizaton when loading the
        state dict, meta-device initialization, and materialization.Nr   r   r   r   r#   c                   sn   t  j|d|i| tjj| j| _| j| _tjd|dj	j
dkr'|   | tt| j | t d S )Nr   r   r   r/   )r   r<   r   r@   r   rp   r   r-   r   r   r   r   r   r   rv   r   r|   )r7   r   r   r   r   r:   r;   r<   7  s   z2_import_bitsandbytes.<locals>._Linear4bit.__init__rp   c                    sT   |du r| j j}|jjtjkrdS t| j  jjsJ | | j ||| _ d| j _	dS )r   NT)
rp   r^   r!   r-   r   r?   r@   r   r   r   r   rI   r:   r;   r   B  s   z3_import_bitsandbytes.<locals>._Linear4bit.quantize_
params4bitc                    sj   |pt d}|jdkrtd|j | j|t jd} jj|| j	| j
| j| jd\}}t| ||S )Nr/   r   r   )r   r   r   r   )r-   r   r   rW   r   r   halfr   quantize_4bitr   r   r   r   r   )r   rp   r   ww_4bitr~   rI   r:   r;   r   M  s   

z2_import_bitsandbytes.<locals>._Linear4bit.quantizeTr   r   c                S   s   | j jtjkrtj| j jj|tjd}n	tj| j j	|d}t
|}|jdkr.| || nt| j || _ | jd urGt| jtj| j|d| _| S )Nr   r   r/   )rp   r!   r-   r   emptyr~   shaper   r   r^   r   r   r   r   r   r   r:   r:   r;   r   _  s   


z2_import_bitsandbytes.<locals>._Linear4bit.to_emptyc                    s   | j d ur%tjj| j\}}|dkrdt| nd}t| j | | t	| j jj
}|rE| jjtjkrEtj| jjj| jjtjd}n| jj}tjjj|tdd |rn| jjjdkre| | d S t| j|| _d S d S )Nr   r   r   r   r   r/   )r   r-   r@   r   r   rp   r   r   r   r?   r   r!   r   r   r~   r   r   r   r^   r   r   r   r   r   rI   r:   r;   r   n  s   
z:_import_bitsandbytes.<locals>._Linear4bit.reset_parametersrf   r   )rg   rh   ri   rj   r   r
   r   r<   r-   r   r   r   r   r@   r   r   r   r   r   r   r   r:   rI   r   r;   _Linear4bit3  s     *(r   c                       *   e Zd Zdededdf fddZ  ZS )z2_import_bitsandbytes.<locals>._Int8LinearInferencer   r   r#   Nc                    s   t  j|ddi| d S )Nr   Fr   r<   r7   r   r   r   r:   r;   r<     s   z;_import_bitsandbytes.<locals>._Int8LinearInference.__init__rg   rh   ri   r   r<   r   r:   r:   r   r;   r*         "r*   c                       r   )z(_import_bitsandbytes.<locals>._FP4Linearr   r   r#   Nc                       t  j|ddd| d S )Nr   Fr   r   r   r   r   r:   r;   r<        z1_import_bitsandbytes.<locals>._FP4Linear.__init__r   r:   r:   r   r;   r'     r   r'   c                       r   )z*_import_bitsandbytes.<locals>._FP4DQLinearr   r   r#   Nc                    r   )Nr   Tr   r   r   r   r:   r;   r<     r   z3_import_bitsandbytes.<locals>._FP4DQLinear.__init__r   r:   r:   r   r;   r(     r   r(   c                       r   )z(_import_bitsandbytes.<locals>._NF4Linearr   r   r#   Nc                    r   )Nr   Fr   r   r   r   r:   r;   r<     r   z1_import_bitsandbytes.<locals>._NF4Linear.__init__r   r:   r:   r   r;   r%     r   r%   c                       r   )z*_import_bitsandbytes.<locals>._NF4DQLinearr   r   r#   Nc                    r   )Nr   Tr   r   r   r   r:   r;   r<     r   z3_import_bitsandbytes.<locals>._NF4DQLinear.__init__r   r:   r:   r   r;   r&     r   r&   )r)   r   r*   r'   r(   r%   r&   )_BITSANDBYTES_AVAILABLEModuleNotFoundErrorrk   osenvironwarningsfilterwarningsr   r@   rG   rH   r4   update)	nowelcome_setr)   r   r*   r'   r(   r%   r&   classesr:   rI   r;   r+      s:   

UP	r+    
linear_clsr"   prefixc                    s  |   D ]\}}|r| d| n| t|tjjr~t fdd|D s~td d |jd u}t	 d }t
t||rAdnd}||j|j||d	}	|r[t|	j|jj |	_d
t||re|	jjnd i}
t|	j|jj fi |
|	_| ||	 qt||| d qd S )N.c                 3   s    | ]}  |V  qd S r>   )r,   )rC   sfullnamer:   r;   rE     s    z"_convert_layers.<locals>.<genexpr>zReplacing layer z with bitsandbytes equivalentr   r   cpu)r   r   r~   )r   )named_childrenr?   r-   r@   rA   rK   logdebugr   r4   r   
issubclassin_featuresout_featuresr   r^   clonerp   r~   __setattr__rN   )r=   r   r"   r   rr   childhas_biasr   r   replacementstater:   r   r;   rN     s(   $

rN   r>   )r   ):	functoolsloggingr   r   r   collectionsr   
contextlibr   r   r   typesr   typingr   r   r	   r
   r   r-   lightning_utilitiesr    lightning_utilities.core.importsr   r   torch.nnr   torch.nn.modules.moduler   typing_extensionsr   r   ,lightning_fabric.plugins.precision.precisionr   (lightning_fabric.plugins.precision.utilsr   r   r    lightning_fabric.utilities.typesr   	getLoggerrg   r   r   r   rv   r@   rl   r|   r   tupler   	lru_cacher+   r   r6   rk   rN   r:   r:   r:   r;   <module>   sN   
&r
	

 ,Z