o
    3wiZ&                     @   s  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZmZ d dlmZ d d	lmZ G d
d dejjZG dd dejjZG dd dejjZG dd dejZeeddZeedddZeeddZG dd dejjZG dd dejZdS )    )partialN)dequantize_rowwise)int8_matmul_mixed_dequantize)int8_matmul_rowwise_dequantize)!quantize_columnwise_and_transpose)quantize_globalquantize_global_transpose)quantize_rowwise)is_triton_availablec                   @   $   e Zd Zedd Zedd ZdS )_switchback_globalc           	      C   sd   | d|d}t|\}}t|\}}||f| _t|| |||j g | d d dR  S Nviewsizer	   r   save_for_backwardr   t	ctxX_3DWbiasXX_int8state_XW_int8state_W r   a/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/bitsandbytes/nn/triton_based_modules.pyforward   s
   
0z_switchback_global.forwardc                 C   s   | d|d}d  } }}| j\}}| jd r=t|\}}	t|\}
}t||
 |	|d jg | d d dR  }| jd rNt	
| ||j}| jd rY|jdd}|||fS Nr   r         dim)reshaper   r   needs_input_gradr	   r   r   r   r   torchmatmultodtypesum)r   G_3DGgrad_Xgrad_W	grad_biasr   r   G_int8state_Gr   r   r   r   r   backward)   s    




z_switchback_global.backwardN__name__
__module____qualname__staticmethodr    r4   r   r   r   r   r      s
    
r   c                   @   r   )_switchback_vectorrizec           	      C   sd   | d|d}||f| _t|\}}t|\}}t|| |||j g | d d dR  S r   )r   r   r   r	   r   r   r   r   r   r   r    E   s
   
0z_switchback_vectorrize.forwardc                 C   s   | j \}}|d|d}d  } }}| jd r=t|\}}	t|\}
}t||
 |	|d jg | d d dR  }| jd rNt	
| ||j}| jd rY|jdd}|||fS r!   )r   r&   r   r'   r	   r   r   r   r   r(   r)   r*   r+   r,   )r   r-   r   r   r.   r/   r0   r1   r2   r3   r   r   r   r   r   r4   T   s    




z_switchback_vectorrize.backwardNr5   r   r   r   r   r:   D   s
    
r:   c                   @   r   ) _switchback_global_mem_efficientc           
      C   sn   | d|d}| }t|\}}~t|\}}	||||	f| _t|| ||	|j g |d d dR  S r   r   )
r   r   r   r   r   X_3D_szr   r   r   r   r   r   r   r    o   s   ,z(_switchback_global_mem_efficient.forwardc                 C   s   | d|d}| }d  } }}| j\}}}	}
| jd r2t||}~t| ||j	}~| jd r=|j
dd}| jd ret|\}}~|	  }	t||	 ||
d jg |d d dR  }|||fS )Nr   r"   r#   r   r$   )r&   r   r   r'   r   r(   r)   r   r*   r+   r,   r	   
contiguousr   r   )r   r-   r.   G_3D_szr/   r0   r1   r   r   r   r   real_Xr2   r3   r   r   r   r4      s"   



,
z)_switchback_global_mem_efficient.backwardNr5   r   r   r   r   r;   n   s
    
r;   c                       sN   e Zd Z					ddededededef
 fd	d
Zdd Zdd Z  ZS )SwitchBackLinearTNFin_featuresout_featuresr   vector_wise_quantizationmem_efficientc                    sh   t  ||||| t std|| _| jr(t| _|r&td td d S d S |r/t	| _d S t
| _d S )NzCould not import triton. Please install triton to use SwitchBackLinear.
                               Alternatively, you can use bnb.nn.SwitchBackLinearBnb, but it will be slowerz<mem efficient is not supported for vector-wise quantization.r"   )super__init__r
   ImportErrorrC   r:   _fnprintexitr;   r   )selfrA   rB   r   devicer+   rC   rD   	__class__r   r   rF      s   


zSwitchBackLinear.__init__c                 C   sL   t d | jrt| j\}}nt| j\}}| d| | d| | `d S )Nz=> preparing for eval.r   r   )rI   rC   r	   weightr   register_buffer)rK   r   r   r   r   r   prepare_for_eval   s   z!SwitchBackLinear.prepare_for_evalc                 C   s   | j r| j|| j| jS t| ds| j|| j| jS |d|d}t|\}}| j	rIt
|| j || j| jjg | d d dR  S t|| j || j| jjg | d d dR  S )Nr   r   )trainingrH   applyrO   r   hasattrr   r   r	   rC   r   r   r   r   r   )rK   xr   r   r   r   r   r   r       s"   
zSwitchBackLinear.forward)TNNFF)	r6   r7   r8   intboolrF   rQ   r    __classcell__r   r   rM   r   r@      s&    r@   F)rC   T)rC   rD   c                   @   s&   e Zd ZedddZedd ZdS )StandardLinearFunctionNc                 C   sj   | d|d}| ||| || }|d ur%||d|7 }|j g | d d dR  S )Nr   r   )r   r   r   r)   r   	unsqueeze	expand_as)r   inputrO   r   r   outputr   r   r   r       s    zStandardLinearFunction.forwardc           	      C   s   | j \}}}|d|d}d  } }}| jd r1|||jjg | d d dR  }| jd rA| ||j}|d urO| jd rO|	d}|||fS )Nr   r   r"   r#   )
saved_tensorsr&   r   r'   r)   r*   r+   r   r   r,   )	r   grad_output_3Dr\   rO   r   grad_output
grad_inputgrad_weightr1   r   r   r   r4      s   
.


zStandardLinearFunction.backwardNr5   r   r   r   r   rY      s
    	rY   c                   @   s   e Zd Zdd ZdS )StandardLinearc                 C   s   t || j| jS rc   )rY   rS   rO   r   )rK   rU   r   r   r   r      s   zStandardLinear.forwardN)r6   r7   r8   r    r   r   r   r   rd     s    rd   ) 	functoolsr   r(   torch.nnnn&bitsandbytes.triton.dequantize_rowwiser   0bitsandbytes.triton.int8_matmul_mixed_dequantizer   2bitsandbytes.triton.int8_matmul_rowwise_dequantizer   5bitsandbytes.triton.quantize_columnwise_and_transposer   #bitsandbytes.triton.quantize_globalr   r   $bitsandbytes.triton.quantize_rowwiser	    bitsandbytes.triton.triton_utilsr
   autogradFunctionr   r:   r;   Linearr@   SwitchBackLinearGlobal"SwitchBackLinearGlobalMemEfficientSwitchBackLinearVectorwiserY   rd   r   r   r   r   <module>   s&    ,*,I