o
    2wi,                     @   s  d dl mZ d dlmZ d dlmZ d dlZdZeej	dr)dZej	j
Z
ej	jZnej	jZ
ej	jZej	dd	 e
d		dQd
ejdejdejdejdejdeej deej deejeej f fddZej	dd e
d		dQd
ejdejdejdejdeej deej dejfddZej	dd e
dd
ejdejfddZej	dd e
dd
ejdejd ejfd!dZej	d"d# e
d"dRd
ejfd%dZej	d&d' e
d&d
ejd(ejdejfd)dZed&d*d
ejd(ejfd+dZej	d,d- e
d,		dQd
ejdejdejdeej deej dejfd.dZej	d/d0 e
d/	$dRd
ejdeejejejejeej f fd1dZej	d2d3 e
d2d
ejd4ejd5ed6ed7ee dejdejfd8dZej	d9d: e
d9d
ejd4ejd5ed6ed7ee dejd ejddfd;dZej	d<d= e
d<d
ejd5ed6ed>ejdeejejf f
d?dZej	d@dA e
d@d
ejd4ejdBejd5edejdejfdCdZej	dDdE e
dDd
ejd4ejdBejd5edejd ejfdFdZej	dGdH e
dGd
ejdBejd5edeejejf fdIdZej	dJdK e
dJd
ejdejdLee d4ejdBejd5edejfdMdZej	dNdO e
dNd
ejdejdLee d4ejdBejd5ed ejddfdPdZdS )S    )Sequence)prod)OptionalNFregister_fakeTz"bitsandbytes::int8_mixed_scaled_mmz{(Tensor A, Tensor CA, Tensor CB, Tensor SCA, Tensor SCB, Tensor? outlier_cols=None, Tensor? bias=None) -> (Tensor, Tensor?)ACACBSCASCBoutlier_colsbiasreturnc           
      C   sX   g |j d d |j d R }tj|| j| jd}tj  }| j|tj	d}	||	fS )Nr   devicedtyper   )
shapetorchemptyr   r   libraryget_ctxnew_dynamic_size	new_emptyint64)
r   r   r   r	   r
   r   r   shapeCoutsubA r   N/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/bitsandbytes/_ops.py_   s
   
r    zbitsandbytes::int8_scaled_mmzm(Tensor A, Tensor B, Tensor row_stats, Tensor col_stats, Tensor? bias=None, ScalarType? dtype=None) -> TensorB	row_stats	col_statsr   c                 C   s6   g | j d d |j d R }tj|| j|ptjdS )Nr   r   r   )r   r   r   r   float16)r   r!   r"   r#   r   r   r   r   r   r   r    4   s   	z bitsandbytes::int8_linear_matmulz(Tensor A, Tensor B) -> Tensorc                 C   sb   t | jt jkdd  t |jt jkdd  g | jd d |jd R }t j|| jt jdS )Nc                   S      dS NzA must be int8r   r   r   r   r   <lambda>I       _.<locals>.<lambda>c                   S   r%   NzB must be int8r   r   r   r   r   r'   J   r(   r   r   r   )r   _checkr   int8r   r   r   int32)r   r!   r   r   r   r   r    G   s   z$bitsandbytes::int8_linear_matmul.outz'(Tensor A, Tensor B, Tensor! out) -> ()r   c                    s   g  j d d |j d R t jtjkdd  t|jtjkdd  tj kfdd tj jk fdd tjtjkfdd d S )	Nr   r   c                   S   r%   r&   r   r   r   r   r   r'   [   r(   r)   c                   S   r%   r*   r   r   r   r   r   r'   \   r(   c                         d d j  S NExpected out.shape == , got r   r   )r   r   r   r   r'   ]       c                         d j  dj  S NzExpected out.device == r1   r   r   r   r   r   r   r'   ^       c                         d j  S )Nz!Expected out.dtype == int32, got r   r   )r   r   r   r'   _       )r   r   r+   r   r,   r   r-   )r   r!   r   r   )r   r   r   r   r    W   s    z#bitsandbytes::int8_vectorwise_quantz<(Tensor A, float threshold=0.0) -> (Tensor, Tensor, Tensor?)        c                 C   sn   t j| j| jt jd}t jt| jd d | jt jd}|dkr%||d fS t j 	 }||| j
|t jdfS )Nr   r   r;   r   )r   r   r   r   r,   r   float32r   r   r   r   r   )r   	thresholdout_rowr"   r   r   r   r   r    h   s   "
z%bitsandbytes::int8_vectorwise_dequantz"(Tensor A, Tensor stats) -> Tensorstatsc                 C   s(   t | jt jkdd  t j| t jdS )Nc                   S   r%   r&   r   r   r   r   r   r'   z   r(   r)   r   )r   r+   r   r,   
empty_liker<   r   r?   r   r   r   r    x   s   defaultc                 C   s   | | dd d S )Nr      g   @ ?)viewrA   r   r   r   r       s   zbitsandbytes::int8_mm_dequantzc(Tensor A, Tensor row_stats, Tensor col_stats, ScalarType? dtype=None, Tensor? bias=None) -> Tensorc                 C   s,   t | jt jkdd  t j| |pt jdS )Nc                   S   r%   )NzA must be int32r   r   r   r   r   r'      r(   r)   r   )r   r+   r   r-   r@   r$   )r   r"   r#   r   r   r   r   r   r       s   zbitsandbytes::int8_double_quantzL(Tensor A, float threshold=0.0) -> (Tensor, Tensor, Tensor, Tensor, Tensor?)c                 C   s   t j| t jd}t j| t jd}t jt| jd d | jt jd}t j| jd | jt jd}t j	 
 }| j|t jd}|||||fS )Nr   r   r   )r   r@   r,   r   r   r   r   r<   r   r   r   r   r   )r   r=   r>   out_colr"   r#   	outlier_nr   r   r   r   r       s   "zbitsandbytes::dequantize_4bitza(Tensor A, Tensor absmax, int blocksize, str quant_type, int[] shape, ScalarType dtype) -> Tensorabsmax	blocksize
quant_typer   c                 C   s   t | t j||| jdS )N)r   r   )r   _check_is_sizer   r   )r   rG   rH   rI   r   r   r   r   r   r       s   
	z!bitsandbytes::dequantize_4bit.outzj(Tensor A, Tensor absmax, int blocksize, str quant_type, int[] shape, ScalarType dtype, Tensor! out) -> ()c                    sd   t | t jkfdd t j jk fdd t jkfdd d S )Nc                      r.   r/   r2   r   )r   r   r   r   r'      r3   r)   c                      r4   r5   r6   r   r7   r   r   r'      r8   c                         d  dj  S NzExpected out.dtype == r1   r   r   r   r   r   r   r'      r3   )r   rJ   r+   r   r   r   )r   rG   rH   rI   r   r   r   r   )r   r   r   r   r   r       s   

 zbitsandbytes::quantize_4bitzW(Tensor A, int blocksize, str quant_type, ScalarType quant_storage) -> (Tensor, Tensor)quant_storagec                 C   s`   t | |  }||   }t j|f| jt jd}t j|d |jd  df| j|d}||fS )Nr   rC      )r   rJ   numelr   r   r<   itemsize)r   rH   rI   rN   nblocksrG   r   r   r   r   r       s   
$z"bitsandbytes::dequantize_blockwisezQ(Tensor A, Tensor absmax, Tensor code, int blocksize, ScalarType dtype) -> Tensorcodec                    s4   t | t  jt jk fdd t j |dS )Nc                      r9   NzA must be uint8, got r   r   r   r   r   r'      r:   r)   r   )r   rJ   r+   r   uint8r@   )r   rG   rT   rH   r   r   rV   r   r       s   
z&bitsandbytes::dequantize_blockwise.outzZ(Tensor A, Tensor absmax, Tensor code, int blocksize, ScalarType dtype, Tensor! out) -> ()c                    s   t | t  jt jk fdd t j jk fdd t j jk fdd t jkfdd d S )Nc                      r9   rU   r   r   rV   r   r   r'      r:   r)   c                      r4   r/   r2   r   r7   r   r   r'      r8   c                      r4   r5   r6   r   r7   r   r   r'     r8   c                      rK   rL   r   r   rM   r   r   r'     r3   )r   rJ   r+   r   rW   r   r   )r   rG   rT   rH   r   r   r   )r   r   r   r   r       s
   
 z bitsandbytes::quantize_blockwisez:(Tensor A, Tensor code, int blocksize) -> (Tensor, Tensor)c                 C   sL   t | |  }||   }t j|f| jt jd}t j| t jd}||fS )Nr   r   )r   rJ   rP   r   r   r<   r@   rW   )r   rT   rH   rR   rS   rG   r   r   r   r   r      s   
zbitsandbytes::gemv_4bitzW(Tensor A, Tensor B, int[] shapeB, Tensor absmax, Tensor code, int blocksize) -> TensorshapeBc                    s   t | t    dk fdd t  jt jt jt jfv  fdd t jt j	t jt jt jfv fdd g  j
d d |d R }t j| j jdS )Nr   c                      r9   Nz5A must be a vector with leading dimensions of 1, got r2   r   rV   r   r   r'     r:   r)   c                      r9   Nz-A must be float16, bfloat16, or float32, got r   r   rV   r   r   r'      r:   c                      r9   NzNB must be backed by storage of type uint8, bfloat16, float16, or float32, got r   r   r!   r   r   r'   $  r:   r   r   )r   rJ   r+   rP   sizer   r$   bfloat16r<   rW   r   r   r   )r   r!   rX   rG   rT   rH   r   r   )r   r!   r   r      s   
"

zbitsandbytes::gemv_4bit.outz`(Tensor A, Tensor B, int[] shapeB, Tensor absmax, Tensor code, int blocksize, Tensor! out) -> ()c                    s   t | t    dk fdd t  jt jt jt jfv  fdd t jt j	t jt jt jfv fdd t j
g  j
d d d R k fdd t j jk fdd t j jk fd	d d S )
Nr   c                      r9   rY   r2   r   rV   r   r   r'   ;  r:   r)   c                      r9   rZ   r   r   rV   r   r   r'   >  r:   c                      r9   r[   r   r   r\   r   r   r'   B  r:   r   c                      s*   dg  j d d d R  dj  S )Nr0   r   r   r1   r2   r   )r   r   rX   r   r   r'   F  s   * c                      r4   r5   r6   r   r7   r   r   r'   H  r8   c                      r4   rL   r   r   r7   r   r   r'   I  r8   )r   rJ   r+   rP   r]   r   r$   r^   r<   rW   r   r   )r   r!   rX   rG   rT   rH   r   r   )r   r!   r   rX   r   r    0  s    

"

 ")NN)r;   )collections.abcr   mathr   typingr   r   _IS_TORCH_GTE_24hasattrr   r   register_kernelimpl_abstractimpldefineTensortupler    r   intstrr   r   r   r   <module>   s   
	
.
,	