o
    3wi                     @   s  d dl mZ d dlZd dlZd dlmZ ddlmZ ddl	m
Z
 ejdkr5edd	d
ejdejfddZedd	d
ejdejdedeejejf fddZedd	d
ejdejdejdedejdejfddZejg dejd	dZedd	d
ejdededejdeejejf f
ddZedd	d
ejdejdededee dejdejfddZed d	d
ejdejd!ee dejdejdedejfd"dZdS )#    )SequenceN)get_ptr   )register_kernel)lib)      z bitsandbytes::int8_linear_matmulcpuABc                 C   s>   t | d| jd | jg | jd d |jd R  S )Nr   )torch_int_mmreshapeshapet)r
   r    r   Z/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/bitsandbytes/backends/cpu/ops.py_   s   r   z bitsandbytes::quantize_blockwisecode	blocksizereturnc              
      s   t | t  jt jk fdd   }||   }t j|f jt jd}t j t j	d}t
t|t t|t|t|t| ||fS )Nc                         d j  S )NzA must be float32 on cpu, got dtyper   r
   r   r   <lambda>       _.<locals>.<lambda>)devicer   r   )r   _check_is_size_checkr   float32numelemptyr   
empty_likeuint8r   cquantize_blockwise_cpu_fp32r   ct
c_longlong)r
   r   r   nblocksabsmaxoutr   r   r   r      s   
	z"bitsandbytes::dequantize_blockwiser,   r   c                    s   t | t  jt jk fdd t t jkfdd t j d}tt	|t	 t	|t	|t
|t
   |S )Nc                      r   )NzA must be uint8, got r   r   r   r   r   r   4   r   r   c                      
   d  S )Nz"dtype must be float32 on cpu, got r   r   r   r   r   r   5      
 r   )r   r    r!   r   r&   r"   r%   r   cdequantize_blockwise_cpu_fp32r   r(   r)   r#   )r
   r,   r   r   r   r-   r   )r
   r   r   r   1   s   
	)g      g    6Gg    fg    TFٿg   I4ҿg   ০ǿg    Og        g   __?g   `\?g   ?g   @g?g    4?g   ` ?g   `v"?g      ?)r   r   zbitsandbytes::quantize_4bit
quant_typequant_storagec           	         s  t  t dkfdd t  jt jt jt jfv  fdd   t  dkfdd  d}|	 j
dd	j }||d }t jt 	|ddt dd
dt j}|d d d d> |dd d B }|t jkr| |d}|| fS )Nnf4c                      r.   Nz#quant_type must be nf4 on CPU, got r   r   r1   r   r   r   b   r/   r   c                      r   )NzDBlockwise 4bit quantization only supports 16/32-bit floats, but got r   r   r   r   r   r   e   r   r   c                      s   d d  S )Nz&n must be divisible by blocksize, got z and r   r   )r   r*   r   r   r   k   s    r      dimT)r8   keepdimr      )r   r    r!   r   bfloat16float16r"   r#   r   absmaxvaluesfloat	unsqueezeargminview_NF4_QUANT_TABLEtor&   squeeze)	r
   r   r1   r2   r+   r,   scaled	quantizedpackedr   )r
   r   r*   r1   r   r   ]   s    

* 
zbitsandbytes::dequantize_4bitr   c           	         s   t | t dkfdd t t jt jt jfv fdd t  jt jk fdd  dd  d? 	t j
} d	@ 	t j
}t j||fdd
d|}t| |d d d f  }|jdg|dd  R  }|	S )Nr3   c                      r.   r4   r   r   r5   r   r   r      r/   r   c                      r.   )NzFBlockwise 4bit dequantization only supports 16/32-bit floats, but got r   r   r   r   r   r      r/   c                      r   )NzFBlockwise 4bit dequantization on CPU only supports uint8 storage, got r   r   r   r   r   r      r   r   r6   r:      r7   )r   r    r!   r;   r<   r"   r   r&   rC   rE   int64catr   rD   )	r
   r,   r   r1   r   r   upperlowerr+   r   )r
   r   r1   r   r   ~   s"   
	



zbitsandbytes::gemv_4bitshapeBc                 C   s2   t jjjj|||d|| jd}t jjj| |d dS )Nr3   )r   r   )bias)	r   opsbitsandbytesdequantize_4bitdefaultr   nn
functionallinear)r
   r   rO   r,   r   r   B_dqr   r   r   r      s   
)collections.abcr   ctypesr(   r   bitsandbytes.functionalr   _opsr   
cextensionr   __version__Tensorr   inttupler   tensorr"   rD   strr   r   r   r   <module>   s|    
,. %