o
    3wi                     @   s  d dl mZ d dlmZ d dlZddlmZ edd		d$dejd	ejd
ejdeej deej dejfddZ	edd		d$dejdejdejdejdejdeej deej de
ejeej f fddZ	edd		d$dejdejd	ejd
ejdeej deej dejfddZ	edddejdejfddZ	edddejdejdejfddZ	d%dejdejdeej fdd Zed!dd&dejfd#dZ	dS )'    )prod)OptionalN   )register_kernelzbitsandbytes::int8_mm_dequantdefaultA	row_stats	col_statsdtypebiasreturnc                    s   t  jt jk fdd t jt jkfdd t jt jkfdd  d jd }dddd|  d }|d urS||7 }|	|pYt j
S )Nc                         d j  S )NzA must be int32, got r
    )r   r   ^/home/ubuntu/sommelier/.venv/lib/python3.10/site-packages/bitsandbytes/backends/default/ops.py<lambda>       z_.<locals>.<lambda>c                      r   )Nzrow_stats must be float32, got r   r   )r   r   r   r      r   c                      r   )Nzcol_stats must be float32, got r   r   )r	   r   r   r      r   r   g D1@?)torch_checkr
   int32float32viewshapereshape	unsqueezetofloat16)r   r   r	   r
   r   A_calcoutr   )r   r	   r   r   _	   s   r    z"bitsandbytes::int8_mixed_scaled_mmCACBSCASCBoutlier_colsc           
      C   s   d }|d ur,|  r,| d d |f  }tjjj|d d |f  || j	 }n
tj
d| j| jd}tjjjj|||||| jd}	|d urO|	||}	|	|fS )Nr   devicer
   )r   r
   )numel
contiguousr   opsbitsandbytesint8_vectorwise_dequantr   r   r
   temptyr'   int8_scaled_mmaddmm)
r   r!   r"   r#   r$   r%   r   subBsubAoutputr   r   r   r        s   
 
zbitsandbytes::int8_scaled_mmBc                 C   s2   t jjj| |}t jjjj||||pt j|dS )N)r
   r   )r   r*   r+   int8_linear_matmulr   int8_mm_dequantr   )r   r4   r   r	   r   r
   out_i32r   r   r   r    G   s   	
z bitsandbytes::int8_linear_matmulc                 C   s
   t | |S N)_int8_linear_matmul_impl)r   r4   r   r   r   r    Z   s   
z$bitsandbytes::int8_linear_matmul.outr   c                 C   s"   t |jt jk t| || d S r8   )r   r   r
   r   r9   )r   r4   r   r   r   r   r    _   s   c                 C   s6   t |  |  t j}|d ur||}|S r8   )r   matmulfloatr-   r   r   copy_)r   r4   r   resultr   r   r   r9   e   s    
r9   z#bitsandbytes::int8_vectorwise_quant        c                 C   s   t | jd d }d }d }|dkr<|  |k}| r2t|jddd}| |  }d| |< n
tjd| j	tj
d}tj|  ddj }t| d|d  tj}|dkrh|d urhd|d d |f< |d urp|| |< |||fS )Nr   r>   r   )dimr&      g     _@)r   r   absanyr   argwherer   cloner.   r'   int64maxvaluesr;   roundr   r   int8)r   	thresholdrowsr%   outlier_restoreoutliersr   out_rowr   r   r   r    m   s"   
 
)NNr8   )r>   )mathr   typingr   r   _opsr   Tensorr
   r    tupler9   r   r   r   r   <module>   s    &"