o
    i                     @   s   d dl Z d dlZd dlZd dlmZ d dlmZ d dlm	Z	 e 
eZee   zd dlmZ W n eyB   ed dZY nw eeedd Zdejd	ejd
ejfddZdejdejd
ejfddZdejdejdejd
ejfddZdS )    N)is_compiling)	out_dtype)check_cpu_version)intmm_tritonzTWarning: Detected no triton, on systems without Triton certain kernels will not workTORCHAO_AUTOTUNER_ENABLEinputmat2returnc              	   C   s  t  s	d|  v r+| jjdkrttjjjj	tj
|  | S ttjjjj	tj
| |S |j| jks=J d|j d| j d|jj| jjfv }| jd d dkoV| jd dk}|jd d dkof|jd dk}|oj| }|sp|rt|  tj
| tj
| jjS | s| }|  s| jd d dkr|  } zttjjjj	tj
| |W S  ty   t| tj|tjtj
 Y S w )a  
    Performs a safe integer matrix multiplication, considering different paths for
    torch.compile, cublas, and fallback cases.

    Args:
        input (torch.Tensor): The input tensor of shape [i, j].
        mat2 (torch.Tensor): The matrix to multiply with, of shape [j, k].

    Returns:
        torch.Tensor: The result of the matrix multiplication.

    Raises:
        AssertionError: If the tensors are not on the same device.
    
FakeTensorcpuz3need both tensors to be on the same device but got z and       r   )dynamo_is_compiling__repr__devicetyper   torchopsatenmmdefaultint32floatshapematmulr   tois_contiguous
contiguous	Exceptionfloat32)r   r   
device_cpuj_is_nonzero_multiple_of_8k_is_nonzero_multiple_of_8bad_dimensions_for_cublas r$   H/home/ubuntu/.local/lib/python3.10/site-packages/torchao/kernel/intmm.pysafe_int_mm   s>     $r&   abc                 C   s&   t durtrtjj| |S t| |S )a\  
    Performs integer matrix multiplication using intmm_triton if available and autotuner is enabled,
    otherwise falls back to safe_int_mm.

    Args:
        a (torch.Tensor): The first matrix to multiply.
        b (torch.Tensor): The second matrix to multiply.

    Returns:
        torch.Tensor: The result of the matrix multiplication.
    N)r   AUTOTUNER_ENABLEr   r   torchao
int_matmulr&   )r'   r(   r$   r$   r%   r+   [   s   
r+   scales1c                 C   s   | j \}}|j \}}||dks| dksJ d|dks"J | s(J |||f}| dks7J t|jrJt	| |}|
|j| S tdurYtrYtjj| ||S t| |}|| S )a  
    Performs scaled integer matrix multiplication.

    Args:
        a (torch.Tensor): The first matrix to multiply.
        b (torch.Tensor): The second matrix to multiply.
        scales1 (torch.Tensor): The scaling factors for the rows of the result.

    Returns:
        torch.Tensor: The result of the scaled matrix multiplication.

    Raises:
        AssertionError: If the dimensions of the input tensors do not match the expected shapes.
    r   r      N)r   sizenumelr   expanddimr   r   r   _int_mmr   dtyper   r)   r   r*   int_scaled_matmulr&   )r'   r(   r,   MKNcr$   r$   r%   r4   l   s   



r4   )loggingosr   torch._dynamor   r   !torch._higher_order_ops.out_dtyper   torchao.utilsr   	getLogger__name__logger
addHandlerNullHandlertorchao.kernelr   ImportErrorwarningboolintgetenvr)   Tensorr&   r+   r4   r$   r$   r$   r%   <module>   s8   
=