o
    X۷i-                     @  s   d dl mZ d dlmZ d dlZd dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlmZ ed	d"dd	Zedd#ddZd"ddZd$ddZdZejddddedZdd ZejddddedZed d%d!d ZdS )&    )annotations)warnN)cublas)device)runtime)_util)_uarray	lu_factorFTc                 C  s   t | ||S )a  LU decomposition.

    Decompose a given two-dimensional square matrix into ``P * L * U``,
    where ``P`` is a permutation matrix,  ``L`` lower-triangular with
    unit diagonal elements, and ``U`` upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``
        overwrite_a (bool): Allow overwriting data in ``a`` (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        tuple:
            ``(lu, piv)`` where ``lu`` is a :class:`cupy.ndarray`
            storing ``U`` in its upper triangle, and ``L`` without
            unit diagonal elements in its lower triangle, and ``piv`` is
            a :class:`cupy.ndarray` storing pivot indices representing
            permutation matrix ``P``. For ``0 <= i < min(M,N)``, row
            ``i`` of the matrix was interchanged with row ``piv[i]``

    .. seealso:: :func:`scipy.linalg.lu_factor`
    )
_lu_factor)aoverwrite_acheck_finite r   S/home/ubuntu/vllm_env/lib/python3.10/site-packages/cupyx/scipy/linalg/_decomp_lu.pyr	      s   luc                 C  s   t | ||\}}|j\}}t||}t|\}	}
|r(t|	d|d |d |	|
fS |jjdv r1tjntj	}t
t
j|f|d}t|d|d |d ||	|
fS )a  LU decomposition.

    Decomposes a given two-dimensional matrix into ``P @ L @ U``, where ``P``
    is a permutation matrix, ``L`` is a lower triangular or trapezoidal matrix
    with unit diagonal, and ``U`` is a upper triangular or trapezoidal matrix.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        permute_l (bool): If ``True``, perform the multiplication ``P @ L``.
        overwrite_a (bool): Allow overwriting data in ``a`` (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        tuple:
            ``(P, L, U)`` if ``permute_l == False``, otherwise ``(PL, U)``.
            ``P`` is a :class:`cupy.ndarray` storing permutation matrix with
            dimension ``(M, M)``. ``L`` is a :class:`cupy.ndarray` storing
            lower triangular or trapezoidal matrix with unit diagonal with
            dimension ``(M, K)`` where ``K = min(M, N)``. ``U`` is a
            :class:`cupy.ndarray` storing upper triangular or trapezoidal
            matrix with dimension ``(K, N)``. ``PL`` is a :class:`cupy.ndarray`
            storing permuted ``L`` matrix with dimension ``(M, K)``.

    .. seealso:: :func:`scipy.linalg.lu`
    r      fFdtype)r
   shapemin_cupy_split_lu_cupy_laswpr   charnumpyfloat32float64cupydiagones)r   	permute_lr   r   r   pivmnkLUr_dtypePr   r   r   r   .   s   


c              	   C  s  ddl m} t| } t|  | j}|jdkr|j}|j	}n*|jdkr+|j
}|j}n|jdkr7|j}|j}n|jdkrC|j}|j}nd}t|| j|d| d} |re| jjdkret|  setd	t }tjd
tjd}	| j\}
}tjt|
|ftjd}|||
|| jj|
}tj||d}|||
|| jj|
|jj|jj|	jj t j!s|	d dk rtd|	d   |	d dkrt"d|	d  t#dd |d
8 }| |fS )Nr   cusolverfdFD>Only float32, float64, complex64 and complex128 are supported.ordercopy#array must not contain infs or NaNsr   r   z=illegal value in %d-th argument of internal getrf (lu_factor)z4Diagonal number %d is exactly zero. Singular matrix.   )
stacklevel)$cupy_backends.cuda.libsr+   r   asarrayr   
_assert_2dr   r   sgetrfsgetrf_bufferSizedgetrfdgetrf_bufferSizecgetrfcgetrf_bufferSizezgetrfzgetrf_bufferSizeNotImplementedErrorastypekindisfiniteall
ValueErrorr   get_cusolver_handleemptyr   int32r   r   intcdataptrr   is_hipr   RuntimeWarning)r   r   r   r+   r   getrfgetrf_bufferSizemsgcusolver_handledev_infor#   r$   ipiv
buffersize	workspacer   r   r   r
   ]   sZ   






r
   Cc              
   C  s   | j sJ | j\}}t||}|dkrdnd}tj||f|| jd}tj||f|| jd}|| }t| ||||j|||d ||fS )Nr.   rX   )r2   r   size)_f_contiguousr   r   r   rI   r   _kernel_cupy_split_lu_c_contiguous)LUr2   r#   r$   r%   r&   r'   rZ   r   r   r   r      s   


r   z
__device__ inline int get_index(int row, int col, int num_rows, int num_cols,
                                bool c_contiguous)
{
    if (c_contiguous) {
        return col + num_cols * row;
    } else {
        return row + num_rows * col;
    }
}
z6raw T LU, int32 M, int32 N, int32 K, bool C_CONTIGUOUSzraw T L, raw T Ua$  
    // LU: shape: (M, N)
    // L: shape: (M, K)
    // U: shape: (K, N)
    const T* ptr_LU = &(LU[0]);
    T* ptr_L = &(L[0]);
    T* ptr_U = &(U[0]);
    int row, col;
    if (C_CONTIGUOUS) {
        row = i / N;
        col = i % N;
    } else {
        row = i % M;
        col = i / M;
    }
    T lu_val = ptr_LU[get_index(row, col, M, N, false)];
    T l_val, u_val;
    if (row > col) {
        l_val = lu_val;
        u_val = static_cast<T>(0);
    } else if (row == col) {
        l_val = static_cast<T>(1);
        u_val = lu_val;
    } else {
        l_val = static_cast<T>(0);
        u_val = lu_val;
    }
    if (col < K) {
        ptr_L[get_index(row, col, M, K, C_CONTIGUOUS)] = l_val;
    }
    if (row < K) {
        ptr_U[get_index(row, col, K, N, C_CONTIGUOUS)] = u_val;
    }
    cupyx_scipy_linalg_split_lu)preamblec                 C  s`   | j \}}|j d }d|kr||kr||k sJ | js | js J t||||||| j| |d	 d S )Nr   rY   )r   r]   r[   _kernel_cupy_laswp)Ak1k2rU   incxr#   r$   r%   r   r   r   r      s
   

 r   zOint32 M, int32 N, int32 K1, int32 K2, raw I IPIV, int32 INCX, bool C_CONTIGUOUSzraw T Aa  
    // IPIV: 0-based pivot indices. shape: (K,)  (*) K > K2
    // A: shape: (M, N)
    T* ptr_A = &(A[0]);
    if (K1 > K2) return;
    int row_start, row_end, row_inc;
    if (INCX > 0) {
        row_start = K1; row_end = K2; row_inc = 1;
    } else if (INCX < 0) {
        row_start = K2; row_end = K1; row_inc = -1;
    } else {
        return;
    }
    int col = i;
    int row1 = row_start;
    while (1) {
        int row2 = IPIV[row1];
        if (row1 != row2) {
            int idx1 = get_index(row1, col, M, N, C_CONTIGUOUS);
            int idx2 = get_index(row2, col, M, N, C_CONTIGUOUS);
            T tmp       = ptr_A[idx1];
            ptr_A[idx1] = ptr_A[idx2];
            ptr_A[idx2] = tmp;
        }
        if (row1 == row_end) break;
        row1 += row_inc;
    }
    cupyx_scipy_linalg_laswplu_solvec                 C  s  ddl m} | \}}t| t| t| |jd }||jd kr)td|j}	|	j	dkr5|j
}
n!|	j	dkr>|j}
n|	j	dkrG|j}
n|	j	dkrP|j}
nd}t||dkr^tj}n|d	krftj}n|d
krntj}ntd|j|	ddd}|j|jddd}|d	7 }|j|	d| d}|r|jjdkrt| std|jjdkrt| std|jd	krd	n|jd	 }t }tjd	tjd}|
|||||jj||jj|jj||jj
 t j!s|d dk rtd|d   |S )a9  Solve an equation system, ``a * x = b``, given the LU factorization of ``a``

    Args:
        lu_and_piv (tuple): LU factorization of matrix ``a`` (``(M, M)``)
            together with pivot indices.
        b (cupy.ndarray): The matrix with dimension ``(M,)`` or
            ``(M, N)``.
        trans ({0, 1, 2}): Type of system to solve:

            ========  =========
            trans     system
            ========  =========
            0         a x  = b
            1         a^T x = b
            2         a^H x = b
            ========  =========
        overwrite_b (bool): Allow overwriting data in b (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(M,)`` or ``(M, N)``.

    .. seealso:: :func:`scipy.linalg.lu_solve`
    r   r*   zincompatible dimensions.r,   r-   r.   r/   r0   r   r5   zunknown transFr1   Tzarray must not contain infs or NaNs.
Note that when a singular matrix is given, unlike scipy.linalg.lu_factor, cupyx.scipy.linalg.lu_factor returns an array containing NaN.r4   r   z<illegal value in %d-th argument of internal getrs (lu_solve))"r7   r+   r   _assert_cupy_arrayr9   _assert_stacked_squarer   rG   r   r   sgetrsdgetrscgetrszgetrsrB   r   CUBLAS_OP_NCUBLAS_OP_TCUBLAS_OP_CrC   rD   r   rE   rF   ndimr   rH   rI   r   rJ   rL   rM   r   rN   )
lu_and_pivbtransoverwrite_br   r+   r   rU   r#   r   getrsrR   r$   rS   rT   r   r   r   rg     sh   







)FT)FFT)rX   )r   FT)
__future__r   warningsr   r   r   	cupy.cudar   r   r   cupy.linalgr   cupyx.scipy.linalgr   
implementsr	   r   r
   r   _device_get_indexElementwiseKernelr\   r   ra   rg   r   r   r   r   <module>   s>    
.
;")$