o
    i-                     @   s   d dl mZ d dlZd dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ edd!ddZedd"ddZd!ddZd#ddZdZejddddedZdd ZejddddedZedd$d dZdS )%    )warnN)cublas)device)runtime)_util)_uarray	lu_factorFTc                 C   s   t | ||S )a  LU decomposition.

    Decompose a given two-dimensional square matrix into ``P * L * U``,
    where ``P`` is a permutation matrix,  ``L`` lower-triangular with
    unit diagonal elements, and ``U`` upper-triangular matrix.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``
        overwrite_a (bool): Allow overwriting data in ``a`` (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        tuple:
            ``(lu, piv)`` where ``lu`` is a :class:`cupy.ndarray`
            storing ``U`` in its upper triangle, and ``L`` without
            unit diagonal elements in its lower triangle, and ``piv`` is
            a :class:`cupy.ndarray` storing pivot indices representing
            permutation matrix ``P``. For ``0 <= i < min(M,N)``, row
            ``i`` of the matrix was interchanged with row ``piv[i]``

    .. seealso:: :func:`scipy.linalg.lu_factor`
    )
_lu_factor)aoverwrite_acheck_finite r   Z/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/cupyx/scipy/linalg/_decomp_lu.pyr      s   luc                 C   s   t | ||\}}|j\}}t||}t|\}	}
|r(t|	d|d |d |	|
fS |jjdv r1tjntj	}t
t
j|f|d}t|d|d |d ||	|
fS )a  LU decomposition.

    Decomposes a given two-dimensional matrix into ``P @ L @ U``, where ``P``
    is a permutation matrix, ``L`` is a lower triangular or trapezoidal matrix
    with unit diagonal, and ``U`` is a upper triangular or trapezoidal matrix.

    Args:
        a (cupy.ndarray): The input matrix with dimension ``(M, N)``.
        permute_l (bool): If ``True``, perform the multiplication ``P @ L``.
        overwrite_a (bool): Allow overwriting data in ``a`` (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        tuple:
            ``(P, L, U)`` if ``permute_l == False``, otherwise ``(PL, U)``.
            ``P`` is a :class:`cupy.ndarray` storing permutation matrix with
            dimension ``(M, M)``. ``L`` is a :class:`cupy.ndarray` storing
            lower triangular or trapezoidal matrix with unit diagonal with
            dimension ``(M, K)`` where ``K = min(M, N)``. ``U`` is a
            :class:`cupy.ndarray` storing upper triangular or trapezoidal
            matrix with dimension ``(K, N)``. ``PL`` is a :class:`cupy.ndarray`
            storing permuted ``L`` matrix with dimension ``(M, K)``.

    .. seealso:: :func:`scipy.linalg.lu`
    r      fFdtype)r	   shapemin_cupy_split_lu_cupy_laswpr   charnumpyfloat32float64cupydiagones)r
   	permute_lr   r   r   pivmnkLUr_dtypePr   r   r   r   ,   s   


c              	   C   s  ddl m} t| } t|  | j}|jdkr|j}|j	}n*|jdkr+|j
}|j}n|jdkr7|j}|j}n|jdkrC|j}|j}nd}t|| j|d| d} |re| jjdkret|  setd	t }tjd
tjd}	| j\}
}tjt|
|ftjd}|||
|| jj|
}tj||d}|||
|| jj|
|jj|jj|	jj t j!s|	d dk rtd|	d   |	d dkrt"d|	d  t#dd |d
8 }| |fS )Nr   cusolverfdFD>Only float32, float64, complex64 and complex128 are supported.ordercopy#array must not contain infs or NaNsr   r   z=illegal value in %d-th argument of internal getrf (lu_factor)z4Diagonal number %d is exactly zero. Singular matrix.   )
stacklevel)$cupy_backends.cuda.libsr*   r   asarrayr   
_assert_2dr   r   sgetrfsgetrf_bufferSizedgetrfdgetrf_bufferSizecgetrfcgetrf_bufferSizezgetrfzgetrf_bufferSizeNotImplementedErrorastypekindisfiniteall
ValueErrorr   get_cusolver_handleemptyr   int32r   r   intcdataptrr   is_hipr   RuntimeWarning)r
   r   r   r*   r   getrfgetrf_bufferSizemsgcusolver_handledev_infor"   r#   ipiv
buffersize	workspacer   r   r   r	   [   sZ   






r	   Cc              
   C   s   | j sJ | j\}}t||}|dkrdnd}tj||f|| jd}tj||f|| jd}|| }t| ||||j|||d ||fS )Nr-   rW   )r1   r   size)_f_contiguousr   r   r   rH   r   _kernel_cupy_split_lu_c_contiguous)LUr1   r"   r#   r$   r%   r&   rY   r   r   r   r      s   


r   z
__device__ inline int get_index(int row, int col, int num_rows, int num_cols,
                                bool c_contiguous)
{
    if (c_contiguous) {
        return col + num_cols * row;
    } else {
        return row + num_rows * col;
    }
}
z6raw T LU, int32 M, int32 N, int32 K, bool C_CONTIGUOUSzraw T L, raw T Ua$  
    // LU: shape: (M, N)
    // L: shape: (M, K)
    // U: shape: (K, N)
    const T* ptr_LU = &(LU[0]);
    T* ptr_L = &(L[0]);
    T* ptr_U = &(U[0]);
    int row, col;
    if (C_CONTIGUOUS) {
        row = i / N;
        col = i % N;
    } else {
        row = i % M;
        col = i / M;
    }
    T lu_val = ptr_LU[get_index(row, col, M, N, false)];
    T l_val, u_val;
    if (row > col) {
        l_val = lu_val;
        u_val = static_cast<T>(0);
    } else if (row == col) {
        l_val = static_cast<T>(1);
        u_val = lu_val;
    } else {
        l_val = static_cast<T>(0);
        u_val = lu_val;
    }
    if (col < K) {
        ptr_L[get_index(row, col, M, K, C_CONTIGUOUS)] = l_val;
    }
    if (row < K) {
        ptr_U[get_index(row, col, K, N, C_CONTIGUOUS)] = u_val;
    }
    cupyx_scipy_linalg_split_lu)preamblec                 C   s`   | j \}}|j d }d|kr||kr||k sJ | js | js J t||||||| j| |d	 d S )Nr   rX   )r   r\   rZ   _kernel_cupy_laswp)Ak1k2rT   incxr"   r#   r$   r   r   r   r      s
   

 r   zOint32 M, int32 N, int32 K1, int32 K2, raw I IPIV, int32 INCX, bool C_CONTIGUOUSzraw T Aa  
    // IPIV: 0-based pivot indices. shape: (K,)  (*) K > K2
    // A: shape: (M, N)
    T* ptr_A = &(A[0]);
    if (K1 > K2) return;
    int row_start, row_end, row_inc;
    if (INCX > 0) {
        row_start = K1; row_end = K2; row_inc = 1;
    } else if (INCX < 0) {
        row_start = K2; row_end = K1; row_inc = -1;
    } else {
        return;
    }
    int col = i;
    int row1 = row_start;
    while (1) {
        int row2 = IPIV[row1];
        if (row1 != row2) {
            int idx1 = get_index(row1, col, M, N, C_CONTIGUOUS);
            int idx2 = get_index(row2, col, M, N, C_CONTIGUOUS);
            T tmp       = ptr_A[idx1];
            ptr_A[idx1] = ptr_A[idx2];
            ptr_A[idx2] = tmp;
        }
        if (row1 == row_end) break;
        row1 += row_inc;
    }
    cupyx_scipy_linalg_laswplu_solvec                 C   s  ddl m} | \}}t| t| t| |jd }||jd kr)td|j}	|	j	dkr5|j
}
n!|	j	dkr>|j}
n|	j	dkrG|j}
n|	j	dkrP|j}
nd}t||dkr^tj}n|d	krftj}n|d
krntj}ntd|j|	ddd}|j|jddd}|d	7 }|j|	d| d}|r|jjdkrt| std|jjdkrt| std|jd	krd	n|jd	 }t }tjd	tjd}|
|||||jj||jj|jj||jj
 t j!s|d dk rtd|d   |S )a9  Solve an equation system, ``a * x = b``, given the LU factorization of ``a``

    Args:
        lu_and_piv (tuple): LU factorization of matrix ``a`` (``(M, M)``)
            together with pivot indices.
        b (cupy.ndarray): The matrix with dimension ``(M,)`` or
            ``(M, N)``.
        trans ({0, 1, 2}): Type of system to solve:

            ========  =========
            trans     system
            ========  =========
            0         a x  = b
            1         a^T x = b
            2         a^H x = b
            ========  =========
        overwrite_b (bool): Allow overwriting data in b (may enhance
            performance)
        check_finite (bool): Whether to check that the input matrices contain
            only finite numbers. Disabling may give a performance gain, but may
            result in problems (crashes, non-termination) if the inputs do
            contain infinities or NaNs.

    Returns:
        cupy.ndarray:
            The matrix with dimension ``(M,)`` or ``(M, N)``.

    .. seealso:: :func:`scipy.linalg.lu_solve`
    r   r)   zincompatible dimensions.r+   r,   r-   r.   r/   r   r4   zunknown transFr0   Tzarray must not contain infs or NaNs.
Note that when a singular matrix is given, unlike scipy.linalg.lu_factor, cupyx.scipy.linalg.lu_factor returns an array containing NaN.r3   r   z<illegal value in %d-th argument of internal getrs (lu_solve))"r6   r*   r   _assert_cupy_arrayr8   _assert_stacked_squarer   rF   r   r   sgetrsdgetrscgetrszgetrsrA   r   CUBLAS_OP_NCUBLAS_OP_TCUBLAS_OP_CrB   rC   r   rD   rE   ndimr   rG   rH   r   rI   rK   rL   r   rM   )
lu_and_pivbtransoverwrite_br   r*   r   rT   r"   r   getrsrQ   r#   rR   rS   r   r   r   rf     sh   







)FT)FFT)rW   )r   FT)warningsr   r   r   	cupy.cudar   r   r   cupy.linalgr   cupyx.scipy.linalgr   
implementsr   r   r	   r   _device_get_indexElementwiseKernelr[   r   r`   rf   r   r   r   r   <module>   s<    
.
;")$