o
    i                     @   s  d dl Z d dlZd dlZzd dlZdZW n ey   dZY nw d dlZd dlm	Z	 d dl
mZ d dl
mZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ G dd dejZdd Zd>ddZdd Zdd ZdZdZejjdddd Zejjdddd Zdd Zejjdddd  Zejjddd!d" Z d#Z!d$Z"d%Z#d&Z$d'Z%d(Z&d)Z'd*Z(d+d, Z)ejjddd?d.d/Z*ejjddd0d1 Z+d2d3 Z,ejjddd4d5 Z-d6d7 Z.ejjddd8d9 Z/ejjddd:d; Z0ejjddd<d= Z1dS )@    NTF)_accelerator)cub)runtime)_base)_compressed)_csc)SparseEfficiencyWarning)_utilc                   @   s|  e Zd ZdZdZdZddZdd Zdd	 Zd
d Zdd Z	dd Z
dd Zdd Zdd Zdd Zdd Zdd Zdd Zdd Zd d! Zd"d# Zd[d%d&Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd[d1d2Zd3d4 Zd\d5d6Zd]d8d9Zd^d:d;Zd^d<d=Z d^d>d?Z!d@dA Z"d^dBdCZ#d^dDdEZ$d^dFdGZ%d]dHdIZ&dJdK Z'dLdM Z(dNdO Z)dPdQ Z*dRdS Z+dTdU Z,dVdW Z-dXdY Z.dS )_
csr_matrixa  Compressed Sparse Row matrix.

    This can be instantiated in several ways.

    ``csr_matrix(D)``
        ``D`` is a rank-2 :class:`cupy.ndarray`.
    ``csr_matrix(S)``
        ``S`` is another sparse matrix. It is equivalent to ``S.tocsr()``.
    ``csr_matrix((M, N), [dtype])``
        It constructs an empty matrix whose shape is ``(M, N)``. Default dtype
        is float64.
    ``csr_matrix((data, (row, col)))``
        All ``data``, ``row`` and ``col`` are one-dimenaional
        :class:`cupy.ndarray`.
    ``csr_matrix((data, indices, indptr))``
        All ``data``, ``indices`` and ``indptr`` are one-dimenaional
        :class:`cupy.ndarray`.

    Args:
        arg1: Arguments for the initializer.
        shape (tuple): Shape of a matrix. Its length must be two.
        dtype: Data type. It must be an argument of :class:`numpy.dtype`.
        copy (bool): If ``True``, copies of given arrays are always used.

    .. seealso::
        :class:`scipy.sparse.csr_matrix`

    csrNc                 C   sH   t std| j|}| j|}| j|}tjj|||f| j	dS )a:  Returns a copy of the array on host memory.

        Args:
            stream (cupy.cuda.Stream): CUDA stream object. If it is given, the
                copy runs asynchronously. Otherwise, the copy is synchronous.

        Returns:
            scipy.sparse.csr_matrix: Copy of the array on host memory.

        zscipy is not availableshape)
_scipy_availableRuntimeErrordatagetindicesindptrscipysparser
   _shape)selfstreamr   r   r    r   T/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/cupyx/scipy/sparse/_csr.pyr   8   s   zcsr_matrix.getc                 C   s   t |}|j|j|jfS N)	dense2csrr   r   r   )r   xmr   r   r   _convert_denseK   s   zcsr_matrix._convert_densec                 C   s   ||fS r   r   )r   r   yr   r   r   _swapO   s   zcsr_matrix._swapc                 C   sZ   ddl m} |   | }|  |dr|j}n|dr$|j}nt|| |||S )Nr   cusparsecsrgeam2csrgeam)cupyxr#   sum_duplicatestocsrcheck_availabilityr$   r%   NotImplementedError)r   otheralphabetar#   r%   r   r   r   _add_sparseR   s   

zcsr_matrix._add_sparsec           
      C   s8  t |rMtj|| jdd}t|d r.|dkr&ttj	| j
tjdS t| j
tjdS tjdtjd}tjdtjd}t|||fdd}t| ||S t |rY||  |S t|r|   |  |d	v rot| ||S td
t |dkr|d}n|dkrd}n|dkrd}t| ||}t| }	t|	S t)Ndtype   r   _ne_)r1      )r1   r1   r   )r2   _lt__gt_z]Comparing sparse matrices using ==, <=, and >= is inefficient, try using !=, <, or > instead._eq__le_r5   _ge_r4   )r	   isscalarlikecupyasarrayr0   reshapenumpyisnanr
   onesr   bool_zerosint32arange
binopt_csrisdensetodenseisspmatrix_csrr'   warningswarnr   logical_nottoarrayr*   )
r   r+   opop_namer   r   r   opposite_op_nameresoutr   r   r   _comparison`   s>   

zcsr_matrix._comparisonc                 C      |  |tjdS )Nr6   )rQ   operatoreqr   r+   r   r   r   __eq__      zcsr_matrix.__eq__c                 C   rR   )Nr2   )rQ   rS   nerU   r   r   r   __ne__   rW   zcsr_matrix.__ne__c                 C   rR   )Nr4   )rQ   rS   ltrU   r   r   r   __lt__   rW   zcsr_matrix.__lt__c                 C   rR   )Nr5   )rQ   rS   gtrU   r   r   r   __gt__   rW   zcsr_matrix.__gt__c                 C   rR   )Nr7   )rQ   rS   lerU   r   r   r   __le__   rW   zcsr_matrix.__le__c                 C   rR   )Nr8   )rQ   rS   gerU   r   r   r   __ge__   rW   zcsr_matrix.__ge__c              
   C   s  ddl m} t|r|   | | j| S t|rF|   |  |dr.|	| |S |dr9|
| |S |drD|| |S tt|r|   |  |drdtjsd|j| |jddS |drw| }|  |	| |S |dr| }|  |
| |S tt|r| |  S t|r^|jdkr|   | | j| S |jdkr2|   t|}| jjjj| jj| jjj k}|t d	k M }t  D ]'}|tj!krtjs|r|j"j#rt$| j%d | j%d | j&| j| j| j'|  S q|d
r| j&dkr|(| |r|j)}n|dr!|j*}n|dr+|j+}nt|| |S |jdkrZ|   |drF|j,}n|drP|j-}nt|| t|S t.dt/S )Nr   r"   spgemmcsrgemm2csrgemmT)transbr1   i*  csrmvExcsrmvspmvr3   csrmm2spmmzcould not interpret dimensions)0r&   r#   r:   isscalarr'   
_with_datar   rG   r)   rb   rc   rd   AssertionErrorr   isspmatrix_cscr   is_hipTr(   r   
isspmatrixrE   ndimasfortranarrayr   memsizer0   itemsizer   _get_cuda_build_versionr   get_routine_acceleratorsACCELERATOR_CUBflagsc_contiguousdevice_csrmvr   nnzr   csrmvExIsAlignedrf   rg   rh   ri   rj   
ValueErrorNotImplemented)r   r+   r#   bis_cub_safeacceleratorrg   csrmmr   r   r   __mul__   s   













zcsr_matrix.__mul__c                 C      t r   r*   rU   r   r   r   __div__      zcsr_matrix.__div__c                 C   r   r   r   rU   r   r   r   __rdiv__   r   zcsr_matrix.__rdiv__c                 C   s  t |r"| j}|tjkrtj}t||}tj||d}t	| |S t 
|rPt|}t|| j}t| j|j |  }t |j|j|j|jd ||_|S t|rt| j|jdd t| j|j}|jdvrrttj|}|  j|dd}||  S tS )z7Point-wise division by another matrix, vector or scalarr/   r1   F)allow_broadcastingFDcopy)r	   r9   r0   r=   float32float64r:   result_type
reciprocalmultiply_by_scalarrE   
atleast_2dbroadcast_tor   check_shape_for_pointwise_optocoo_cupy_divide_by_denser   rowcolr   rq   promote_typescharrF   astyper   )r   r+   r0   dret
self_denser   r   r   __truediv__   s4   







zcsr_matrix.__truediv__c                 C   s   t S r   )r   rU   r   r   r   __rtruediv__  r   zcsr_matrix.__rtruediv__r   c                 C   sx   | j \}}t|t|d |t|d }|dkr tjd| jdS |   tj|| jd}t |||| j| j	| j
| |S )Nr   r/   )r   minmaxr:   emptyr0   r'   _cupy_csr_diagonalr   r   r   )r   krowscolsylenr    r   r   r   diagonal  s   
zcsr_matrix.diagonalc                 C   s4   ddl m} || d}|j| _|j| _|j| _dS )zRemoves zero entories in place.r   r"   N)r&   r#   csr2csr_compressr   r   r   )r   r#   compressr   r   r   eliminate_zeros  s
   zcsr_matrix.eliminate_zerosc                 C   s   t |rUtj|| jd}||r=| j}|tjkrtj}n|tjkr%tj	}t
||}|j|dd}||  |}t|S |   || j|}t|| j| jf| j| jdS t |rj|   t|}||  |S t|r||   |  t| ||S t)Nr/   Fr   )r   r0   )r	   r9   r:   r;   r0   r=   r   r   	complex64
complex128r   r   rF   r
   r'   r   r   r   r   rE   r   rG   rD   r*   )r   r+   cupy_oprM   dense_checkr0   	new_arraynew_datar   r   r   _maximum_minimum(  s4   




zcsr_matrix._maximum_minimumc                 C      |  |tjddd S )N	_maximum_c                 S   s   | dkS Nr   r   r   r   r   r   <lambda>I      z$csr_matrix.maximum.<locals>.<lambda>)r   r:   maximumrU   r   r   r   r   G     zcsr_matrix.maximumc                 C   r   )N	_minimum_c                 S   s   | dk S r   r   r   r   r   r   r   M  r   z$csr_matrix.minimum.<locals>.<lambda>)r   r:   minimumrU   r   r   r   r   K  r   zcsr_matrix.minimumc                 C   sh   t |r
t| |S t|r|   t |}t| |S t|r.|   |  t	| |S d}t
|)z=Point-wise multiplication by another matrix, vector or scalarz2expected scalar, dense matrix/vector or csr matrix)r:   rk   r   r	   rE   r'   r   multiply_by_denserG   multiply_by_csr	TypeError)r   r+   msgr   r   r   multiplyO  s   





zcsr_matrix.multiplyc                 C   s2  | j \}}td| td|}}t|| || }|dkr"td|| j}|jdkr8tj|f|| jd}nt||j	}|d| }tj
||| dd}	tj|d fdd}
tj
|d dd|
||| d < ||
|| d d< || j|dd| 8 }| t||	|
f| j d }|j| _|j| _|j| _dS )	z3Set diagonal or off-diagonal elements of the array.r   zk exceeds matrix dimensionsr/   Nir1   )r   r   )r   r   r   r   r   r0   rr   r:   fullru   rC   rA   r   r
   r   r   r   )r   valuesr   r   r   row_stcol_stx_lenx_data	x_indicesx_indptrr    r   r   r   setdiaga  s&   

"zcsr_matrix.setdiagc                 C   s*   ddl m} | js||  d| _dS dS )zSorts the indices of this matrix *in place*.

        .. warning::
            Calling this function might synchronize the device.

        r   r"   TN)r&   r#   has_sorted_indicescsrsort)r   r#   r   r   r   sort_indicesy  s
   

zcsr_matrix.sort_indicesc                 C   s   ddl m} |du rdn| }| jdkrtj| j| j|dS | jjdvr*t	| |S | 
 }d|_|  |drZtjrB|jdkrZ||}|d	krM|S |dkrVt|S td
|dkre||jjS |d	krn|	|S td
)a  Returns a dense matrix representing the same value.

        Args:
            order ({'C', 'F', None}): Whether to store data in C (row-major)
                order or F (column-major) order. Default is C-order.
            out: Not supported.

        Returns:
            cupy.ndarray: Dense array representing the same matrix.

        .. seealso:: :meth:`scipy.sparse.csr_matrix.toarray`

        r   r"   NC)r   r0   orderfdFDFsparseToDenseFzorder not understood)r&   r#   upperr}   r:   rA   r   r0   r   	csr2denser   has_canonical_formatr'   r)   r   ro   r   ascontiguousarrayr   	csc2denserp   )r   r   rP   r#   r   r    r   r   r   rK     s0   






zcsr_matrix.toarrayFc                 C   r   r   r   )r   	blocksizer   r   r   r   tobsr     zcsr_matrix.tobsrc                 C   s@   ddl m} |r| j }| j }n| j}| j}|| ||S )zConverts the matrix to COOrdinate format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible.

        Returns:
            cupyx.scipy.sparse.coo_matrix: Converted matrix.

        r   r"   )r&   r#   r   r   r   csr2coo)r   r   r#   r   r   r   r   r   r     s   
zcsr_matrix.tocooc                 C   s@   ddl m} |dr|j}|| S |dr|j}|| S t)a{  Converts the matrix to Compressed Sparse Column format.

        Args:
            copy (bool): If ``False``, it shares data arrays as much as
                possible. Actually this option is ignored because all
                arrays in a matrix cannot be shared in csr to csc conversion.

        Returns:
            cupyx.scipy.sparse.csc_matrix: Converted matrix.

        r   r"   csr2csc
csr2cscEx2)r&   r#   r)   r   r   r*   )r   r   r#   r   r   r   r   tocsc  s   

zcsr_matrix.tocscc                 C   s   |r|   S | S )a  Converts the matrix to Compressed Sparse Row format.

        Args:
            copy (bool): If ``False``, the method returns itself.
                Otherwise it makes a copy of the matrix.

        Returns:
            cupyx.scipy.sparse.csr_matrix: Converted matrix.

        r   r   r   r   r   r   r(     s   zcsr_matrix.tocsrc                 C   s   |   S )zInverts the format.
        )r   )r   r   r   r   _tocsx  s   zcsr_matrix._tocsxc                 C   r   r   r   r   r   r   r   todia  r   zcsr_matrix.todiac                 C   r   r   r   r   r   r   r   todok  r   zcsr_matrix.todokc                 C   r   r   r   r   r   r   r   tolil  r   zcsr_matrix.tolilc                 C   sL   |durt d| jd | jd f}tj| j| j| jf||d}| j|_|S )aT  Returns a transpose matrix.

        Args:
            axes: This option is not supported.
            copy (bool): If ``True``, a returned matrix shares no data.
                Otherwise, it shared data arrays as much as possible.

        Returns:
            cupyx.scipy.sparse.csc_matrix: `self` with the dimensions reversed.

        NzoSparse matrices do not support an 'axes' parameter because swapping dimensions is the only logical permutation.r1   r   )r   r   )r   r   r   
csc_matrixr   r   r   r   )r   axesr   r   transr   r   r   	transpose  s   zcsr_matrix.transposec                 C      | j t||d ddS )zReturns a copy of row i of the matrix, as a (1 x n)
        CSR matrix (row vector).

        Args:
            i (integer): Row

        Returns:
            cupyx.scipy.sparse.csr_matrix: Sparse matrix with single row
        r1   Tr   )_major_sliceslicer   r   r   r   r   getrow     
zcsr_matrix.getrowc                 C   r   )zReturns a copy of column i of the matrix, as a (m x 1)
        CSR matrix (column vector).

        Args:
            i (integer): Column

        Returns:
            cupyx.scipy.sparse.csr_matrix: Sparse matrix with single column
        r1   Tr   )_minor_slicer   r   r   r   r   getcol*  r   zcsr_matrix.getcolc                 C   s   t ||d }| ||S Nr1   )r   r   _minor_index_fancyr   r   r   r   r   r   _get_intXarray6     zcsr_matrix._get_intXarrayc                 C   s"   t ||d }| |j|ddS )Nr1   Tr   )r   r   r   r   r   r   r   _get_intXslice:  s   zcsr_matrix._get_intXslicec                 C   s,   t ||d }|jdv }| |j||dS )Nr1   r1   Nr   )r   stepr   r   )r   r   r   r   r   r   r   _get_sliceXint>  s   
zcsr_matrix._get_sliceXintc                 C   s   |  ||S r   )r   r   r   r   r   r   _get_sliceXarrayC  rW   zcsr_matrix._get_sliceXarrayc                 C   s   t ||d }| ||S r   )r   _major_index_fancyr   r   r   r   r   _get_arrayXintF  r   zcsr_matrix._get_arrayXintc                 C   sP   |j dvr || jd \}}}t|||| jj}| ||S | ||S )Nr   r1   )	r   r   r   r:   rC   r0   _get_arrayXarrayr   r   )r   r   r   startstopr   r   r   r   r   _get_arrayXsliceJ  s
   
zcsr_matrix._get_arrayXslicer   )r   )NN)NF)F)/__name__
__module____qualname____doc__formatr   r   r!   r.   rQ   rV   rY   r[   r]   r_   ra   r   r   r   r   r   r   r   r   r   r   r   r   r   rK   r   r   r   r(   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   r   r
      sZ    
#Q#
	


.






r
   c                 C   s
   t | tS )zChecks if a given matrix is of CSR format.

    Returns:
        bool: Returns if ``x`` is :class:`cupyx.scipy.sparse.csr_matrix`.

    )
isinstancer
   r   r   r   r   rG   R  s   
rG   c                 C   st   |r0| \}}|\}}||ks|dks|dkst d||ks*|dks,|dks.t dd S d S d S | |kr8t dd S )Nr1   zinconsistent shape)r   )a_shapeb_shaper   a_ma_nb_mb_nr   r   r   r   \  s   r   c                 C   s2   | j | }| j }| j }t|||f| jdS )Nr   )r   r   r   r   r
   r   )spar   r   r   r   r   r   r   i  s   


r   c                 C   s   t | j|j | j\}}|j\}}t||t||}}| j||  ||  }t| j|j}	tj||	d}
tj|| j	jd}||krd||krUtj
d|d || jjd}ntj
d|d | j| jjd}n| j }||krq||9 }t | j| j| j	|||||||||
| t|
||f||fdS )Nr/   r   r1   r   )r   r   r   r}   r=   r   r0   r:   r   r   rC   r   r   cupy_multiply_by_denser   r
   )r  dnsp_msp_ndn_mdn_nr   nr}   r0   r   r   r   r   r   r   r   p  s&   


r   al  
__device__ inline int get_row_id(int i, int min, int max, const int *indptr) {
    int row = (min + max) / 2;
    while (min < max) {
        if (i < indptr[row]) {
            max = row - 1;
        } else if (i >= indptr[row + 1]) {
            min = row + 1;
        } else {
            break;
        }
        row = (min + max) / 2;
    }
    return row;
}
a  
__device__ inline int find_index_holding_col_in_row(
        int row, int col, const int *indptr, const int *indices) {
    int j_min = indptr[row];
    int j_max = indptr[row+1] - 1;
    while (j_min <= j_max) {
        int j = (j_min + j_max) / 2;
        int j_col = indices[j];
        if (j_col == col) {
            return j;
        } else if (j_col < col) {
            j_min = j + 1;
        } else {
            j_max = j - 1;
        }
    }
    return -1;
}
)for_each_devicec                   C      t jddddtdS )Nz
        raw S SP_DATA, raw I SP_INDPTR, raw I SP_INDICES,
        int32 SP_M, int32 SP_N,
        raw D DN_DATA, int32 DN_M, int32 DN_N,
        raw I OUT_INDPTR, int32 OUT_M, int32 OUT_N
        zO OUT_DATA, I OUT_INDICESa  
        int i_out = i;
        int m_out = get_row_id(i_out, 0, OUT_M - 1, &(OUT_INDPTR[0]));
        int i_sp = i_out;
        if (OUT_M > SP_M && SP_M == 1) {
            i_sp -= OUT_INDPTR[m_out];
        }
        if (OUT_N > SP_N && SP_N == 1) {
            i_sp /= OUT_N;
        }
        int n_out = SP_INDICES[i_sp];
        if (OUT_N > SP_N && SP_N == 1) {
            n_out = i_out - OUT_INDPTR[m_out];
        }
        int m_dn = m_out;
        if (OUT_M > DN_M && DN_M == 1) {
            m_dn = 0;
        }
        int n_dn = n_out;
        if (OUT_N > DN_N && DN_N == 1) {
            n_dn = 0;
        }
        OUT_DATA = (O)(SP_DATA[i_sp] * DN_DATA[n_dn + (DN_N * m_dn)]);
        OUT_INDICES = n_out;
        (cupyx_scipy_sparse_csr_multiply_by_densepreambler:   ElementwiseKernel_GET_ROW_ID_r   r   r   r   r    s   r  c                   C      t ddddS )Nz*T data, I row, I col, I width, raw T otherzT resz7
        res = data / other[row * width + col]
        #cupyx_scipy_sparse_coo_divide_denser:   r  r   r   r   r   r     s   r   c                 C   s  t | j|j | j\}}|j\}}t||t||}}| j||  ||  }|j||  ||  }	||	kr;t|| S |}
t| j|j}tj	|
|d}tj	|
| j
jd}||krz||krktjd|
d || jjd}ntjd|
d | j| jjd}n| j }||kr||9 }tj|
d | j
jd}tj|d | jjd}t | j| j| j
|||j|j|j
||||||||| tj|| jjd}tj|| jjd}t|d }tj	||d}tj	|| j
jd}t ||||| t|||f||fdS )Nr/   r   r1   r   )r   r   r   r}   r   r=   r   r0   r:   r   r   rC   r   r   rA   cupy_multiply_by_csr_step1r   cumsumintcupy_multiply_by_csr_step2r
   )r  r   r  r  r  r  r   r  a_nnzb_nnzc_nnzr0   c_data	c_indicesc_indptrrz   nnz_each_rowd_indptrd_nnzd_data	d_indicesr   r   r   r     sB   



r   c                   C   s   t jddddtt dS )Nz
        raw A A_DATA, raw I A_INDPTR, raw I A_INDICES, int32 A_M, int32 A_N,
        raw B B_DATA, raw I B_INDPTR, raw I B_INDICES, int32 B_M, int32 B_N,
        raw I C_INDPTR, int32 C_M, int32 C_N
        z6C C_DATA, I C_INDICES, raw I FLAGS, raw I NNZ_EACH_ROWaW  
        int i_c = i;
        int m_c = get_row_id(i_c, 0, C_M - 1, &(C_INDPTR[0]));

        int i_a = i;
        if (C_M > A_M && A_M == 1) {
            i_a -= C_INDPTR[m_c];
        }
        if (C_N > A_N && A_N == 1) {
            i_a /= C_N;
        }
        int n_c = A_INDICES[i_a];
        if (C_N > A_N && A_N == 1) {
            n_c = i % C_N;
        }
        int m_b = m_c;
        if (C_M > B_M && B_M == 1) {
            m_b = 0;
        }
        int n_b = n_c;
        if (C_N > B_N && B_N == 1) {
            n_b = 0;
        }
        int i_b = find_index_holding_col_in_row(m_b, n_b,
            &(B_INDPTR[0]), &(B_INDICES[0]));
        if (i_b >= 0) {
            atomicAdd(&(NNZ_EACH_ROW[m_c+1]), 1);
            FLAGS[i+1] = 1;
            C_DATA = (C)(A_DATA[i_a] * B_DATA[i_b]);
            C_INDICES = n_c;
        }
        ,cupyx_scipy_sparse_csr_multiply_by_csr_step1r  )r:   r  r   _FIND_INDEX_HOLDING_COL_IN_ROW_r   r   r   r   r%    s    r%  c                   C   r!  )Nz"T C_DATA, I C_INDICES, raw I FLAGSzraw D D_DATA, raw I D_INDICESz
        int j = FLAGS[i];
        if (j < FLAGS[i+1]) {
            D_DATA[j] = (D)(C_DATA);
            D_INDICES[j] = C_INDICES;
        }
        ,cupyx_scipy_sparse_csr_multiply_by_csr_step2r#  r   r   r   r   r(  =  s   r(  zH
__device__ inline O binopt(T in1, T in2) {
    return max(in1, in2);
}
zH
__device__ inline O binopt(T in1, T in2) {
    return min(in1, in2);
}
zG
__device__ inline O binopt(T in1, T in2) {
    return (in1 == in2);
}
zG
__device__ inline O binopt(T in1, T in2) {
    return (in1 != in2);
}
zF
__device__ inline O binopt(T in1, T in2) {
    return (in1 < in2);
}
zF
__device__ inline O binopt(T in1, T in2) {
    return (in1 > in2);
}
zG
__device__ inline O binopt(T in1, T in2) {
    return (in1 <= in2);
}
zG
__device__ inline O binopt(T in1, T in2) {
    return (in1 >= in2);
}
c                 C   s  t | j|j | j\}}|j\}}t||t||}}| j||  ||  }	|j||  ||  }
tj|	d | jjd}tj|
d |jjd}tj|	tj	d}tj|
tj	d}tj|d | j
jd}t| j|j}| jj|dd}|jj|dd}t}|dkr|t7 }|}nZ|dkr|t7 }|}nO|dkr|t7 }tj}nC|dkr|t7 }tj}n7|d	kr|t7 }tj}n+|d
kr|t7 }tj}n|dkr|t7 }tj}n|dkr|t7 }tj}ntd|tj|	|d}tj|
|d}tj|	| jjd}tj|
|jjd}|	|
 }t||d||| j
| j|||| j|	|j
|j||||j|
||||||||||d tj||jd}tj||jd}tj||jd}t|d }tj|| jjd}tj||d}t||||||	|||||
|||d t|||f||fdS )Nr1   r/   Fr   r   r   r6   r2   r4   r5   r7   r8   zinvalid op_name: {}r  )ru   r$  r   ) r   r   r   r}   r:   rA   r   r0   r=   int8r   r   r   r   r   _BINOPT_MAX__BINOPT_MIN__BINOPT_EQ_r@   _BINOPT_NE__BINOPT_LT__BINOPT_GT__BINOPT_LE__BINOPT_GE_r   r  r   cupy_binopt_csr_step1r&  r'  cupy_binopt_csr_step2r
   )r  r   rM   r  r  r  r  r   r  r)  r*  a_infob_infoa_validb_validr.  in_dtypea_datab_datafuncs	out_dtype
a_tmp_data
b_tmp_dataa_tmp_indicesb_tmp_indices_sizer+  r-  r,  r   r   r   rD   w  s   




rD    c                 C   s    d|  d }t jddd||dS )Ncupyx_scipy_sparse_csr_binopt_step1a  
        int32 M, int32 N,
        raw I A_INDPTR, raw I A_INDICES, raw T A_DATA,
        int32 A_M, int32 A_N, int32 A_NNZ_ACT, int32 A_NNZ,
        raw I B_INDPTR, raw I B_INDICES, raw T B_DATA,
        int32 B_M, int32 B_N, int32 B_NNZ_ACT, int32 B_NNZ
        z
        raw I A_INFO, raw B A_VALID, raw I A_TMP_INDICES, raw O A_TMP_DATA,
        raw I B_INFO, raw B B_VALID, raw I B_TMP_INDICES, raw O B_TMP_DATA,
        raw I C_INFO
        ab  
        if (i >= A_NNZ + B_NNZ) return;

        const int *MY_INDPTR, *MY_INDICES;  int *MY_INFO;  const T *MY_DATA;
        const int *OP_INDPTR, *OP_INDICES;  int *OP_INFO;  const T *OP_DATA;
        int MY_M, MY_N, MY_NNZ_ACT, MY_NNZ;
        int OP_M, OP_N, OP_NNZ_ACT, OP_NNZ;
        signed char *MY_VALID;  I *MY_TMP_INDICES;  O *MY_TMP_DATA;

        int my_j;
        if (i < A_NNZ) {
            // in charge of one of non-zero element of sparse matrix A
            my_j = i;
            MY_INDPTR  = &(A_INDPTR[0]);   OP_INDPTR  = &(B_INDPTR[0]);
            MY_INDICES = &(A_INDICES[0]);  OP_INDICES = &(B_INDICES[0]);
            MY_INFO    = &(A_INFO[0]);     OP_INFO    = &(B_INFO[0]);
            MY_DATA    = &(A_DATA[0]);     OP_DATA    = &(B_DATA[0]);
            MY_M       = A_M;              OP_M       = B_M;
            MY_N       = A_N;              OP_N       = B_N;
            MY_NNZ_ACT = A_NNZ_ACT;        OP_NNZ_ACT = B_NNZ_ACT;
            MY_NNZ     = A_NNZ;            OP_NNZ     = B_NNZ;
            MY_VALID   = &(A_VALID[0]);
            MY_TMP_DATA= &(A_TMP_DATA[0]);
            MY_TMP_INDICES = &(A_TMP_INDICES[0]);
        } else {
            // in charge of one of non-zero element of sparse matrix B
            my_j = i - A_NNZ;
            MY_INDPTR  = &(B_INDPTR[0]);   OP_INDPTR  = &(A_INDPTR[0]);
            MY_INDICES = &(B_INDICES[0]);  OP_INDICES = &(A_INDICES[0]);
            MY_INFO    = &(B_INFO[0]);     OP_INFO    = &(A_INFO[0]);
            MY_DATA    = &(B_DATA[0]);     OP_DATA    = &(A_DATA[0]);
            MY_M       = B_M;              OP_M       = A_M;
            MY_N       = B_N;              OP_N       = A_N;
            MY_NNZ_ACT = B_NNZ_ACT;        OP_NNZ_ACT = A_NNZ_ACT;
            MY_NNZ     = B_NNZ;            OP_NNZ     = A_NNZ;
            MY_VALID   = &(B_VALID[0]);
            MY_TMP_DATA= &(B_TMP_DATA[0]);
            MY_TMP_INDICES = &(B_TMP_INDICES[0]);
        }
        int _min, _max, _mid;

        // get column location
        int my_col;
        int my_j_act = my_j;
        if (MY_M == 1 && MY_M < M) {
            if (MY_N == 1 && MY_N < N) my_j_act = 0;
            else                       my_j_act = my_j % MY_NNZ_ACT;
        } else {
            if (MY_N == 1 && MY_N < N) my_j_act = my_j / N;
        }
        my_col = MY_INDICES[my_j_act];
        if (MY_N == 1 && MY_N < N) {
            my_col = my_j % N;
        }

        // get row location
        int my_row = get_row_id(my_j_act, 0, MY_M - 1, &(MY_INDPTR[0]));
        if (MY_M == 1 && MY_M < M) {
            if (MY_N == 1 && MY_N < N) my_row = my_j / N;
            else                       my_row = my_j / MY_NNZ_ACT;
        }

        int op_row = my_row;
        int op_row_act = op_row;
        if (OP_M == 1 && OP_M < M) {
            op_row_act = 0;
        }

        int op_col = 0;
        _min = OP_INDPTR[op_row_act];
        _max = OP_INDPTR[op_row_act + 1] - 1;
        int op_j_act = _min;
        bool op_nz = false;
        if (_min <= _max) {
            if (OP_N == 1 && OP_N < N) {
                op_col = my_col;
                op_nz = true;
            }
            else {
                _mid = (_min + _max) / 2;
                op_col = OP_INDICES[_mid];
                while (_min < _max) {
                    if (op_col < my_col) {
                        _min = _mid + 1;
                    } else if (op_col > my_col) {
                        _max = _mid;
                    } else {
                        break;
                    }
                    _mid = (_min + _max) / 2;
                    op_col = OP_INDICES[_mid];
                }
                op_j_act = _mid;
                if (op_col == my_col) {
                    op_nz = true;
                } else if (op_col < my_col) {
                    op_col = N;
                    op_j_act += 1;
                }
            }
        }

        int op_j = op_j_act;
        if (OP_M == 1 && OP_M < M) {
            if (OP_N == 1 && OP_N < N) {
                op_j = (op_col + N * op_row) * OP_NNZ_ACT;
            } else {
                op_j = op_j_act + OP_NNZ_ACT * op_row;
            }
        } else {
            if (OP_N == 1 && OP_N < N) {
                op_j = op_col + N * op_j_act;
            }
        }

        if (i < A_NNZ || !op_nz) {
            T my_data = MY_DATA[my_j_act];
            T op_data = 0;
            if (op_nz) op_data = OP_DATA[op_j_act];
            O out;
            if (i < A_NNZ) out = binopt(my_data, op_data);
            else           out = binopt(op_data, my_data);
            if (out != static_cast<O>(0)) {
                MY_VALID[my_j] = 1;
                MY_TMP_DATA[my_j] = out;
                MY_TMP_INDICES[my_j] = my_col;
                atomicAdd( &(C_INFO[my_row + 1]), 1 );
                atomicAdd( &(MY_INFO[my_j + 1]), 1 );
                atomicAdd( &(OP_INFO[op_j]), 1 );
            }
        }
        r  r#  )rM   r  namer   r   r   r@    s     r@  c                 C   s   d|  d }t ddd|S )Ncupyx_scipy_sparse_csr_binoptstep2z
        raw I A_INFO, raw B A_VALID, raw I A_TMP_INDICES, raw O A_TMP_DATA,
        int32 A_NNZ,
        raw I B_INFO, raw B B_VALID, raw I B_TMP_INDICES, raw O B_TMP_DATA,
        int32 B_NNZ
        zraw I C_INDICES, raw O C_DATAa  
        if (i < A_NNZ) {
            int j = i;
            if (A_VALID[j]) {
                C_INDICES[A_INFO[j]] = A_TMP_INDICES[j];
                C_DATA[A_INFO[j]]    = A_TMP_DATA[j];
            }
        } else if (i < A_NNZ + B_NNZ) {
            int j = i - A_NNZ;
            if (B_VALID[j]) {
                C_INDICES[B_INFO[j]] = B_TMP_INDICES[j];
                C_DATA[B_INFO[j]]    = B_TMP_DATA[j];
            }
        }
        r#  )rM   rS  r   r   r   rA  S  s   rA  c                 C   sJ   t j| j| j|d}| j\}}t| j}|||| j| j| j|dk| |S )N)r0   r   r   )r:   rA   r   r0   _cupy_csr2denser   r   r   )r  r   rP   r   r  kernr   r   r   r   q  s
   

r   c                 C   s*   | dkrd}nd}t jddd| dtdS )	N?zif (DATA) OUT[index] = true;zatomicAdd(&OUT[index], DATA);z?int32 M, int32 N, raw I INDPTR, I INDICES, T DATA, bool C_ORDERz	raw T OUTz
        int row = get_row_id(i, 0, M - 1, &(INDPTR[0]));
        int col = INDICES;
        int index = C_ORDER ? col + N * row : row + M * col;
        cupyx_scipy_sparse_csr2denser  r  )r0   rL   r   r   r   rV  y  s   rV  c           	      C   s  ddl m} | jjdv r|dr|j| ddS || S | j\}}t	| } tj
|d tjd}tj
|| d tjd}t ||| || tj|tjd}tj|tjd}t|d	 }tj|tjd}tj|| jd}t ||| ||| t|||f||fd
S )Nr   r"   r   denseToSparser   )r  r1   r/   r$  r   )r&   r#   r0   r   r)   rZ  r   r   r:   r   rA   r=   rB   cupy_dense2csr_step1r&  r'  r   cupy_dense2csr_step2r
   )	r  r#   r   r  r   infor}   r   r   r   r   r   r     s"   



r   c                   C   r!  )Nzint32 M, int32 N, T Azraw I INDPTR, raw I INFOz
        int row = i / N;
        int col = i % N;
        if (A != static_cast<T>(0)) {
            atomicAdd( &(INDPTR[row + 1]), 1 );
            INFO[i + 1] = 1;
        }
        "cupyx_scipy_sparse_dense2csr_step1r#  r   r   r   r   r[    s   r[  c                   C   r!  )Nz!int32 M, int32 N, T A, raw I INFOzraw I INDICES, raw T DATAz
        int row = i / N;
        int col = i % N;
        if (A != static_cast<T>(0)) {
            int idx = INFO[i];
            INDICES[idx] = col;
            DATA[idx] = A;
        }
        "cupyx_scipy_sparse_dense2csr_step2r#  r   r   r   r   r\    s   	r\  c                   C   r  )NzHint32 k, int32 rows, int32 cols, raw T data, raw I indptr, raw I indiceszT yal  
        int row = i;
        int col = i;
        if (k < 0) row -= k;
        if (k > 0) col += k;
        if (row >= rows || col >= cols) return;
        int j = find_index_holding_col_in_row(row, col,
            &(indptr[0]), &(indices[0]));
        if (j >= 0) {
            y = data[j];
        } else {
            y = static_cast<T>(0);
        }
        cupyx_scipy_sparse_csr_diagonalr  )r:   r  r5  r   r   r   r   r     s   r   )T)rP  )2rS   rH   r=   scipy.sparser   r   ImportErrorr:   
cupy._corer   	cupy.cudar   r   cupyx.scipy.sparser   r   r   r   r	   _compressed_sparse_matrixr
   rG   r   r   r   r   r5  memoizer  r   r   r%  r(  r8  r9  r:  r;  r<  r=  r>  r?  rD   r@  rA  r   rV  r   r[  r\  r   r   r   r   r   <module>   s|        ?


'
+
-
D 



