o
    X۷i                     @  s   d dl mZ d dlZd dlZd dlZzd dlZdZW n ey%   dZY nw d dl	Z	d dl
Z
d dl	mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZ G dd dejejejZdS )    )annotationsNTF)_core)_scalar)basic)_base)_coo)_data)_sputils)_util)_indexc                   @  s  e Zd ZdZeeejdddddZ	eeejdddddZ
eeejdd	dddZeeejd
d	ddd
ZdZejeejdddg ddZejeejdd	dg ddZeddddZeddddZdsddZdtddZdd  Zd!d" Zd#d$ Zd%d& Zd'd( Zd)d* Zd+d, Zd-d. Zd/d0 Zd1d2 Zdud4d5Z d6d7 Z!d8d9 Z"d:Z#ee#d;Z$d<Z%ee%d=Z&d>Z'eje'd?d@dAgdBZ(dCZ)eje)d?dDdEgdBZ*dFdG Z+dvdHdIZ,dvdJdKZ-dLdM Z.dNdO Z/dPdQ Z0dRdS Z1dTdU Z2dVdW Z3dXdY Z4dZd[ Z5d\d] Z6d^d_ Z7e8e6e7d`Z9dadb Z:dcdd Z;e8e:e;d`Z<dedf Z=dwdgdhZ>didj Z?dkdl Z@dmdn ZAdodp ZBdqdr ZCdS )x_compressed_sparse_matrixa  
        extern "C" __global__
        void ${func}(double* data, int* x, int* y, int length,
                           double* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            double running_value = 0;
            if (${cond}){
                running_value = data[x[tid]];
            } else {
                running_value = 0;
            }

            // Iterate over the block and update
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    running_value = nan("");
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] ${op} running_value){
                        running_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = running_value;
        }max_reduction>zblock_length == length)funcopcondmax_nonzero_reductionzblock_length > 0min_reduction<min_nonzero_reductiona  
        template<typename T1, typename T2> __global__ void
        ${func}_arg_reduction(T1* data, int* indices, int* x, int* y,
                              int length, T2* z) {
            // Get the index of the block
            int tid = blockIdx.x * blockDim.x + threadIdx.x;

            // Calculate the block length
            int block_length = y[tid] - x[tid];

            // Select initial value based on the block density
            int data_index = 0;
            double data_value = 0;

            if (block_length == length){
                // Block is dense. Fill the first value
                data_value = data[x[tid]];
                data_index = indices[x[tid]];
            } else if (block_length > 0)  {
                // Block has at least one zero. Assign first occurrence as the
                // starting reference
                data_value = 0;
                for (data_index = 0; data_index < length; data_index++){
                    if (data_index != indices[x[tid] + data_index] ||
                        x[tid] + data_index >= y[tid]){
                        break;
                    }
                }
            } else {
                // Zero valued array
                data_value = 0;
                data_index = 0;
            }

            // Iterate over the section of the sparse matrix
            for (int entry = x[tid]; entry < y[tid]; entry++){
                if (data[entry] != data[entry]){
                    // Check for NaN
                    data_value = nan("");
                    data_index = 0;
                    break;
                } else {
                    // Check for a value update
                    if (data[entry] ${op} data_value){
                        data_index = indices[entry];
                        data_value = data[entry];
                    }
                }
            }

            // Store in the return function
            z[tid] = data_index;
        }max)r   r   )zmax_arg_reduction<float, int>z#max_arg_reduction<float, long long>zmax_arg_reduction<double, int>z$max_arg_reduction<double, long long>)codename_expressionsmin)zmin_arg_reduction<float, int>z#min_arg_reduction<float, long long>zmin_arg_reduction<double, int>z$min_arg_reduction<double, long long>zraw T indptr, raw T indicesz	bool diffz
        bool diff_out = true;
        for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) {
            if (indices[jj] > indices[jj+1]){
                diff_out = false;
            }
        }
        diff = diff_out;
        %cupyx_scipy_sparse_has_sorted_indicesaF  
        bool diff_out = true;
        if (indptr[i] > indptr[i+1]) {
            diff = false;
            return;
        }
        for (T jj = indptr[i]; jj < indptr[i+1] - 1; jj++) {
            if (indices[jj] >= indices[jj+1]) {
                diff_out = false;
            }
        }
        diff = diff_out;
        'cupyx_scipy_sparse_has_canonical_formatNFc                 C  s~  ddl m} |d urt|stdt|d t|d f}t|rD|| j	}|j
}|j}|j}	|j	| j	kr;d}|d u rB|j}n	t|r||\}
}t|
t|}
}td|r]|nd}tdd}tj| |
|d d dd}	|
|f}d}ntrtj|r|| j	}t|j
}tj|jdd}tj|jdd}	d}|d u r|j}nt|trt|d	krtj||||d
}|| j	}|j
}|j}|j}	nt|trt|dkr|\}}}	t|r|jdkrt|r|jdkrt|	r|	jdkstdt|t|krtdn<t|rI|jd	kr!td|jdkr,|d  }n
|jdkr6|d }| |\}}}	d}|d u rH|j}ntd|d u rV|j}nt|}|j dvretd|j!||d}t"j#$| | |j!d|d| _|	j!d|d| _|d u r| t|	d t|% d }| j| \}}t|	|d krtdt|	|d f |j&' | _(|| _)d S )Nr   )cusparsez(invalid shape (must be a 2-tuple of int)   Fdidtype   )shaper!   copy   z'data, indices, and indptr should be 1-Dz*indices and data should have the same sizez'expected dimension <= 2 array or matrix)NNzUnsupported initializer formatz?fdFDzCOnly bool, float32, float64, complex64 and complex128 are supportedr$   z&index pointer size (%d) should be (%d))*cupyxr   r
   isshape
ValueErrorintr   issparseasformatformatdataindicesindptrr#   r   zeros_swapscipy_availablescipysparsecupyarray
isinstancetuplelenr   
coo_matrixisdensendim_convert_denser!   numpycharastypesparse_data_data_matrix__init__r   MatDescriptorcreate_descr_shape)selfarg1r#   r!   r$   r   xr.   r/   r0   mnsp_coosp_compressedmajorminor rR   T/home/ubuntu/vllm_env/lib/python3.10/site-packages/cupyx/scipy/sparse/_compressed.pyrD      s   









 
z"_compressed_sparse_matrix.__init__Tc                 C  sH   |r| j || j | j f| j|jdS | j || j| jf| j|jdS )N)r#   r!   )	__class__r/   r$   r0   r#   r!   )rI   r.   r$   rR   rR   rS   
_with_data,  s   z$_compressed_sparse_matrix._with_datac                 C     t NNotImplementedError)rI   rK   rR   rR   rS   r>   8     z(_compressed_sparse_matrix._convert_densec                 C  rV   rW   rX   )rI   rK   yrR   rR   rS   r2   ;  rZ   z_compressed_sparse_matrix._swapc                 C  rV   rW   rX   )rI   otheralphabetarR   rR   rS   _add_sparse>  rZ   z%_compressed_sparse_matrix._add_sparsec                 C  s   t |r|dkr|r|  S |  S tdt|r.|rdnd}|r%dnd}| |||S t|rR|rD|r>|   | S ||   S |rL|  | S |  | S t	S )Nr   z;adding a nonzero scalar to a sparse matrix is not supportedr   )
r6   isscalarr$   rY   r   
isspmatrixr_   r<   todenseNotImplemented)rI   r\   lhs_negativerhs_negativer]   r^   rR   rR   rS   _addA  s*   


z_compressed_sparse_matrix._addc                 C     |  |ddS NFrg   rI   r\   rR   rR   rS   __add__^     z!_compressed_sparse_matrix.__add__c                 C  rh   ri   rj   rk   rR   rR   rS   __radd__a  rm   z"_compressed_sparse_matrix.__radd__c                 C     |  |ddS )NFTrj   rk   rR   rR   rS   __sub__d  rm   z!_compressed_sparse_matrix.__sub__c                 C  ro   )NTFrj   rk   rR   rR   rS   __rsub__g  rm   z"_compressed_sparse_matrix.__rsub__c           
      C  s   |  ||\}}t| j| j| j||d \}}}|j}tjd|d}	|j	dkr8t
|j|j|||	j|	j |	S t||||	 |	S )Nr   rR   r    c)r2   r   _get_csr_submatrix_major_axisr.   r/   r0   r!   r6   r1   kind_compress_getitem_complex_kernrealimag_compress_getitem_kern)
rI   rowcolrP   rQ   r.   r/   _r!   resrR   rR   rS   _get_intXintj  s   

z&_compressed_sparse_matrix._get_intXintc                 C  s.   |  ||\}}|jdv }| |j||dS )N)r   Nr&   )r2   step_major_slice_minor_slice)rI   ry   rz   rP   rQ   r$   rR   rR   rS   _get_sliceXslicew  s   
z*_compressed_sparse_matrix._get_sliceXslicer   c           
   
   C  s   | j j}| j| j \}}| ||\}}|j|dd}|j|dd}t||| j| j | j|	 |	 |}	|j
dkr@t|	dS | |	|jS )NFr&   r   r   )r/   r!   r2   r#   rA   r   _csr_sample_valuesr0   r.   ravelr=   r6   expand_dimsrT   reshape)
rI   ry   rz   not_found_val	idx_dtypeMNrP   rQ   valrR   rR   rS   _get_arrayXarray|  s   
z*_compressed_sparse_matrix._get_arrayXarrayc                 C  s    |  ||\}}| ||S rW   )r2   _major_index_fancy_minor_index_fancy)rI   ry   rz   rP   rQ   rR   rR   rS   _get_columnXarray  s   z+_compressed_sparse_matrix._get_columnXarrayc                 C  sf   | j | j \}}|j}|  ||}| jdks|dkr"| j|| jdS | jt| j| j	| j
||ddS )zBIndex along the major axis where idx is an array of ints.
        r   r    Fr#   r$   )r2   r#   sizennzrT   r!   r   _csr_row_indexr.   r/   r0   )rI   idxr{   r   r   	new_shaperR   rR   rS   r     s   z,_compressed_sparse_matrix._major_index_fancyaO  
    extern "C" __global__
    void bincount_idx_global(const int  n_idx,
                            const int* __restrict__ idx,
                            int*       __restrict__ col_cnt)
    {
        int k = blockIdx.x * blockDim.x + threadIdx.x;
        if (k >= n_idx) return;
        atomicAdd(col_cnt + idx[k], 1);
    }
    bincount_idx_globala*  
    extern "C" __global__
    void row_kept_count(const int  n_row,
                        const int* __restrict__ Ap,
                        const int* __restrict__ Aj,
                        const int* __restrict__ col_cnt,
                        int*       __restrict__ Bp)
{
    // 1 block = 1 row
    const int row = blockIdx.x;
    if (row >= n_row) return;

    int local = 0;
    for (int p = Ap[row] + threadIdx.x; p < Ap[row + 1]; p += blockDim.x)
        local += col_cnt[Aj[p]];

    #pragma unroll
    for (int offs = 16; offs; offs >>= 1)
        local += __shfl_down_sync(0xffffffff, local, offs);

    static __shared__ int s[32];              // one per warp
    if ((threadIdx.x & 31) == 0) s[threadIdx.x>>5] = local;
    __syncthreads();

    if (threadIdx.x < 32) {
        int val = (threadIdx.x < (blockDim.x>>5)) ? s[threadIdx.x] : int(0);
        #pragma unroll
        for (int offset = 16; offset > 0; offset >>= 1)
            val += __shfl_down_sync(0xffffffff, val, offset);
        if (threadIdx.x == 0) Bp[row + 1] = val;
    }
}
row_kept_counta;  
    template<typename T> __global__ void
    fill_B(const int  n_row,
                        const int* __restrict__ Ap,
                        const int* __restrict__ Aj,
                        const   T* __restrict__ Ax,
                        const int* __restrict__ col_offset,
                        const int* __restrict__ col_order,
                        const int* __restrict__ Bp,
                        int*       __restrict__ Bj,
                        T*       __restrict__ Bx)
    {
        // 1 block = 1 row
        const int row = blockIdx.x;
        if (row >= n_row) return;

        // atomic write pointer
        __shared__ int row_ptr;
        if (threadIdx.x == 0) row_ptr = Bp[row];
        __syncthreads();

        for (int p = Ap[row] +threadIdx.x; p < Ap[row + 1]; p +=blockDim.x)
        {
            int col   = Aj[p];
            int stop  = col_offset[col];
            int start = (col == 0) ? 0 : col_offset[col - 1];
            int cnt   = stop - start;
            if (cnt == 0) continue;

            T v = Ax[p];
            // unique slice for this thread
            int my_out = atomicAdd(&row_ptr, cnt);
            for (int k = 0; k < cnt; ++k)
            {
                Bj[my_out + k] = col_order[start + k];
                Bx[my_out + k] = v;
            }
        }
    }
    )z
-std=c++17zfill_B<float>zfill_B<double>)r   optionsr   a  
    template<typename T> __global__ void
    fill_B_complex(const int  n_row,
                        const int* __restrict__ Ap,
                        const int* __restrict__ Aj,
                        const   T* __restrict__ Ax,
                        const int* __restrict__ col_offset,
                        const int* __restrict__ col_order,
                        const int* __restrict__ Bp,
                        int*       __restrict__ Bj,
                        T*       __restrict__ Bx)
    {
        // 1 block = 1 row
        const int row = blockIdx.x;
        if (row >= n_row) return;

        // atomic write pointer
        __shared__ int row_ptr;
        if (threadIdx.x == 0) row_ptr = Bp[row];
        __syncthreads();

        for (int p = Ap[row] +threadIdx.x; p < Ap[row + 1]; p +=blockDim.x)
        {
            int col   = Aj[p];
            int stop  = col_offset[col];
            int start = (col == 0) ? 0 : col_offset[col - 1];
            int cnt   = stop - start;
            if (cnt == 0) continue;

            T v = Ax[p*2];
            T i = Ax[p*2+1];
            // unique slice for this thread
            int my_out = atomicAdd(&row_ptr, cnt);
            for (int k = 0; k < cnt; ++k)
            {
                Bj[my_out + k] = col_order[start + k];
                Bx[(my_out + k)*2] = v;
                Bx[(my_out + k)*2 + 1] = i;
            }
        }
    }
    zfill_B_complex<float>zfill_B_complex<double>c                 C  s  | j | j \}}|j}|  ||}| jdks|dkr"| j|| jdS tj|tjd}tj	|d tjd}d|d< d}|| d | }	| 
|	f|f|||f | |f|f|| j| j||f t|tj}
tj|tjd}tj|dd tjd|dd< t|d  }tj	|tjd}tj	|| jjd}| jjdkrdt| jjj}| j|}nd	t| jj}| j|}d
}||f|f|| j| j| j||
|||f	 | j|||f| j|d}|S )zBIndex along the minor axis where idx is an array of ints.
        r   r    r      Nr`   rr   zfill_B_complex<{}>z
fill_B<{}>    )r!   r#   )r2   r#   r   r   rT   r!   r6   r1   int32empty	_bincount_calc_Bp_minorr0   r/   argsortrA   cumsumr*   getr.   rt   r-   r   get_typenamerv   _fill_B_complexget_function_fill_B)rI   r   r   r   n_idxr   
col_countsBpthread_countblock_count	col_order
col_offsetnnzBBjBxker_namefillBthreadsoutrR   rR   rS   r   6  st   
 z,_compressed_sparse_matrix._minor_index_fancyc           
      C  s   | j | j \}}||\}}}|dkr$||kr$|dkr$|r"|  S | S tt|||}|  ||}|dkrY|dks?| jdkrG| j|| jdS | jt	
| j| j| j||||dS tj|||| jjd}	| |	S )z@Index along the major axis where idx is a slice object.
        r   r   r    r   )r2   r#   r/   r$   r:   ranger   rT   r!   r   rs   r.   r0   r6   aranger   )
rI   r   r$   r   r   startstopr~   r   rowsrR   rR   rS   r     s"   
z&_compressed_sparse_matrix._major_slicec           
      C  s   | j | j \}}||\}}}|dkr$||kr$|dkr$|r"|  S | S tt|||}|  ||}|dks;| jdkrC| j|| jdS |dkrY| jt	
| j| j| j|||ddS tj|||| jjd}	| |	S )z@Index along the minor axis where idx is a slice object.
        r   r   r    Fr   )r2   r#   r/   r$   r:   r   r   rT   r!   r   _get_csr_submatrix_minor_axisr.   r0   r6   r   r   )
rI   r   r$   r   r   r   r   r~   r   colsrR   rR   rS   r     s"   
z&_compressed_sparse_matrix._minor_slicec                 C  "   |  ||\}}| ||| d S rW   r2   	_set_manyrI   ry   rz   rK   r   jrR   rR   rS   _set_intXint     z&_compressed_sparse_matrix._set_intXintc                 C  r   rW   r   r   rR   rR   rS   _set_arrayXarray  r   z*_compressed_sparse_matrix._set_arrayXarrayc                 C  s   | j | ||  |j\}}|dko|jd dk}|dko#|jd dk}|j|j}}	tj|j| jd}|rLt	t
||j}t|	|}	t||}|rdt	||}tt
||	j}	t	||}| |||	f |||	f \}
}| |
|| d S )Nr   r   r    )
_zero_manyr2   r#   ry   rz   r6   asarrayr.   r!   repeatr   r   tiler   )rI   ry   rz   rK   r   r   broadcast_rowbroadcast_colrrr   r   r   rR   rR   rS   _set_arrayXarray_sparse  s    
 z1_compressed_sparse_matrix._set_arrayXarray_sparsec                 C  sl   | j | j \}}dd }tj|| jjddd }tj|| jjddd }||| ||| ||||fS )Nc                 S  sF   |   }||krtd||f |  }|| k r!td||f d S )Nzindex (%d) out of range (>= %d)zindex (%d) out of range (< -%d))r   
IndexErrorr   )r/   boundr   rR   rR   rS   check_bounds  s   
z@_compressed_sparse_matrix._prepare_indices.<locals>.check_boundsTr   r!   r$   ndmin)r2   r#   r6   r7   r0   r!   r   r/   )rI   r   r   r   r   r   rR   rR   rS   _prepare_indices  s   


z*_compressed_sparse_matrix._prepare_indicesc           	      C  s  |  ||\}}}}tj|| jddd }tjjjtj	| j
tjd| j| jf||fd}|j||ddtj }|dk}|| | j|| < | rNdS td	| jtj | }|| }||d
k   |7  < || }||d
k   |7  < | ||||  dS )zSets value at each (i, j) to x
        Here (i,j) index major and minor respectively, and must not contain
        duplicate entries.
        Tr   r   r    r#   r`   r   Nz<Changing the sparsity structure of a {}_matrix is expensive.r   )r   r6   r7   r!   r   r'   r4   r5   
csr_matrixr   r   float32r/   r0   r   rA   r   r.   allwarningswarnr-   r   SparseEfficiencyWarning_insert_many)	rI   r   r   rK   r   r   new_spoffsetsmaskrR   rR   rS   r     s6   z#_compressed_sparse_matrix._set_manyc                 C  st   |  ||\}}}}tjjjtj| jtjd| j	| j
f||fd}|j||ddtj }d| j||dk < dS )zSets value at each (i, j) to zero, preserving sparsity structure.
        Here (i,j) index major and minor respectively.
        r    r   r`   r   r   N)r   r'   r4   r5   r   r6   r   r   r   r/   r0   r   rA   r   r   r.   )rI   r   r   r   r   r   r   rR   rR   rS   r     s   z$_compressed_sparse_matrix._zero_manyc                 C  s   t | j}||  |7  < t j| jj|d}|d|d< ||dd< t j||d t|d }t j||d}	t j|| jjd}
t j	|j
|d}||dd |< t j||d tj|||| j| j| j||	|
| jj
d d
 || _|	| _|
| _dS )z>Insert new elements into current sparse matrix in sorted orderr    r   r   N)r   r`   r   )r6   diffr0   r   r#   r   r*   r.   r!   r1   r   r   _insert_many_populate_arraysr/   )rI   indices_insertsdata_insertsr   
row_countsr   indptr_diff
new_indptrout_nnznew_indicesnew_datanew_indptr_lookuprR   rR   rS   _perform_insert  s(   
z)_compressed_sparse_matrix._perform_insertc                 C  s
  t |}||}||}||}tj| j| jf| j|j d}| j	|| _| j	|| _| j
	| j| _
t||||\}}}t j|dd\}	}
t |
jd |
j}|j|d< |
|dd< |}
t j|
jd |d}t j|t |	|d | |||	|| dS )a9  Inserts new nonzero at each (i, j) with value x
        Here (i,j) index major and minor respectively.
        i, j and x must be non-empty, 1d arrays.
        Inserts each major group (e.g. all entries per row) at a time.
        Maintains has_sorted_indices property.
        Modifies i, j, x in place.
        )maxvalT)return_indexr   r`   Nr    )r6   r   taker	   get_index_dtyper/   r0   r   r   rA   r.   r!   r   _select_last_indicesuniquer   r1   addatsearchsortedr   )rI   r   r   rK   orderr   indptr_insertsr   r   r   	ui_indptrto_addr   rR   rR   rS   r   2  s.   
	





z&_compressed_sparse_matrix._insert_manyc                 C  sj   | j jdkrd| _| jS t| ddsd| _| jS t| ds2| j| j| j| jjd d}t|	 | _| jS )a  Determine whether the matrix has sorted indices and no duplicates.

        Returns
            bool: ``True`` if the above applies, otherwise ``False``.

        .. note::
            :attr:`has_canonical_format` implies :attr:`has_sorted_indices`, so
            if the latter flag is ``False``, so will the former be; if the
            former is found ``True``, the latter flag is also set.

        .. warning::
            Getting this property might synchronize the device.

        r   T_has_sorted_indicesF_has_canonical_formatr   r   )
r.   r   r   getattrhasattr_has_canonical_format_kernr0   r/   boolr   )rI   is_canonicalrR   rR   rS   __get_has_canonical_format[  s   	
z4_compressed_sparse_matrix.__get_has_canonical_formatc                 C  s   t || _|rd| _dS dS )zTaken from SciPy as is.TN)r   r   has_sorted_indicesrI   r   rR   rR   rS   __set_has_canonical_format{  s   

z4_compressed_sparse_matrix.__set_has_canonical_format)fgetfsetc                 C  sR   | j jdkrd| _| jS t| ds&| j| j| j| jjd d}t| | _| jS )a#  Determine whether the matrix has sorted indices.

        Returns
            bool:
                ``True`` if the indices of the matrix are in sorted order,
                otherwise ``False``.

        .. warning::
            Getting this property might synchronize the device.

        r   Tr   r   r   )	r.   r   r   r   _has_sorted_indices_kernr0   r/   r   r   )rI   	is_sortedrR   rR   rS   __get_sorted  s   
z&_compressed_sparse_matrix.__get_sortedc                 C  s   t || _d S rW   )r   r   r   rR   rR   rS   __set_sorted  rm   z&_compressed_sparse_matrix.__set_sortedc                 C  s   | j S )zdReturns the shape of the matrix.

        Returns:
            tuple: Shape of the matrix.

        )rH   rI   rR   rR   rS   	get_shape  s   z#_compressed_sparse_matrix.get_shapec                 C  s   |du r| j jS t)zReturns the number of stored values, including explicit zeros.

        Args:
            axis: Not supported yet.

        Returns:
            int: The number of stored values.

        N)r.   r   r)   )rI   axisrR   rR   rS   getnnz  s   
z _compressed_sparse_matrix.getnnzc                 C  s   |   }|  |S )zReturn a copy of this matrix with sorted indices

        .. warning::
            Calling this function might synchronize the device.
        )r$   sort_indices)rI   ArR   rR   rS   sorted_indices  s   z(_compressed_sparse_matrix.sorted_indicesc                 C  rV   rW   rX   r  rR   rR   rS   r    s   z&_compressed_sparse_matrix.sort_indicesc                 C  s6   | j rdS |  }|  | || j d| _ dS )ae  Eliminate duplicate matrix entries by adding them together.

        .. note::
            This is an *in place* operation.

        .. warning::
            Calling this function might synchronize the device.

        .. seealso::
           :meth:`scipy.sparse.csr_matrix.sum_duplicates`,
           :meth:`scipy.sparse.csc_matrix.sum_duplicates`
        NT)has_canonical_formattocoosum_duplicatesrD   r,   r-   )rI   coorR   rR   rS   r    s   
z(_compressed_sparse_matrix.sum_duplicatesc              
   C  s   | j d|  }t|tj}|rtj| jtj| ji}n
tj| j	tj| j
i}|| |fd| jtj| jdt| jd  | jdd t| j | |f |S )a  Reduce nonzeros with a ufunc over the minor axis when non-empty

        Can be applied to a function of self.data by supplying data parameter.
        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted.
            axis (int): Matrix over which the reduction should be
                conducted.

        Returns:
            (cupy.ndarray): Reduce result for nonzeros in each
            major_index.

        r   r   N)r#   r6   r1   rA   float64amax_max_nonzero_reduction_kernamin_min_nonzero_reduction_kern_max_reduction_kern_min_reduction_kernr.   r0   r:   int64)rI   ufuncr	  nonzero	out_shaper   kernsrR   rR   rS   _minor_reduce  s"   z'_compressed_sparse_matrix._minor_reducec                 C  s   | j d|  }tj|td}dt| jjt|j}|tj	kr+| j
d| }n|tjkr8| jd| }||fd| j| j| jdt| jd  | jdd t| j | |f |S )a  Reduce nonzeros with a ufunc over the minor axis when non-empty

        Can be applied to a function of self.data by supplying data parameter.
        Warning: this does not call sum_duplicates()

        Args:
            ufunc (object): Function handle giving the operation to be
                conducted.
            axis (int): Maxtrix over which the reduction should be conducted

        Returns:
            (cupy.ndarray): Reduce result for nonzeros in each
            major_index

        r   r    z_arg_reduction<{}, {}>r   r   r  N)r#   r6   r1   r*   r-   r   r   r.   r!   argmax_max_arg_reduction_modr   argmin_min_arg_reduction_modr/   r0   r:   r  )rI   r  r	  r  r   r   kerrR   rR   rS   _arg_minor_reduce  s$   


z+_compressed_sparse_matrix._arg_minor_reduce)NNF)T)r   )FrW   )D__name__
__module____qualname___max_min_reduction_coder   	RawKernelstringTemplate
substituter  r  r  r  _argmax_argmin_code	RawModuler!  r#  ElementwiseKernelr  r   rD   rU   r>   r2   r_   rg   rl   rn   rp   rq   r}   r   r   r   r   _bincount_kernelr   _calc_Bp_kernelr   _fill_B_kernelr   _fill_B_kernel_complexr   r   r   r   r   r   r   r   r   r   r   r   3_compressed_sparse_matrix__get_has_canonical_format3_compressed_sparse_matrix__set_has_canonical_formatpropertyr  %_compressed_sparse_matrix__get_sorted%_compressed_sparse_matrix__set_sortedr   r  r
  r  r  r  r  r%  rR   rR   rR   rS   r      s    $



	6

	

j
")+
M
! ) 
	#r   )
__future__r   r+  r   r?   scipy.sparser4   r3   ImportErrorr6   r'   r   
cupy._corer   cupy._creationr   cupyx.scipy.sparser   r   r   rB   r	   r
   r   rC   _minmax_mixin
IndexMixinr   rR   rR   rR   rS   <module>   s2    