o
    X۷i                     @  s   d dl mZ d dlZd dlmZ d dlZejdk r d dlmZ nd dl	mZ d dl
Z
d dlmZ d dlmZ d dlmZmZmZmZmZmZ d	d
 Zdd Zdd Zdd Zdd Z	 	d*ddZdd Zdd ZdZdgZe
j eddd eD d g d!Z!d"d# Z"d$d% Z#d+d&d'd(d)Z$dS ),    )annotationsN)prod2)normalize_axis_index)sparse)spsolve)
_get_dtype_as_float_array_get_module_funcINTERVAL_MODULED_BOOR_MODULEBSplinec                 C  st   t | } |d dkrtd| |d d }| |d | d  }t j| d f|d  || d f|d  f }|S )zSGiven data x, construct the knot vector w/ not-a-knot BC.
    cf de Boor, XIII(12).      z Odd degree for now only. Got %s.r   )cupyasarray
ValueErrorr_)xkmt r   W/home/ubuntu/vllm_env/lib/python3.10/site-packages/cupyx/scipy/interpolate/_bspline2.py_not_a_knot   s   
,r   c                 C  s$   t j| d f| | | d f| f S )zBConstruct a knot vector appropriate for the order-k interpolation.r   r   )r   r   )r   r   r   r   r   _augknt%   s   $r   c                 C  s   t | }t|}|d dkr$t |}|dd  |dd d 8  < t |}t |d|  }|||| < td|D ]/}|||  |||d   d   ||| d < || | d  |||d    || | < q>|S )z$Returns vector of nodes on a circle.r   r   r   r   N)r   copylendiffzerosrange)r   r   xcndxr   ir   r   r   _periodic_knots*   s   

 
..r&   c                 C  sR   t | tr'| dkrdt|fg} | S | dkr!dt|fg} | S td|  | S )Nclampedr   naturalr   zUnknown boundary condition : %s)
isinstancestrr   r    r   )derivtarget_shaper   r   r   _convert_string_aliases<   s   
r-   c              
   C  sV   | d ur zt |  \}}W n ty } zd}t||d }~ww g g }}t||S )Nz^Derivatives, `bc_type`, should be specified as a pair of iterables of pairs of (order, value).)zip	TypeErrorr   r   
atleast_1d)r+   ordsvalsemsgr   r   r   _process_deriv_specG   s   

r5      Tc           #      C  s~  |du s|dks|dkrd\}}n%t |tr||}}nz|\}}W n ty5 }	 ztd| |	d}	~	ww t|}t||j}t| |} t||}t	||d}|dkrftj
|d |d dd	sftd
| j|jd krxtd| j|j| dd | dd k rtd| jdks| dd | dd k  rtd|dkrtdd |||fD rtdtj| | d f }t|}
tj|
t|
jd}
tj||
||dS |dkr|du r|du r|du stdtj| d | | d f }t|}
tj|
t|
jd}
tj||
||dS t|}|dkr#|dur#td|du rv|du rq|du rq|dkr=t| |}n9|dkrk| dd | dd  d }tj| d f|d  |dd | d f|d  f }nt| |}nt| |}t||}|dk rtd|jdks|dd |dd k  rtd|j| j| d k rtd|j| j| d f | d || k s| d ||  krtd|  |dkrt| ||||S t||jdd }t|\}}|jd }t||jdd }t|\}}|jd }| j}|j| d }|| || kr$td|| ||f |jdkrBtj|f|jdd  td}
tj||
||dS t | ||}|dksS|dkrtjd| d ftd}d}tj!||ftd}tj!d|jd}t"t#d|}tj!dtj$d}t"t%d}|d d!|tj| d | d f |||d"df |dkrtj&| d g| jd}tj||ftd}t'|D ].\}}|d d |||t(||||||ddf |d }|d|d  |||| |d f< qt)*t)+||g}|dkrM|d |d< tj&| d g| jd}tj||ftd}t'|D ].\}}|d d |||t(||||||ddf |d }|d|d  |||| |d f< qt)*|t)+|g}t,|jdd } tj!|| f|jd}!|dkro|-d| |!d|< |-d| |!||| < |dkr|-d| |!|| d< t.|!jtj/rt0||!j1t0||!j2d#  }"nt0||!}"t|"-|f|jdd  }"t||"|S )$ai  Compute the (coefficients of) interpolating B-spline.

    Parameters
    ----------
    x : array_like, shape (n,)
        Abscissas.
    y : array_like, shape (n, ...)
        Ordinates.
    k : int, optional
        B-spline degree. Default is cubic, ``k = 3``.
    t : array_like, shape (nt + k + 1,), optional.
        Knots.
        The number of knots needs to agree with the number of data points and
        the number of derivatives at the edges. Specifically, ``nt - n`` must
        equal ``len(deriv_l) + len(deriv_r)``.
    bc_type : 2-tuple or None
        Boundary conditions.
        Default is None, which means choosing the boundary conditions
        automatically. Otherwise, it must be a length-two tuple where the first
        element (``deriv_l``) sets the boundary conditions at ``x[0]`` and
        the second element (``deriv_r``) sets the boundary conditions at
        ``x[-1]``. Each of these must be an iterable of pairs
        ``(order, value)`` which gives the values of derivatives of specified
        orders at the given edge of the interpolation interval.
        Alternatively, the following string aliases are recognized:

        * ``"clamped"``: The first derivatives at the ends are zero. This is
           equivalent to ``bc_type=([(1, 0.0)], [(1, 0.0)])``.
        * ``"natural"``: The second derivatives at ends are zero. This is
          equivalent to ``bc_type=([(2, 0.0)], [(2, 0.0)])``.
        * ``"not-a-knot"`` (default): The first and second segments are the
          same polynomial. This is equivalent to having ``bc_type=None``.
        * ``"periodic"``: The values and the first ``k-1`` derivatives at the
          ends are equivalent.

    axis : int, optional
        Interpolation axis. Default is 0.
    check_finite : bool, optional
        Whether to check that the input arrays contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
        Default is True.

    Returns
    -------
    b : a BSpline object of the degree ``k`` and with knots ``t``.

    N
not-a-knotperiodicNNUnknown boundary condition: %sr   r   gV瞯<)atolzAFirst and last points does not match while periodic case expectedz(Shapes of x {} and y {} are incompatibler   zExpect x to not have duplicatesz1Expect x to be a 1D strictly increasing sequence.c                 s  s    | ]}|d uV  qd S )Nr   ).0_r   r   r   	<genexpr>   s    z%make_interp_spline.<locals>.<genexpr>z6Too much info for k=0: t and bc_type can only be None.dtypeaxisz0Too much info for k=1: bc_type can only be None.zOFor periodic case t is constructed automatically and can not be passed manuallyr   g       @Expect non-negative k.'Expect t to be a 1-D sorted array_like.zGot %d knots, need at least %d.Out of bounds w/ x = %s.zPThe number of derivatives at boundaries does not match: expected %s, got %s + %s)r   r   d_boorfind_intervalr   r   Fy              ?)3r)   r*   r/   r   r   r   r   ndimr	   moveaxisallclosesizeshapeformatanyr   ascontiguousarrayr   r@   r   construct_fastoperatorindexNotImplementedErrorr&   r   r   _make_periodic_spliner-   r5   r    floatdesign_matrixemptyr
   r   int64r   array	enumerateintr   vstack
csr_matrixr   reshape
issubdtypecomplexfloatingr   realimag)#r   yr   r   bc_typerB   check_finitederiv_lderiv_rr3   cderiv_l_ordsderiv_l_valsnleftderiv_r_ordsderiv_r_valsnrightr#   ntmatrtempnum_cdummy_coutd_boor_kernelintervals_bcinterval_kernelx0rowsjr   leftextradimrhscoefr   r   r   make_interp_splineT   s*  3




 &









*&


	

(
(



 r   c           #      C  s  |du s|dkrd\}}n%t |tr||}}nz|\}}W n ty1 } ztd| |d}~ww t||jdd }t|\}}	|jd }
t||jdd }t|\}}|jd }| j}|j| d }t||jdd }t|\}}	|jd }
t||jdd }t|\}}|jd }| j}|j| d }|| |
| ksJ tj	| tj
d}ttd}|| jd d	 d d	 fd
|| |||d| jd f tj|dftd}tjt| t|jdd f|jd}t|jdd }t| jd d| d  }ttd|}|| jd d	 d d	 fd
|||d| ||||d| jd f tj||ftd}|
}tt| D ])}|| }||d| d  |d| d  | d  ||| || |d f< q3tjdtj
d}|
dkr|d |d< tj| d g| jd}t|D ].\}}|dd|||t||||||ddf |d }|d|d  |||| |d f< q|dkr|d |d< tj| d g| jd}t|D ]6\}}|dd|||t||||||ddf |d }|
t|  | }|d|d  |||| |d f< qt|jdd }tj||f|jd} |
dkr!|	d|| d|
< |d|| |
|| < |dkr>|d|| || d< ddlm}! |!|| }"t|"|f|jdd  }"t||"|S )z Construct the interpolating spline spl(x) = y with *full* linalg.

        Only useful for testing, do not call directly!
        This version is O(N**2) in memory and O(N**3) in flop count.
    Nr7   r9   r:   r   r   r?   rG      )r   Fr   rF   rH   r   )solve)r)   r*   r/   r   r-   rN   r5   rM   r   
empty_likerZ   r
   r   rY   rW   r   r   r@   r   r    r!   r[   r\   r]   r`   cupy.linalgr   rQ   r   )#r   re   r   r   rf   rh   ri   r3   rk   rl   rm   rn   ro   rp   r#   rq   	intervalsry   ru   rv   rt   rs   rw   Aoffsetr|   r}   rx   rz   r   rowr~   r   r   r   r   r   r   _make_interp_spline_full_matrix]  s   






	H
(
(


 r   c                 C  s*  | j }t| ||}tjdd| d  td}d}tj|j | d |ftd}	tjd|	jd}
tt	d|	}tj
| d | d f }tj||| d gtjd}tj|d || d ftd}t|d D ]F}|dd	||	||d |||
||ddf |d |d  ||d |d f< ||| d f  |d| d d| d | d  d d 8  < qctt||g}t|jdd  }tj|| d |ftd}d|d |d d d f< |j dkr||dn|d|f||d d d d f< t||}t||| d f|jdd   }tj|||d
|dS )Nr   r   r?   )r   r   rF   r   r   rH   rI   r8   )extrapolaterB   )rM   r   rX   r   r    rW   rY   r@   r
   r   r   r[   rZ   r!   r   r^   r_   r   rN   r`   r   rQ   rR   )r   re   r   r   rB   r#   rr   rs   rt   ru   rv   rw   rz   rx   r{   r   matr_csrr~   r   r   r   r   r   rV     sH   
 D
(
rV   a  
typedef long long ssize_t ;

/*
 * Compute the parameters of the Givens transformation: LAPACK's dlartg
 * replacement.
 *
 * Naive computation, following
 * https://www.netlib.org/lapack/explore-3.1.1-html/dlartg.f.html
 *
 */
template<typename T>
__global__ void
dlartg(T *f, T *g, T *cs, T *sn, T *r) {

    if (*g == 0) {
        *cs = 1.0;
        *sn = 0.0;
        *r = *f;
    }
    else if (*f == 0){
        *cs = 0.0;
        *sn = 1.0;
        *r = *g;
    }
    else {
        T piv = fabs(*f);

        if (piv >= *g) {
            T sq = *g / *f;
            *r = piv * sqrt(1.0 + sq*sq);
        } else {
            T sq = *f / *g;
            *r = *g * sqrt(1.0 + sq*sq);
        }

        *cs = *f / *r;
        *sn = *g / *r;
    }
}


/*
 * Givens-rotate a pair [f, g] -> [f_out, g_out]
 */
template<typename T>
__global__ void
fprota(T c, T s, T f, T g, T *f_out, T *g_out) {
    *f_out =  c*f + s*g;
    *g_out = -s*f + c*g;
}


// 2D array indexing: R(i, j)
#define IDX(i, j, ncols) ( (ncols)*(i) + (j) )



/*
 *  Solve the LSQ problem ||y - A@c||^2 via QR factorization.
 *
 *  QR factorization follows FITPACK: we reduce A row-by-row by Givens
 *  rotations. To zero out the lower triangle, we use in the row `i`
 *   and column `j < i`, the diagonal element in that column. That way, the
 *  sequence is (here `[x]` are the pair of elements to Givens-rotate)
 *
 *   [x] x x x       x  x  x x      x  x  x x      x x  x  x      x x x x
 *   [x] x x x  ->   0 [x] x x  ->  0 [x] x x  ->  0 x  x  x  ->  0 x x x
 *    0  x x x       0 [x] x x      0  0  x x      0 0 [x] x      0 0 x x
 *    0  x x x       0  x  x x      0 [x] x x      0 0 [x] x      0 0 0 x
 *
 *  The matrix A has a special structure: each row has at most (k+1)
 *  consecutive non-zeros, so we only store them.
 *
 *  On exit, the return matrix, also of shape (m, k+1), contains
 *  elements of the upper triangular matrix `R[i, i: i + k + 1]`.
 *
 *  When we process the element (i, j), we store the rotated row in R[i, :],
 *  and *shift it to the left*, so that the the diagonal element is always in
 *  the zero-th place. This way, the process above becomes
 *
 *   [x] x x x       x  x x x       x  x x x       x  x x x      x x x x
 *   [x] x x x  ->  [x] x x -  ->  [x] x x -   ->  x  x x -  ->  x x x -
 *    x  x x -      [x] x x -       x  x - -      [x] x - -      x x - -
 *    x  x x -       x  x x -      [x] x x -      [x] x - -      x - - -
 *
 *  The most confusing part is that when rotating the row `i` with a row `j`
 *  above it, the offsets differ: for the upper row  `j`, `R[j, 0]` is the
 *  diagonal element, while for the row `i`, `R[i, 0]` is the element being
 *  annihilated.
 *
 *  NB. This row-by-row Givens reduction process follows FITPACK:
 *  https://github.com/scipy/scipy/blob/maintenance/1.11.x/scipy/interpolate/fitpack/fpcurf.f#L112-L161
 *
 *  A possibly more efficient way could be to note that all data points which
 *  lie between two knots all have the same offset: if
 *  `t[i] < x_1 .... x_s < t[i+1]`, the `s-1` corresponding rows form an
 *  `(s-1, k+1)`-sized "block".
 *  Then a blocked QR implementation could look like
 *  https://people.sc.fsu.edu/~jburkardt/f77_src/band_qr/band_qr.f
 *
 *  We implement the FITPACK procedure here, even though it is inherently
 *  sequential.
 *
 *  The `startrow` optional argument accounts for the scenatio with a two-step
 *  factorization. Namely, the preceding rows are assumed to be already
 *  processed and are skipped.
 *  This is to account for the scenario where we append new rows to an already
 *  triangularized matrix.
 *
 *  Note that this routine MODIFIES `a` & `y` in-place.
 *
 */
__global__ void
qr_reduce(double *a, int m, int nz, // a(m, nz), packed
          ssize_t *offset,          // offset(m)
          int nc,                   // dense would be a(m, nc)
          double *y, int ydim1,     // y(m, ydim1)
          int startrow=1
)
{
    for (ssize_t i=startrow; i < m; i++) {
        ssize_t oi = offset[i];
        ssize_t i_nc = i < nc ? i : nc;  // the diagonal in row i
        for (ssize_t j=oi; j < nc; j++) {

            // rotate only the lower diagonal
            if (j >= i_nc) {
                break;
            }

            // in dense format: diag a1[j, j] vs a1[i, j]
            double c, s, r;
            dlartg(&a[IDX(j, 0, nz)],    // R(j, 0)
                   &a[IDX(i, 0, nz)],    // R(i, 0)
                   &c,
                   &s,
                   &r);

            // rotate l.h.s.
            a[IDX(j, 0, nz)] = r;  //R(j, 0) = r;
            for (ssize_t l=1; l < nz; ++l) {
                double r0, r1;
                fprota(c, s,
                       a[IDX(j, l, nz)], a[IDX(i, l, nz)],
                       &r0, &r1);
                a[IDX(j, l, nz)] = r0;
                a[IDX(i, l-1, nz)] = r1;
            }
            a[IDX(i, nz-1, nz)] = 0.0;

            // rotate r.h.s.
            for (ssize_t l=0; l < ydim1; ++l) {
                double r0, r1;
                fprota(c, s,
                       y[IDX(j, l, ydim1)], y[IDX(i, l, ydim1)],
                       &r0, &r1);
                y[IDX(j, l, ydim1)] = r0; // y(j, l) = r0;
                y[IDX(i, l, ydim1)] = r1; // y(i, l) = r1;
            }
        }
        if (i < nc) {
            offset[i] = i;
        }

    } // for(i = ...
}

double)z
-std=c++17c                 C  s   g | ]}d | dqS )zdlartg<>r   )r<   	type_namer   r   r   
<listcomp>  s    r   	qr_reduce)codeoptionsname_expressionsc                 C  s   ddl m} | jd }tj||d  tjd}|| ||d|\}}|||d }	|dd|d   }
|jd | d }|jdksDJ ||dddf  }t	t
d}|d	d	|	||d |
|||jd df t|	||}|	||fS )
zSolve for the LSQ spline coeffs given x, y and knots.
    `y` is always 2D: for 1D data, the shape is ``(m, 1)``.
    `w` is always 1D: one weight value per `x` value.
    r   )_make_design_matrixr   r?   TNr   r   rH   )_bspliner   rN   r   rY   rZ   r`   r   rJ   r
   	QR_MODULEfpback)r   re   r   r   wr   r   indicesr   Rr   ncy_wr   ccr   r   r   _lsq_solve_qr  s&   



r   c           	      C  s   | j \}}|j d | j d ksJ t|d| }||d df | |d df  ||d df< t|d ddD ]2}t||| }| |d|df ||d || df  jdd}|| | | |df  ||df< q8|S )zBacksubsitution solve upper triangular banded `R @ c = y.`
    `R` is in the "packed" format: `R[i, :]` is `a[i, i:i+k+1]`
    r   Nr   .r   r   rA   )rN   r   
zeros_liker!   minsum)	r   r   re   r=   nzrj   r%   nelsummr   r   r   r     s   
,2"r   qr)methodc                C  s:  t | |} t ||}t ||}|durt ||}nt| }t|}t||j}t||d}| jdksEt| dd | dd  dkrIt	d| jdkrRt	d| j
d |d k r_t	d|dk rgt	d|jdks|t|dd |dd  dk rt	d	| j|j
d krt	d| j
 d|j
 d|dkrt| || k | ||  kB rt	d|  | j|jkrt	d| j
 d|j
 d|dkrt	d|dt| dd | dd  dk rt	d|j| d }t|j
dd }	|d|	}
t| |
|||\}}}||f|j
dd  }t|}tj||||dS )a  Construct a BSpline via an LSQ (Least SQuared) fit.

    The result is a linear combination

        .. math::
            S(x) = \sum_j c_j B_j(x; t)

    of the B-spline basis elements, :math:`B_j(x; t)`, which minimizes

        .. math::
            \sum_{j} \left( w_j \times (S(x_j) - y_j) \right)^2

    Parameters
    ----------
    x : array_like, shape (m,)
        Abscissas.
    y : array_like, shape (m, ...)
        Ordinates.
    t : array_like, shape (n + k + 1,).
        Knots.
        Knots and data points must satisfy Schoenberg-Whitney conditions.
    k : int, optional
        B-spline degree. Default is cubic, ``k = 3``.
    w : array_like, shape (m,), optional
        Weights for spline fitting. Must be positive. If ``None``,
        then weights are all equal.
        Default is ``None``.
    axis : int, optional
        Interpolation axis. Default is zero.
    check_finite : bool, optional
        Whether to check that the input arrays contain only finite numbers.
        Disabling may give a performance gain, but may result in problems
        (crashes, non-termination) if the inputs do contain infinities or NaNs.
        Default is True.

    Returns
    -------
    b : a BSpline object of the degree ``k`` with knots ``t``.

    See Also
    --------
    scipy.interpolate.make_lsq_spline
    BSpline : base class representing the B-spline objects
    make_interp_spline : a similar factory function for interpolating splines


    Notes
    -----
    The number of data points must be larger than the spline degree ``k``.
    Knots ``t`` must satisfy the Schoenberg-Whitney conditions,
    i.e., there must be a subset of data points ``x[j]`` such that
    ``t[j] < x[j] < t[j+k+1]``, for ``j=0, 1,...,n-k-2``.

    Nr   r   r   z'Expect x to be a 1-D sorted array_like.zExpect x to be a 1-D sequence.zNeed more x points.rC   rD   z1Expect t to be a 1D strictly increasing sequence.zShapes of x z and y z are incompatiblerE   z and w r   zmethod=z is not supported.z,Expect x to be a 1D non-decreasing sequence.rA   )r	   r   	ones_likerS   rT   r   rJ   rK   rP   r   rN   rM   r   r`   r   rQ   r   rR   )r   re   r   r   r   rB   rg   r   r   r~   yyr=   rj   r   r   r   make_lsq_spline  sP   
8



*
*& 
r   )r6   NNr   T)r6   Nr   T)%
__future__r   rS   mathr   numpy__version__numpy.core.multiarrayr   numpy.lib.array_utilsr   cupyx.scipyr   cupyx.scipy.sparse.linalgr    cupyx.scipy.interpolate._bspliner   r	   r
   r   r   r   r   r   r&   r-   r5   r   r   rV   	QR_KERNELTYPES	RawModuler   r   r   r   r   r   r   r   <module>   sP    
 	
   < ,"