o
    W۷iAB                     @  s  d dl mZ d dlZd dlmZ d dlmZ d dlmZ d dlm	Z	 d dl
mZ d dlZedd	d
ddddZdd Zdd ZdddZdd Zdd ZdddZej ZZejddge ddR d d!d"Zejd#d$d%d&d'd(d)d*d+d,d-dge ddd.d/R d0d1d"ZejZejd2dge ddR d3d4d"Zd5Zd6Zejd7d$d%efd&d'efd(d)efd*d+efd,d-efdge ddd.efd/efR d8d9d"Zd:Zed7d$d%efd&d'efd(d)efd*d+efd,d-efdge ddd.efd/efR d8Z ejd;d<ged=d>d?R d@dAd"Z!dBZ"dCZ#ejdDdEdFdGdHdIdJdKdLdMdNdOd<e#fged=e#dPd>e#fd?e#fdQe#fdRe#fR dSe"dTdUdVdWZ$dXZ%ejdYdEdFdGdHdIdJdKdLdMdNdOd<e%fged=e%dPd>e%fd?e%fdQe%fdRe%fR dZe"d[d\d]dWZ&ejd^dEdFdGdHdIdJdKdLdMdNdOd_ged=d`dPdadbdQdRR dSdcd"Z'ejdddEdFdGdHdIdJdKdLdMdNdOdeged=dfdPdgdhdQdRR dZdid"Z(djZ)ejdkdldmdndodpdqdrdsdtdudvdwgedxdydzdPd{d|d}d~R de)ddZ*dddZ+dddZ,dddZ-ej.j/dddd Z0dddZ1dS )    )annotationsN)_core)_routines_math)fusion)stride_tricks)	bf16_loopz
T x1, T x2zT yzx1 * x2za + bzy = a0dot_productc                 C  sL   | j dks
|j dkrtd| jjdv s|jjdv rdS t| ||r$dS dS )N   z&Only 1d inputs are supported currentlybuidirectfft)ndimNotImplementedErrordtypekind_fftconv_faster)in1in2mode r   E/home/ubuntu/vllm_env/lib/python3.10/site-packages/cupy/_math/misc.py_choose_conv_method   s   r   c                 C  s   dS )zJ
    .. seealso:: :func: `scipy.signal._signaltools._fftconv_faster`

    Tr   )xhr   r   r   r   r   &   s   r   fullc                 C  s   | j dkr	td|j dkrtd|jdkrtd|j | j kr&|| } }|  } | }t| ||}|dkr@t| ||}|S |dkrLt| ||}|S td)	ar  Returns the discrete, linear convolution of two one-dimensional sequences.

    Args:
        a (cupy.ndarray): first 1-dimensional input.
        v (cupy.ndarray): second 1-dimensional input.
        mode (str, optional): `valid`, `same`, `full`

    Returns:
        cupy.ndarray: Discrete, linear convolution of a and v.

    .. seealso:: :func:`numpy.convolve`

    r   za cannot be emptyzv cannot be emptyr
   z"v cannot be multidimensional arrayr   r   zUnsupported method)size
ValueErrorr   ravelr   _dot_convolve_fft_convolve)avr   methodoutr   r   r   convolve/   s$   



r%   c                 C  sd  d}| j d |j d k r|| } }d|j d d  }| jjdks&|jjdkr0tjjtjj}}n	tjjtjj}}t| |}| j d |j d }}dd l	}	|	j
j|| d }
|| |
}|||
}||| |
}|dkrxd|| d }}n!|dkr|d d | }|| }n|dkr|d |}}ntd	|d
||f }|jdv rt|}|j|ddS )Nr   r
      cr   samevalidz5acceptable mode flags are `valid`, `same`, or `full`..iuFcopy)shaper   r   cupyr   ifftrfftirfftresult_typecupyx.scipy.fftscipynext_fast_lenr   aroundastype)a1a2r   offsetr   r0   r   n1n2cupyxout_sizefa1fa2r$   startendr   r   r   r    R   s8   





r    c                 C  s
  d}| j |j k r|| } }d|j d  }t| |}| j |j }}| j|dd} |j|dd}|dkrB|| d }t| |d } n%|dkr]|}|d d | }t| |d | |f} n
|dkrg|| d }| jd }	t| ||f|	|	f} t| |d d d	 dd
}
|
S )Nr   r
   r'   Fr,   r   r)   r*   r&   )axis)	r   r/   r3   r8   padstridesr   
as_strided_dot_kernel)r9   r:   r   r;   r   r<   r=   r?   pad_sizestrideoutputr   r   r   r   y   s*   

r   c                 C  s.   t  rt jtj| |||dS | j|||dS )a  Clips the values of an array to a given interval.

    This is equivalent to ``maximum(minimum(a, a_max), a_min)``, while this
    function is more efficient.

    Args:
        a (cupy.ndarray): The source array.
        a_min (scalar, cupy.ndarray or None): The left side of the interval.
            When it is ``None``, it is ignored.
        a_max (scalar, cupy.ndarray or None): The right side of the interval.
            When it is ``None``, it is ignored.
        out (cupy.ndarray): Output array.

    Returns:
        cupy.ndarray: Clipped array.

    .. seealso:: :func:`numpy.clip`

    Notes
    -----
    When `a_min` is greater than `a_max`, `clip` returns an
    array in which all values are equal to `a_max`.
    r$   )r   
_is_fusing_call_ufunc_mathclip)r!   a_mina_maxr$   r   r   r   rP      s
   rP   	cupy_cbrtze->ezf->fzd->dzout0 = cbrt(in0)zJElementwise cube root function.

    .. seealso:: :data:`numpy.cbrt`

    )doccupy_squarezb->bzB->Bzh->hzH->Hzi->izI->Izl->lzL->Lzq->qzQ->QzF->FzD->Dzout0 = in0 * in0zIElementwise square function.

    .. seealso:: :data:`numpy.square`

    	cupy_fabszout0 = abs(in0)zuCalculates absolute values element-wise.
    Only real values are handled.

    .. seealso:: :data:`numpy.fabs`

    zout0 = in0 > 0z
out0 = in0 / abs(in0)
	cupy_signzout0 = (in0 > 0) - (in0 < 0)zElementwise sign function.

    It returns -1, 0, or 1 depending on the sign of the input.

    .. seealso:: :data:`numpy.sign`

    z
if (in0.real() == 0) {
  out0 = (in0.imag() > 0) - (in0.imag() < 0);
} else {
  out0 = (in0.real() > 0) - (in0.real() < 0);
}
cupy_heavisideee->er'   ff->fdd->dz
    if (isnan(in0)) {
        out0 = in0;
    } else if (in0 == 0) {
        out0 = in1;
    } else {
        out0 = (in0 > 0);
    }
    zTCompute the Heaviside step function.

    .. seealso:: :data:`numpy.heaviside`

    z;
#ifndef NAN
#define NAN __int_as_float(0x7fffffff)
#endif
zLout0 = (isnan(in0) | isnan(in1)) ? out0_type(NAN) : out0_type(max(in0, in1))cupy_maximumz??->?zbb->bzBB->Bzhh->hzHH->Hzii->izII->Izll->lzLL->Lzqq->qzQQ->Q)codezFF->FzDD->Dzout0 = max(in0, in1)zTakes the maximum of two arrays elementwise.

    If NaN appears, it returns the NaN.

    .. seealso:: :data:`numpy.maximum`

    )OP_MAXr
   r
   max)preamblerT   cutensor_op
scatter_opzLout0 = (isnan(in0) | isnan(in1)) ? out0_type(NAN) : out0_type(min(in0, in1))cupy_minimumzout0 = min(in0, in1)zTakes the minimum of two arrays elementwise.

    If NaN appears, it returns the NaN.

    .. seealso:: :data:`numpy.minimum`

    )OP_MINr
   r
   min	cupy_fmax)rY   out0 = fmax(in0, in1)rg   )rZ   rg   )r[   rg   zTakes the maximum of two arrays elementwise.

    If NaN appears, it returns the other operand.

    .. seealso:: :data:`numpy.fmax`

    	cupy_fmin)rY   out0 = fmin(in0, in1)ri   )rZ   ri   )r[   ri   zTakes the minimum of two arrays elementwise.

    If NaN appears, it returns the other operand.

    .. seealso:: :data:`numpy.fmin`

    a  
template <class T>
__device__ T nan_to_num(T x, T nan, T posinf, T neginf) {
    if (isnan(x))
        return nan;
    if (isinf(x))
        return x > 0 ? posinf : neginf;
    return x;
}

template <class T>
__device__ complex<T> nan_to_num(complex<T> x, T nan, T posinf, T neginf) {
    T re = nan_to_num(x.real(), nan, posinf, neginf);
    T im = nan_to_num(x.imag(), nan, posinf, neginf);
    return complex<T>(re, im);
}
cupy_nan_to_num_z????->?zbbbb->bzBBBB->Bzhhhh->hzHHHH->Hziiii->izIIII->Izllll->lzLLLL->Lzqqqq->qzQQQQ->Q)zeeee->e%out0 = nan_to_num(in0, in1, in2, in3)   r
   rk   )zffff->frk   )zdddd->drk   )zFfff->Frk   )zDddd->Drk   z
out0 = in0zQElementwise nan_to_num function.

    .. seealso:: :func:`numpy.nan_to_num`

    )r`   rT   c                 C  sb   |j dv rt|j  }|j dvrd} n| d u r+|d ur+|r%t|jnt|j} t| |S )NFDefdr   )charr/   r   lowerfinfore   r_   
asanyarray)r   r   negr   r   r   _check_nan_inf  s   

rt   T        c                 C  sf   t | tjstd| }n	|rt| n| }|j}t||}t||d}t||d}t| ||||dS )zReplace NaN with zero and infinity with large finite numbers (default
    behaviour) or with the numbers defined by the user using the `nan`,
    `posinf` and/or `neginf` keywords.

    .. seealso:: :func:`numpy.nan_to_num`

    r   FTrL   )
isinstancer/   ndarrayr   
empty_liker   rt   _nan_to_num)r   r-   nanposinfneginfr$   r   r   r   r   
nan_to_num  s   
r}   d   c                 C  sT   t | jjtjs
| S |dkrt| jj}|j| }tt	| j
|k r(| j} | S )a#  If input is complex with all imaginary parts close to zero, return real
    parts.
    "Close to zero" is defined as `tol` * (machine epsilon of the type for
    `a`).

    .. warning::

            This function may synchronize the device.

    .. seealso:: :func:`numpy.real_if_close`
    r
   )
issubclassr   typer/   complexfloatingnumpyrq   epsallabsoluteimagreal)r!   tolfr   r   r   real_if_close  s   
r   )for_each_devicec                 C  sL   d}|d7 }d}| rd}nd}|d7 }|t jjj7 }d}t j|||d|d	S )
Nzraw V x, raw U idx, z2raw W fx, raw Y fy, U len, raw Y left, raw Y rightzZ yztypedef double real_t;
ztypedef Z real_t;
ztypedef Z value_t;
a  
        U x_idx = idx[i] - 1;

        if ( _isnan<V>(x[i]) ) { y = x[i]; }
        else if (x_idx < 0) { y = left[0]; }
        else if (x[i] == fx[len - 1]) {
            // searchsorted cannot handle both of the boundary points,
            // so we must detect and correct ourselves...
            y = fy[len - 1];
        }
        else if (x_idx >= len - 1) { y = right[0]; }
        else {
            const Z slope = (value_t)(fy[x_idx+1] - fy[x_idx]) / \
                            ((real_t)fx[x_idx+1] - (real_t)fx[x_idx]);
            Z out = slope * ((real_t)x[i] - (real_t)fx[x_idx]) \
                    + (value_t)fy[x_idx];
            if (_isnan<Z>(out)) {
                out = slope * ((real_t)x[i] - (real_t)fx[x_idx+1]) \
                      + (value_t)fy[x_idx+1];
                if (_isnan<Z>(out) && (fy[x_idx] == fy[x_idx+1])) {
                    out = fy[x_idx];
                }
            }
            y = out;
        }
    cupy_interp)r`   )r/   _sortingsearch	_preambleElementwiseKernel)
is_complex	in_params
out_paramsr`   r]   r   r   r   _get_interp_kernel  s   
r   c              	   C  s  |j dks
|j dkrtd|j|jkrtd|jdkr!td| jjs)tdt| |}t|tj	s?t
d|tj	|dur|dkrKtd	t|}d}d}| tj	} |tj	}| |; } ||; }t|}|| }|| }t|d
d | ||dd | f}t|d
d ||dd f}|jjsJ |jjsJ |jjdkrdnd}tj| j|d}	tj|| dd}
|du r|d nt||j}|du r|d
 nt||j}t|dk}|| |
|||j|||	 |	S )a   One-dimensional linear interpolation.

    Args:
        x (cupy.ndarray): a 1D array of points on which the interpolation
            is performed.
        xp (cupy.ndarray): a 1D array of points on which the function values
            (``fp``) are known.
        fp (cupy.ndarray): a 1D array containing the function values at the
            the points ``xp``.
        left (float or complex): value to return if ``x < xp[0]``. Default is
            ``fp[0]``.
        right (float or complex): value to return if ``x > xp[-1]``. Default is
            ``fp[-1]``.
        period (None or float): a period for the x-coordinates. Parameters
            ``left`` and ``right`` are ignored if ``period`` is specified.
            Default is ``None``.

    Returns:
        cupy.ndarray: The interpolated values, same shape as ``x``.

    .. note::
        This function may synchronize if ``left`` or ``right`` is not already
        on the device.

    .. seealso:: :func:`numpy.interp`

    r
   zxp and fp must be 1D arraysz$fp and xp are not of the same lengthr   zarray of sample points is emptyz-Non-C-contiguous x is currently not supportedzACannot cast array data from {} to {} according to the rule 'safe'Nzperiod must be a non-zero valuer&   r(   Dd)r   right)side)r   r   r   flagsc_contiguousr   r/   common_typecan_castfloat64	TypeErrorformatabsr8   argsortconcatenater   r   emptyr.   searchsortedarrayr   )r   xpfpleftr   periodx_dtypeasort_xp	out_dtyperK   idxkernr   r   r   interp  sL   


( r   )r   )N)Tru   NN)r~   )NNN)2
__future__r   r/   r   
cupy._corer   rO   r   cupy.libr   
cupy._utilr   r   ReductionKernelrH   r   r   r%   r    r   rP   sqrt
sqrt_fixedcreate_ufunccbrtsquarer   fabs_unsigned_sign_complex_signsign_legacy_complex_sign_legacy_sign	heaviside_float_preamble_float_maximummaximum_float_minimumminimumfmaxfmin_nan_to_num_preamblery   rt   r}   r   _utilmemoizer   r   r   r   r   r   <module>   s   
	#'

"		




	




*