# --------------------------------------------------------------------------
# ⚠️ WARNING - AUTO-GENERATED CODE - DO NOT EDIT ⚠️
# ⚙️ Generated by 'python -m opgen'
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# pylint: disable=W0221,W0222,R0901,W0237
# mypy: disable-error-code=override
# ruff: noqa: D402
# --------------------------------------------------------------------------

from __future__ import annotations

from typing import Optional, Sequence, Tuple, TypeVar, Union

from onnx import SparseTensorProto, TensorProto
from onnx.defs import get_schema
from typing_extensions import TypeAlias

from onnxscript.onnx_opset._impl.opset11 import Opset11
from onnxscript.onnx_types import (
    BOOL,
    COMPLEX64,
    COMPLEX128,
    DOUBLE,
    FLOAT,
    FLOAT16,
    INT8,
    INT16,
    INT32,
    INT64,
    STRING,
    UINT8,
    UINT16,
    UINT32,
    UINT64,
)
from onnxscript.values import Op, Opset


class Opset12(Opset11):
    def __new__(cls):
        return Opset.__new__(cls, "", 12)

    T_ArgMax = TypeVar(
        "T_ArgMax",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def ArgMax(
        self,
        data: T_ArgMax,
        *,
        axis: int = 0,
        keepdims: int = 1,
        select_last_index: int = 0,
    ) -> INT64:
        r"""[🌐 ArgMax(12)](https://onnx.ai/onnx/operators/onnx__ArgMax.html#argmax-12 "Online Documentation")


        Computes the indices of the max elements of the input tensor's element along the
        provided axis. The resulting tensor has the same rank as the input if keepdims equals 1.
        If keepdims equal 0, then the resulting tensor has the reduced dimension pruned.
        If select_last_index is True (default False), the index of the last occurrence of the max
        is selected if the max appears more than once in the input. Otherwise the index of the
        first occurrence is selected.
        The type of the output tensor is integer.

        Args:
            data: An input tensor.

            axis: The axis in which to compute the arg indices. Accepted range is [-r,
                r-1] where r = rank(data).

            keepdims: Keep the reduced dimension or not, default 1 means keep reduced
                dimension.

            select_last_index: Whether to select the last index or the first index if
                the {name} appears in multiple indices, default is False (first index).
        """

        schema = get_schema("ArgMax", 12, "")
        op = Op(self, "ArgMax", schema)
        return op(
            *self._prepare_inputs(schema, data),
            axis=axis,
            keepdims=keepdims,
            select_last_index=select_last_index,
        )

    T_ArgMin = TypeVar(
        "T_ArgMin",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def ArgMin(
        self,
        data: T_ArgMin,
        *,
        axis: int = 0,
        keepdims: int = 1,
        select_last_index: int = 0,
    ) -> INT64:
        r"""[🌐 ArgMin(12)](https://onnx.ai/onnx/operators/onnx__ArgMin.html#argmin-12 "Online Documentation")


        Computes the indices of the min elements of the input tensor's element along the
        provided axis. The resulting tensor has the same rank as the input if keepdims equals 1.
        If keepdims equal 0, then the resulting tensor has the reduced dimension pruned.
        If select_last_index is True (default False), the index of the last occurrence of the min
        is selected if the min appears more than once in the input. Otherwise the index of the
        first occurrence is selected.
        The type of the output tensor is integer.

        Args:
            data: An input tensor.

            axis: The axis in which to compute the arg indices. Accepted range is [-r,
                r-1] where r = rank(data).

            keepdims: Keep the reduced dimension or not, default 1 means keep reduced
                dimension.

            select_last_index: Whether to select the last index or the first index if
                the {name} appears in multiple indices, default is False (first index).
        """

        schema = get_schema("ArgMin", 12, "")
        op = Op(self, "ArgMin", schema)
        return op(
            *self._prepare_inputs(schema, data),
            axis=axis,
            keepdims=keepdims,
            select_last_index=select_last_index,
        )

    T_Celu: TypeAlias = FLOAT

    def Celu(self, X: T_Celu, *, alpha: float = 1.0) -> T_Celu:
        r"""[🌐 Celu(12)](https://onnx.ai/onnx/operators/onnx__Celu.html#celu-12 "Online Documentation")


        Continuously Differentiable Exponential Linear Units:
        Perform the linear unit element-wise on the input tensor X
        using formula:

        ::

            max(0,x) + min(0,alpha*(exp(x/alpha)-1))


        Args:
            X: (differentiable) Input tensor

            alpha: The Alpha value in Celu formula which control the shape of the unit.
                The default value is 1.0.
        """

        schema = get_schema("Celu", 12, "")
        op = Op(self, "Celu", schema)
        return op(*self._prepare_inputs(schema, X), alpha=alpha)

    T_Clip = TypeVar(
        "T_Clip",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Clip(
        self, input: T_Clip, min: Optional[T_Clip] = None, max: Optional[T_Clip] = None
    ) -> T_Clip:
        r"""[🌐 Clip(12)](https://onnx.ai/onnx/operators/onnx__Clip.html#clip-12 "Online Documentation")


        Clip operator limits the given input within an interval. The interval is
        specified by the inputs 'min' and 'max'. They default to
        numeric_limits::lowest() and numeric_limits::max(), respectively.


        Args:
            input: Input tensor whose elements to be clipped

            min: (optional) Minimum value, under which element is replaced by min. It
                must be a scalar(tensor of empty shape).

            max: (optional) Maximum value, above which element is replaced by max. It
                must be a scalar(tensor of empty shape).
        """

        schema = get_schema("Clip", 12, "")
        op = Op(self, "Clip", schema)
        return op(*self._prepare_inputs(schema, input, min, max))

    T_Constant: TypeAlias = Union[
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    ]

    def Constant(
        self,
        *,
        sparse_value: Optional[SparseTensorProto] = None,
        value: Optional[TensorProto] = None,
        value_float: Optional[float] = None,
        value_floats: Optional[Sequence[float]] = None,
        value_int: Optional[int] = None,
        value_ints: Optional[Sequence[int]] = None,
        value_string: Optional[str] = None,
        value_strings: Optional[Sequence[str]] = None,
    ) -> T_Constant:
        r"""[🌐 Constant(12)](https://onnx.ai/onnx/operators/onnx__Constant.html#constant-12 "Online Documentation")


        This operator produces a constant tensor. Exactly one of the provided attributes, either value, sparse_value,
        or value_* must be specified.


        Args:
            sparse_value: The value for the elements of the output tensor in sparse
                format.

            value: The value for the elements of the output tensor.

            value_float: The value for the sole element for the scalar, float32, output
                tensor.

            value_floats: The values for the elements for the 1D, float32, output
                tensor.

            value_int: The value for the sole element for the scalar, int64, output
                tensor.

            value_ints: The values for the elements for the 1D, int64, output tensor.

            value_string: The value for the sole element for the scalar, UTF-8 string,
                output tensor.

            value_strings: The values for the elements for the 1D, UTF-8 string, output
                tensor.
        """

        schema = get_schema("Constant", 12, "")
        op = Op(self, "Constant", schema)
        return op(
            sparse_value=sparse_value,
            value=value,
            value_float=value_float,
            value_floats=value_floats,
            value_int=value_int,
            value_ints=value_ints,
            value_string=value_string,
            value_strings=value_strings,
        )

    T_Dropout = TypeVar("T_Dropout", DOUBLE, FLOAT, FLOAT16)

    T1_Dropout = TypeVar("T1_Dropout", DOUBLE, FLOAT, FLOAT16)

    T2_Dropout: TypeAlias = BOOL

    def Dropout(
        self,
        data: T_Dropout,
        ratio: Optional[T1_Dropout] = None,
        training_mode: Optional[T2_Dropout] = None,
        *,
        seed: Optional[int] = None,
    ) -> Tuple[T_Dropout, T2_Dropout]:
        r"""[🌐 Dropout(12)](https://onnx.ai/onnx/operators/onnx__Dropout.html#dropout-12 "Online Documentation")


        Dropout takes an input floating-point tensor, an optional input ratio (floating-point scalar) and an optional input training_mode (boolean scalar). It produces two tensor outputs,
        output (floating-point tensor) and mask (optional `Tensor<bool>`). If `training_mode` is true then the output Y will be a random dropout;
        Note that this Dropout scales the masked input data by the following equation, so to convert the trained model into inference mode,
        the user can simply not pass `training_mode` input or set it to false.
        ::

            output = scale * data * mask,


        where
        ::

            scale = 1. / (1. - ratio).


        This operator has **optional** inputs/outputs. See `ONNX <https://github.com/onnx/onnx/blob/master/docs/IR.md>`_ for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted.


        Args:
            data: The input data as Tensor.

            ratio: (optional) The ratio of random dropout, with value in [0, 1). If this
                input was not set, or if it was set to 0, the output would be a simple
                copy of the input. If it's non-zero, output will be a random dropout of
                the scaled input, which is typically the case during training. It is an
                optional value, if not specified it will default to 0.5.

            training_mode: (optional) If set to true then it indicates dropout is being
                used for training. It is an optional value hence unless specified
                explicitly, it is false. If it is false, ratio is ignored and the
                operation mimics inference mode where nothing will be dropped from the
                input data and if mask is requested as output it will contain all ones.

            seed: (Optional) Seed to the random generator, if not specified we will auto
                generate one.
        """

        schema = get_schema("Dropout", 12, "")
        op = Op(self, "Dropout", schema)
        return op(*self._prepare_inputs(schema, data, ratio, training_mode), seed=seed)

    T_Einsum = TypeVar(
        "T_Einsum",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Einsum(self, *Inputs: T_Einsum, equation: str) -> T_Einsum:
        r"""[🌐 Einsum(12)](https://onnx.ai/onnx/operators/onnx__Einsum.html#einsum-12 "Online Documentation")


        An einsum of the form `term1, term2 -> output-term` produces an output tensor using the following equation

        ::

            output[output-term] = reduce-sum( input1[term1] * input2[term2] )


        where the reduce-sum performs a summation over all the indices occurring in the input terms (term1, term2)
        that do not occur in the output-term.

        The Einsum operator evaluates algebraic tensor operations on a sequence of tensors, using the Einstein summation
        convention. The equation string contains a comma-separated sequence of lower case letters. Each term corresponds to
        an operand tensor, and the characters within the terms correspond to operands dimensions.

        This sequence may be followed by "->" to separate the left and right hand side of the equation.
        If the equation contains "->" followed by the right-hand side, the explicit (not classical) form of the Einstein
        summation is performed, and the right-hand side indices indicate output tensor dimensions. In other cases,
        output indices are (implicitly) set to the alphabetically sorted sequence of indices appearing exactly once in the
        equation.

        When a dimension character is repeated in the left-hand side, it represents summation along the dimension.

        The equation may contain ellipsis ("...") to enable broadcasting. Ellipsis must indicate a fixed number of dimensions.
        Specifically, every occurrence of ellipsis in the equation must represent the same number of dimensions.
        The right-hand side may contain exactly one ellipsis. In implicit mode, the ellipsis dimensions are set to the
        beginning of the output. The equation string may contain space (U+0020) character.


        Args:
            Inputs: (variadic, differentiable) Operands

            equation: Einsum expression string.
        """

        schema = get_schema("Einsum", 12, "")
        op = Op(self, "Einsum", schema)
        return op(*self._prepare_inputs(schema, *Inputs), equation=equation)

    T_GatherND = TypeVar(
        "T_GatherND",
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def GatherND(self, data: T_GatherND, indices: INT64, *, batch_dims: int = 0) -> T_GatherND:
        r"""[🌐 GatherND(12)](https://onnx.ai/onnx/operators/onnx__GatherND.html#gathernd-12 "Online Documentation")


        Given `data` tensor of rank `r` >= 1, `indices` tensor of rank `q` >= 1, and `batch_dims` integer `b`, this operator gathers
        slices of `data` into an output tensor of rank `q + r - indices_shape[-1] - 1 - b`.

        `indices` is an q-dimensional integer tensor, best thought of as a `(q-1)`-dimensional tensor of index-tuples into `data`,
        where each element defines a slice of `data`

        `batch_dims` (denoted as `b`) is an integer indicating the number of batch dimensions, i.e the leading `b` number of dimensions of
        `data` tensor and `indices` are representing the batches, and the gather starts from the `b+1` dimension.

        Some salient points about the inputs' rank and shape:

        1) r >= 1 and q >= 1 are to be honored. There is no dependency condition to be met between ranks `r` and `q`

        2) The first `b` dimensions of the shape of `indices` tensor and `data` tensor must be equal.

        3) b < min(q, r) is to be honored.

        4) The `indices_shape[-1]` should have a value between 1 (inclusive) and rank `r-b` (inclusive)

        5) All values in `indices` are expected to be within bounds [-s, s-1] along axis of size `s` (i.e.) `-data_shape[i] <= indices[...,i] <= data_shape[i] - 1`.
           It is an error if any of the index values are out of bounds.

        The output is computed as follows:

        The output tensor is obtained by mapping each index-tuple in the `indices` tensor to the corresponding slice of the input `data`.

        1) If `indices_shape[-1] > r-b` => error condition

        2) If `indices_shape[-1] == r-b`, since the rank of `indices` is `q`, `indices` can be thought of as `N` `(q-b-1)`-dimensional tensors
           containing 1-D tensors of dimension `r-b`, where `N` is an integer equals to the product of 1 and all the elements in the batch dimensions
           of the indices_shape. Let us think of each such `r-b` ranked tensor as `indices_slice`. Each *scalar value* corresponding to `data[0:b-1,indices_slice]`
           is filled into the corresponding location of the `(q-b-1)`-dimensional tensor to form the `output` tensor (Example 1 below)

        3) If `indices_shape[-1] < r-b`, since the rank of `indices` is `q`, `indices` can be thought of as `N` `(q-b-1)`-dimensional tensor
           containing 1-D tensors of dimension `< r-b`. Let us think of each such tensors as `indices_slice`. Each *tensor slice* corresponding
           to `data[0:b-1, indices_slice , :]` is filled into the corresponding location of the `(q-b-1)`-dimensional tensor
           to form the `output` tensor (Examples 2, 3, 4 and 5 below)

        This operator is the inverse of `ScatterND`.

        `Example 1`

          batch_dims = 0

          data    = [[0,1],[2,3]]   # data_shape = [2, 2]

          indices = [[0,0],[1,1]]   # indices_shape = [2, 2]

          output  = [0,3]           # output_shape = [2]

        `Example 2`

          batch_dims = 0

          data    = [[0,1],[2,3]]  # data_shape = [2, 2]

          indices = [[1],[0]]      # indices_shape = [2, 1]

          output  = [[2,3],[0,1]]  # output_shape = [2, 2]

        `Example 3`

          batch_dims = 0

          data    = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2]

          indices = [[0,1],[1,0]]                 # indices_shape = [2, 2]

          output  = [[2,3],[4,5]]                 # output_shape = [2, 2]

        `Example 4`

          batch_dims = 0

          data    = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2]

          indices = [[[0,1]],[[1,0]]]             # indices_shape = [2, 1, 2]

          output  = [[[2,3]],[[4,5]]]             # output_shape = [2, 1, 2]

        `Example 5`

          batch_dims = 1

          data    = [[[0,1],[2,3]],[[4,5],[6,7]]] # data_shape = [2, 2, 2]

          indices = [[1],[0]]             # indices_shape = [2, 1]

          output  = [[2,3],[4,5]]             # output_shape = [2, 2]


        Args:
            data: Tensor of rank r >= 1.

            indices: Tensor of rank q >= 1. All index values are expected to be within
                bounds [-s, s-1] along axis of size s. It is an error if any of the
                index values are out of bounds.

            batch_dims: The number of batch dimensions. The gather of indexing starts
                from dimension of data[batch_dims:]
        """

        schema = get_schema("GatherND", 12, "")
        op = Op(self, "GatherND", schema)
        return op(*self._prepare_inputs(schema, data, indices), batch_dims=batch_dims)

    T_GreaterOrEqual = TypeVar(
        "T_GreaterOrEqual",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    T1_GreaterOrEqual: TypeAlias = BOOL

    def GreaterOrEqual(self, A: T_GreaterOrEqual, B: T_GreaterOrEqual) -> T1_GreaterOrEqual:
        r"""[🌐 GreaterOrEqual(12)](https://onnx.ai/onnx/operators/onnx__GreaterOrEqual.html#greaterorequal-12 "Online Documentation")


        Returns the tensor resulted from performing the `greater_equal` logical operation
        elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support).

        This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check `Broadcasting in ONNX <https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md>`_.


        Args:
            A: (non-differentiable) First input operand for the logical operator.

            B: (non-differentiable) Second input operand for the logical operator.
        """

        schema = get_schema("GreaterOrEqual", 12, "")
        op = Op(self, "GreaterOrEqual", schema)
        return op(*self._prepare_inputs(schema, A, B))

    T_LessOrEqual = TypeVar(
        "T_LessOrEqual",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    T1_LessOrEqual: TypeAlias = BOOL

    def LessOrEqual(self, A: T_LessOrEqual, B: T_LessOrEqual) -> T1_LessOrEqual:
        r"""[🌐 LessOrEqual(12)](https://onnx.ai/onnx/operators/onnx__LessOrEqual.html#lessorequal-12 "Online Documentation")


        Returns the tensor resulted from performing the `less_equal` logical operation
        elementwise on the input tensors `A` and `B` (with Numpy-style broadcasting support).

        This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check `Broadcasting in ONNX <https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md>`_.


        Args:
            A: (non-differentiable) First input operand for the logical operator.

            B: (non-differentiable) Second input operand for the logical operator.
        """

        schema = get_schema("LessOrEqual", 12, "")
        op = Op(self, "LessOrEqual", schema)
        return op(*self._prepare_inputs(schema, A, B))

    T_Max = TypeVar(
        "T_Max",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Max(self, *data_0: T_Max) -> T_Max:
        r"""[🌐 Max(12)](https://onnx.ai/onnx/operators/onnx__Max.html#max-12 "Online Documentation")


        Element-wise max of each of the input tensors (with Numpy-style broadcasting support).
        All inputs and outputs must have the same data type.
        This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check `Broadcasting in ONNX <https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md>`_.


        Args:
            data_0: (variadic) List of tensors for max.
        """

        schema = get_schema("Max", 12, "")
        op = Op(self, "Max", schema)
        return op(*self._prepare_inputs(schema, *data_0))

    T_MaxPool = TypeVar("T_MaxPool", DOUBLE, FLOAT, FLOAT16, INT8, UINT8)

    I_MaxPool: TypeAlias = INT64

    def MaxPool(
        self,
        X: T_MaxPool,
        *,
        auto_pad: str = "NOTSET",
        ceil_mode: int = 0,
        dilations: Optional[Sequence[int]] = None,
        kernel_shape: Sequence[int],
        pads: Optional[Sequence[int]] = None,
        storage_order: int = 0,
        strides: Optional[Sequence[int]] = None,
    ) -> Tuple[T_MaxPool, I_MaxPool]:
        r"""[🌐 MaxPool(12)](https://onnx.ai/onnx/operators/onnx__MaxPool.html#maxpool-12 "Online Documentation")


         MaxPool consumes an input tensor X and applies max pooling across
         the tensor according to kernel sizes, stride sizes, and pad lengths.
         max pooling consisting of computing the max on all values of a
         subset of the input tensor according to the kernel size and downsampling the
         data into the output tensor Y for further processing. The output spatial shape is calculated differently
         depending on whether explicit padding is used, where pads is employed, or auto padding is used, where auto_pad is utilized.
         With explicit padding (https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html?highlight=maxpool#torch.nn.MaxPool2d):
         ```
         output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
         ```
         or
         ```
         output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
         ```
         if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`.

         `auto_pad` is a DEPRECATED attribute. If you are using them currently, the output spatial shape will be following when ceil_mode is enabled:
         ```
         VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) + 1) / strides_spatial_shape[i])
         SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
         ```
         or when ceil_mode is disabled (https://www.tensorflow.org/api_docs/python/tf/keras/layers/AveragePooling2D):
         ```
         VALID: output_spatial_shape[i] = floor((input_spatial_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i]) + 1
         SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = floor((input_spatial_shape[i] - 1) / strides_spatial_shape[i]) + 1
         ```
         And pad shape will be following if `SAME_UPPER` or `SAME_LOWER`:
         ```
         pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) - input_spatial_shape[i]
         ```
         The output of each pooling window is maximum number of elements exclude pad.


        Args:
            X: (differentiable) Input data tensor from the previous operator; dimensions
                for image case are (N x C x H x W), where N is the batch size, C is the
                number of channels, and H and W are the height and the width of the
                data. For non image case, the dimensions are in the form of (N x C x D1
                x D2 ... Dn), where N is the batch size. Optionally, if dimension
                denotation is in effect, the operation expects the input data tensor to
                arrive with the dimension denotation of [DATA_BATCH, DATA_CHANNEL,
                DATA_FEATURE, DATA_FEATURE ...].

            auto_pad: auto_pad must be either NOTSET, SAME_UPPER, SAME_LOWER or VALID.
                Where default value is NOTSET, which means explicit padding is used.
                SAME_UPPER or SAME_LOWER mean pad the input so that `output_shape[i] =
                ceil(input_shape[i] / strides[i])` for each axis `i`. The padding is
                split between the two sides equally or almost equally (depending on
                whether it is even or odd). In case the padding is an odd number, the
                extra padding is added at the end for SAME_UPPER and at the beginning
                for SAME_LOWER.

            ceil_mode: Whether to use ceil or floor (default) to compute the output
                shape.

            dilations: Dilation value along each spatial axis of filter. If not present,
                the dilation defaults to 1 along each spatial axis.

            kernel_shape: The size of the kernel along each axis.

            pads: Padding for the beginning and ending along each spatial axis, it can
                take any value greater than or equal to 0. The value represent the
                number of pixels added to the beginning and end part of the
                corresponding axis. `pads` format should be as follow [x1_begin,
                x2_begin...x1_end, x2_end,...], where xi_begin the number of pixels
                added at the beginning of axis `i` and xi_end, the number of pixels
                added at the end of axis `i`. This attribute cannot be used
                simultaneously with auto_pad attribute. If not present, the padding
                defaults to 0 along start and end of each spatial axis.

            storage_order: The storage order of the tensor. 0 is row major, and 1 is
                column major. This attribute is used only to convert an n-tuple index
                value into a single integer value for producing the second output.

            strides: Stride along each spatial axis. If not present, the stride defaults
                to 1 along each spatial axis.
        """

        schema = get_schema("MaxPool", 12, "")
        op = Op(self, "MaxPool", schema)
        return op(
            *self._prepare_inputs(schema, X),
            auto_pad=auto_pad,
            ceil_mode=ceil_mode,
            dilations=dilations,
            kernel_shape=kernel_shape,
            pads=pads,
            storage_order=storage_order,
            strides=strides,
        )

    T_Min = TypeVar(
        "T_Min",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Min(self, *data_0: T_Min) -> T_Min:
        r"""[🌐 Min(12)](https://onnx.ai/onnx/operators/onnx__Min.html#min-12 "Online Documentation")


        Element-wise min of each of the input tensors (with Numpy-style broadcasting support).
        All inputs and outputs must have the same data type.
        This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check `Broadcasting in ONNX <https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md>`_.


        Args:
            data_0: (variadic) List of tensors for min.
        """

        schema = get_schema("Min", 12, "")
        op = Op(self, "Min", schema)
        return op(*self._prepare_inputs(schema, *data_0))

    T_NegativeLogLikelihoodLoss = TypeVar(
        "T_NegativeLogLikelihoodLoss", DOUBLE, FLOAT, FLOAT16
    )

    Tind_NegativeLogLikelihoodLoss = TypeVar("Tind_NegativeLogLikelihoodLoss", INT32, INT64)

    def NegativeLogLikelihoodLoss(
        self,
        input: T_NegativeLogLikelihoodLoss,
        target: Tind_NegativeLogLikelihoodLoss,
        weight: Optional[T_NegativeLogLikelihoodLoss] = None,
        *,
        ignore_index: Optional[int] = None,
        reduction: str = "mean",
    ) -> T_NegativeLogLikelihoodLoss:
        r"""[🌐 NegativeLogLikelihoodLoss(12)](https://onnx.ai/onnx/operators/onnx__NegativeLogLikelihoodLoss.html#negativeloglikelihoodloss-12 "Online Documentation")


        A NegativeLogLikelihoodLoss operator computes (weighted) negative log likelihood loss.
        Its "input" tensor has the shape of (N, C, d1, d2, ..., dk) where k >= 0.
        The "input" tensor contains log-probabilities for input[n, :, d_1, d_2,..., d_k] being in a class of [0, C).
        The operator's "target" input tensor has the shape of (N, d1, d2, ..., dk). It encodes class labels (one of C classes)
        or it may contain a special value (indicated by an attribute ignore_index) for N x d1 x d2 x ... x dk samples.
        The loss value for input[n, :, d_1, d_2,...d_k] being classified as class c = target[n][d_1][d_2]...[d_k] is computed as:
            loss[n][d_1][d_2]...[d_k] = -input[n][c][d_1][d_2]...[d_k].
        When an optional "weight" is provided, the sample loss is calculated as:
            loss[n][d_1][d_2]...[d_k] = -input[n][c][d_1][d_2]...[d_k] * weight[c].
        loss is zero for the case when target-value equals ignore_index.

            loss[n][d_1][d_2]...[d_k] = 0, when target[n][d_1][d_2]...[d_k] = ignore_index
        If "reduction" attribute is set to "none", the operator's output will be the above loss with shape (N, d1, d2, ..., dk).
        If "reduction" attribute is set to "mean" (the default attribute value), the output loss is (weight) averaged:
            mean(loss), if "weight" is not provided,
        or if weight is provided,
            sum(loss) / sum(weight[target[n][d_1][d_2]...[d_k]]]), for all samples.
        If "reduction" attribute is set to "sum", the output is a scalar:
            sum(loss).
        See also https://pytorch.org/docs/stable/nn.html#torch.nn.NLLLoss.
        Example 1:
            // negative log likelihood loss, "none" reduction
            N, C, d1 = 2, 3, 2
            input = [[[1.0, 2.0], [2.0, 2.0], [3.0, 2.0]],
                     [[0.0, 1.0], [2.0, 2.0], [1.0, 2]]]
            target = [[2, 1], [0, 2]]
            loss = np.zeros((N, d1))
            for n in range(N):
                for d_1 in range(d1):
                    c = target[n][d_1]
                    loss[n][d_1] = -input[n][c][d_1]
            // print(loss)
            // [[-3. -2.]
            //  [-0. -2.]]
        Example 2:
            // weighted negative log likelihood loss, sum reduction
            N, C, d1 = 2, 3, 2
            input = [[[1.0, 2.0], [2.0, 2.0], [3.0, 2.0]],
                    [[0.0, 1.0], [2.0, 2.0], [1.0, 2]]]
            target = [[2, 1], [0, 2]]
            weight = [0.2, 0.3, 0.1]
            loss = np.zeros((N, d1))
            for n in range(N):
                for d_1 in range(d1):
                    c = target[n][d_1]
                    loss[n][d_1] = -input[n][c][d_1] * weight[c]
            loss = np.sum(loss)
            // print(loss)
            // -1.1
        Example 3:
            // weighted negative log likelihood loss, mean reduction
            N, C, d1 = 2, 3, 2
            input = [[[1.0, 2.0], [2.0, 2.0], [3.0, 2.0]],
                    [[0.0, 1.0], [2.0, 2.0], [1.0, 2]]]
            target = [[2, 1], [0, 2]]
            weight = [0.2, 0.3, 0.1]
            loss = np.zeros((N, d1))
            weight_total = 0
            for n in range(N):
                for d_1 in range(d1):
                    c = target[n][d_1]
                    loss[n][d_1] = -input[n][c][d_1] * weight[c]
                    weight_total = weight_total + weight[c]
            loss = np.sum(loss) / weight_total
            // print(loss)
            // -1.57


        Args:
            input: Input tensor of shape (N, C) or (N, C, d1, d2, ..., dk).

            target: Target tensor of shape (N) or (N, d1, d2, ..., dk). Target element
                value shall be in range of [0, C). If ignore_index is specified, it may
                have a value outside [0, C) and the target values should either be in
                the range [0, C) or have the value ignore_index.

            weight: (optional) Optional rescaling weight tensor. If given, it has to be
                a tensor of size C. Otherwise, it is treated as if having all ones.

            ignore_index: Specifies a target value that is ignored and does not
                contribute to the input gradient. It's an optional value.

            reduction: Type of reduction to apply to loss: none, sum, mean (default).
                'none': the output is the loss for each sample. 'sum': the output will
                be summed. 'mean': the sum of the output will be divided by the sum of
                applied weights.
        """

        schema = get_schema("NegativeLogLikelihoodLoss", 12, "")
        op = Op(self, "NegativeLogLikelihoodLoss", schema)
        return op(
            *self._prepare_inputs(schema, input, target, weight),
            ignore_index=ignore_index,
            reduction=reduction,
        )

    T_Pow = TypeVar("T_Pow", DOUBLE, FLOAT, FLOAT16, INT32, INT64)

    T1_Pow = TypeVar(
        "T1_Pow",
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    def Pow(self, X: T_Pow, Y: T1_Pow) -> T_Pow:
        r"""[🌐 Pow(12)](https://onnx.ai/onnx/operators/onnx__Pow.html#pow-12 "Online Documentation")


        Pow takes input data (Tensor<T>) and exponent Tensor, and
        produces one output data (Tensor<T>) where the function `f(x) = x^exponent`,
        is applied to the data tensor elementwise.
        This operator supports **multidirectional (i.e., Numpy-style) broadcasting**; for more details please check `Broadcasting in ONNX <https://github.com/onnx/onnx/blob/master/docs/Broadcasting.md>`_.

        Args:
            X: First operand, base of the exponent.

            Y: Second operand, power of the exponent.
        """

        schema = get_schema("Pow", 12, "")
        op = Op(self, "Pow", schema)
        return op(*self._prepare_inputs(schema, X, Y))

    T_ReduceMax = TypeVar(
        "T_ReduceMax", DOUBLE, FLOAT, FLOAT16, INT32, INT64, INT8, UINT32, UINT64, UINT8
    )

    def ReduceMax(
        self,
        data: T_ReduceMax,
        *,
        axes: Optional[Sequence[int]] = None,
        keepdims: int = 1,
    ) -> T_ReduceMax:
        r"""[🌐 ReduceMax(12)](https://onnx.ai/onnx/operators/onnx__ReduceMax.html#reducemax-12 "Online Documentation")


        Computes the max of the input tensor's element along the provided axes. The resulting
        tensor has the same rank as the input if keepdims equals 1. If keepdims equal 0, then
        the resulted tensor have the reduced dimension pruned.

        The above behavior is similar to numpy, with the exception that numpy defaults keepdims to
        False instead of True.

        Args:
            data: An input tensor.

            axes: A list of integers, along which to reduce. The default is to reduce
                over all the dimensions of the input tensor. Accepted range is [-r, r-1]
                where r = rank(data).

            keepdims: Keep the reduced dimension or not, default 1 means keep reduced
                dimension.
        """

        schema = get_schema("ReduceMax", 12, "")
        op = Op(self, "ReduceMax", schema)
        return op(*self._prepare_inputs(schema, data), axes=axes, keepdims=keepdims)

    T_ReduceMin = TypeVar(
        "T_ReduceMin", DOUBLE, FLOAT, FLOAT16, INT32, INT64, INT8, UINT32, UINT64, UINT8
    )

    def ReduceMin(
        self,
        data: T_ReduceMin,
        *,
        axes: Optional[Sequence[int]] = None,
        keepdims: int = 1,
    ) -> T_ReduceMin:
        r"""[🌐 ReduceMin(12)](https://onnx.ai/onnx/operators/onnx__ReduceMin.html#reducemin-12 "Online Documentation")


        Computes the min of the input tensor's element along the provided axes. The resulting
        tensor has the same rank as the input if keepdims equals 1. If keepdims equal 0, then
        the resulted tensor have the reduced dimension pruned.

        The above behavior is similar to numpy, with the exception that numpy defaults keepdims to
        False instead of True.

        Args:
            data: An input tensor.

            axes: A list of integers, along which to reduce. The default is to reduce
                over all the dimensions of the input tensor. Accepted range is [-r, r-1]
                where r = rank(data).

            keepdims: Keep the reduced dimension or not, default 1 means keep reduced
                dimension.
        """

        schema = get_schema("ReduceMin", 12, "")
        op = Op(self, "ReduceMin", schema)
        return op(*self._prepare_inputs(schema, data), axes=axes, keepdims=keepdims)

    T_SoftmaxCrossEntropyLoss = TypeVar("T_SoftmaxCrossEntropyLoss", DOUBLE, FLOAT, FLOAT16)

    Tind_SoftmaxCrossEntropyLoss = TypeVar("Tind_SoftmaxCrossEntropyLoss", INT32, INT64)

    def SoftmaxCrossEntropyLoss(
        self,
        scores: T_SoftmaxCrossEntropyLoss,
        labels: Tind_SoftmaxCrossEntropyLoss,
        weights: Optional[T_SoftmaxCrossEntropyLoss] = None,
        *,
        ignore_index: Optional[int] = None,
        reduction: str = "mean",
    ) -> Tuple[T_SoftmaxCrossEntropyLoss, T_SoftmaxCrossEntropyLoss]:
        r"""[🌐 SoftmaxCrossEntropyLoss(12)](https://onnx.ai/onnx/operators/onnx__SoftmaxCrossEntropyLoss.html#softmaxcrossentropyloss-12 "Online Documentation")

        Loss function that measures the softmax cross entropy
        between 'scores' and 'labels'.
        This operator first computes a loss tensor whose shape is identical to the labels input.
        If the input is 2-D with shape (N, C), the loss tensor may be a N-element vector L = (l_1, l_2, ..., l_N).
        If the input is N-D tensor with shape (N, C, D1, D2, ..., Dk),
        the loss tensor L may have (N, D1, D2, ..., Dk) as its shape and L[i,][j_1][j_2]...[j_k] denotes a scalar element in L.
        After L is available, this operator can optionally do a reduction operator.

        shape(scores): (N, C) where C is the number of classes, or (N, C, D1, D2,..., Dk),
                with K >= 1 in case of K-dimensional loss.
        shape(labels): (N) where each value is 0 <= labels[i] <= C-1, or (N, D1, D2,..., Dk),
                with K >= 1 in case of K-dimensional loss.

        The loss for one sample, l_i, can calculated as follows:
            l[i][d1][d2]...[dk] = -y[i][c][d1][d2]..[dk], where i is the index of classes.
        or
            l[i][d1][d2]...[dk] = -y[i][c][d1][d2]..[dk] * weights[c], if 'weights' is provided.

        loss is zero for the case when label-value equals ignore_index.
            l[i][d1][d2]...[dk]  = 0, when labels[n][d1][d2]...[dk] = ignore_index

        where:
            p = Softmax(scores)
            y = Log(p)
            c = labels[i][d1][d2]...[dk]

        Finally, L is optionally reduced:
        If reduction = 'none', the output is L with shape (N, D1, D2, ..., Dk).
        If reduction = 'sum', the output is scalar: Sum(L).
        If reduction = 'mean', the output is scalar: ReduceMean(L), or if weight is provided: ReduceSum(L) / ReduceSum(W),
        where tensor W is of shape (N, D1, D2, ..., Dk) and W[n][d1][d2]...[dk] = weights[labels[i][d1][d2]...[dk]].


        Args:
            scores: The predicted outputs with shape [batch_size, class_size], or
                [batch_size, class_size, D1, D2 , ..., Dk], where K is the number of
                dimensions.

            labels: The ground truth output tensor, with shape [batch_size], or
                [batch_size, D1, D2, ..., Dk], where K is the number of dimensions.
                Labels element value shall be in range of [0, C). If ignore_index is
                specified, it may have a value outside [0, C) and the label values
                should either be in the range [0, C) or have the value ignore_index.

            weights: (optional) A manual rescaling weight given to each class. If given,
                it has to be a 1D Tensor assigning weight to each of the classes.
                Otherwise, it is treated as if having all ones.

            ignore_index: Specifies a target value that is ignored and does not
                contribute to the input gradient. It's an optional value.

            reduction: Type of reduction to apply to loss: none, sum, mean(default).
                'none': no reduction will be applied, 'sum': the output will be summed.
                'mean': the sum of the output will be divided by the number of elements
                in the output.
        """

        schema = get_schema("SoftmaxCrossEntropyLoss", 12, "")
        op = Op(self, "SoftmaxCrossEntropyLoss", schema)
        return op(
            *self._prepare_inputs(schema, scores, labels, weights),
            ignore_index=ignore_index,
            reduction=reduction,
        )