# --------------------------------------------------------------------------
# ⚠️ WARNING - AUTO-GENERATED CODE - DO NOT EDIT ⚠️
# ⚙️ Generated by 'python -m opgen'
# --------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
# pylint: disable=W0221,W0222,R0901,W0237
# mypy: disable-error-code=override
# ruff: noqa: E741, D402, D405
# --------------------------------------------------------------------------

from __future__ import annotations

from typing import Optional, Sequence, Tuple, TypeVar, Union

from onnx.defs import get_schema
from typing_extensions import TypeAlias

from onnxscript.onnx_opset._impl.opset21 import Opset21
from onnxscript.onnx_types import (
    BFLOAT16,
    BOOL,
    COMPLEX64,
    COMPLEX128,
    DOUBLE,
    FLOAT,
    FLOAT8E4M3FN,
    FLOAT8E4M3FNUZ,
    FLOAT8E5M2,
    FLOAT8E5M2FNUZ,
    FLOAT16,
    INT8,
    INT16,
    INT32,
    INT64,
    STRING,
    UINT8,
    UINT16,
    UINT32,
    UINT64,
)
from onnxscript.values import Op, Opset


class Opset22(Opset21):
    def __new__(cls):
        """Create the instance via ``Opset.__new__`` using domain ``""`` and version 22."""
        domain, version = "", 22
        return Opset.__new__(cls, domain, version)

    # Constrained TypeVar shared by the input and return annotations of Acos.
    T_Acos = TypeVar("T_Acos", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Acos(self, input: T_Acos) -> T_Acos:
        r"""[🌐 Acos(22)](https://onnx.ai/onnx/operators/onnx__Acos.html#acos-22 "Online Documentation")

        Computes the element-wise arccosine (the inverse of cosine) of the
        input tensor.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Acos`` op on the
            prepared input.
        """
        acos_schema = get_schema("Acos", 22, "")
        acos = Op(self, "Acos", acos_schema)
        operands = self._prepare_inputs(acos_schema, input)
        return acos(*operands)

    # Constrained TypeVar shared by the input and return annotations of Acosh.
    T_Acosh = TypeVar("T_Acosh", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Acosh(self, input: T_Acosh) -> T_Acosh:
        r"""[🌐 Acosh(22)](https://onnx.ai/onnx/operators/onnx__Acosh.html#acosh-22 "Online Documentation")

        Computes the element-wise hyperbolic arccosine of the input tensor.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Acosh`` op on the
            prepared input.
        """
        acosh_schema = get_schema("Acosh", 22, "")
        acosh = Op(self, "Acosh", acosh_schema)
        operands = self._prepare_inputs(acosh_schema, input)
        return acosh(*operands)

    # Constrained TypeVar shared by the input and return annotations of Asin.
    T_Asin = TypeVar("T_Asin", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Asin(self, input: T_Asin) -> T_Asin:
        r"""[🌐 Asin(22)](https://onnx.ai/onnx/operators/onnx__Asin.html#asin-22 "Online Documentation")

        Computes the element-wise arcsine (the inverse of sine) of the input
        tensor.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Asin`` op on the
            prepared input.
        """
        asin_schema = get_schema("Asin", 22, "")
        asin = Op(self, "Asin", asin_schema)
        operands = self._prepare_inputs(asin_schema, input)
        return asin(*operands)

    # Constrained TypeVar shared by the input and return annotations of Asinh.
    T_Asinh = TypeVar("T_Asinh", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Asinh(self, input: T_Asinh) -> T_Asinh:
        r"""[🌐 Asinh(22)](https://onnx.ai/onnx/operators/onnx__Asinh.html#asinh-22 "Online Documentation")

        Computes the element-wise hyperbolic arcsine of the input tensor.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Asinh`` op on the
            prepared input.
        """
        asinh_schema = get_schema("Asinh", 22, "")
        asinh = Op(self, "Asinh", asinh_schema)
        operands = self._prepare_inputs(asinh_schema, input)
        return asinh(*operands)

    # Constrained TypeVar shared by the input and return annotations of Atan.
    T_Atan = TypeVar("T_Atan", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Atan(self, input: T_Atan) -> T_Atan:
        r"""[🌐 Atan(22)](https://onnx.ai/onnx/operators/onnx__Atan.html#atan-22 "Online Documentation")

        Computes the element-wise arctangent (the inverse of tangent) of the
        input tensor.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Atan`` op on the
            prepared input.
        """
        atan_schema = get_schema("Atan", 22, "")
        atan = Op(self, "Atan", atan_schema)
        operands = self._prepare_inputs(atan_schema, input)
        return atan(*operands)

    # Constrained TypeVar shared by the input and return annotations of Atanh.
    T_Atanh = TypeVar("T_Atanh", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Atanh(self, input: T_Atanh) -> T_Atanh:
        r"""[🌐 Atanh(22)](https://onnx.ai/onnx/operators/onnx__Atanh.html#atanh-22 "Online Documentation")

        Computes the element-wise hyperbolic arctangent of the input tensor.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Atanh`` op on the
            prepared input.
        """
        atanh_schema = get_schema("Atanh", 22, "")
        atanh = Op(self, "Atanh", atanh_schema)
        operands = self._prepare_inputs(atanh_schema, input)
        return atanh(*operands)

    # Constrained TypeVar shared by the X and return annotations of AveragePool.
    T_AveragePool = TypeVar("T_AveragePool", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def AveragePool(
        self,
        X: T_AveragePool,
        *,
        auto_pad: str = "NOTSET",
        ceil_mode: int = 0,
        count_include_pad: int = 0,
        dilations: Optional[Sequence[int]] = None,
        kernel_shape: Sequence[int],
        pads: Optional[Sequence[int]] = None,
        strides: Optional[Sequence[int]] = None,
    ) -> T_AveragePool:
        r"""[🌐 AveragePool(22)](https://onnx.ai/onnx/operators/onnx__AveragePool.html#averagepool-22 "Online Documentation")

        Applies average pooling to the input tensor X, governed by the kernel
        sizes, stride sizes, and pad lengths. Average pooling computes the mean
        over each subset of the input selected by the kernel window and writes
        the downsampled values into the output tensor Y for further processing.

        How the output spatial shape is computed depends on whether explicit
        padding (`pads`) or auto padding (`auto_pad`) is in use.

        With explicit padding (https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html?highlight=maxpool#torch.nn.MaxPool2d):
        ```
        output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
        ```
        or, when ceil_mode is enabled:
        ```
        output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
        ```
        Here `pad_shape[i]` is the sum of the pads along axis `i`. Sliding
        windows that would start in the right padded region are ignored.

        `auto_pad` is a DEPRECATED attribute. If it is used, the output spatial
        shape with ceil_mode enabled is:
        ```
        VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) + 1) / strides_spatial_shape[i])
        SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
        ```
        and with ceil_mode disabled (https://www.tensorflow.org/api_docs/python/tf/keras/layers/AveragePooling2D):
        ```
        VALID: output_spatial_shape[i] = floor((input_spatial_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i]) + 1
        SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = floor((input_spatial_shape[i] - 1) / strides_spatial_shape[i]) + 1
        ```
        For `SAME_UPPER` or `SAME_LOWER` the pad shape is:
        ```
        pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) - input_spatial_shape[i]
        ```
        Each pooling window's output is divided by the number of elements in
        the window (pad elements are excluded when the attribute
        count_include_pad is zero).

        Args:
            X: (differentiable) Input data tensor from the previous operator. For
                the image case the dimensions are (N x C x H x W), where N is the
                batch size, C is the number of channels, and H and W are the height
                and the width of the data. For the non-image case the dimensions
                take the form (N x C x D1 x D2 ... Dn), where N is the batch size.
                Optionally, if dimension denotation is in effect, the operation
                expects the input data tensor to arrive with the dimension
                denotation of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE,
                DATA_FEATURE ...].

            auto_pad: Must be one of NOTSET, SAME_UPPER, SAME_LOWER or VALID. The
                default, NOTSET, means explicit padding is used. SAME_UPPER or
                SAME_LOWER pad the input so that `output_shape[i] =
                ceil(input_shape[i] / strides[i])` for each axis `i`. The padding
                is split equally, or almost equally (depending on whether it is
                even or odd), between the two sides. When the padding is an odd
                number, the extra padding goes at the end for SAME_UPPER and at
                the beginning for SAME_LOWER.

            ceil_mode: Whether to use ceil or floor (default) to compute the output
                shape.

            count_include_pad: Whether to include pad pixels when calculating
                values for the edges. Default is 0, which does not include pad.

            dilations: Dilation value along each spatial axis of the filter. If not
                present, the dilation defaults to 1 along each spatial axis.

            kernel_shape: The size of the kernel along each axis.

            pads: Padding for the beginning and ending along each spatial axis; it
                can take any value greater than or equal to 0. The values
                represent the number of pixels added to the beginning and end part
                of the corresponding axis. The `pads` format should be [x1_begin,
                x2_begin...x1_end, x2_end,...], where xi_begin is the number of
                pixels added at the beginning of axis `i` and xi_end is the number
                of pixels added at the end of axis `i`. This attribute cannot be
                used simultaneously with the auto_pad attribute. If not present,
                the padding defaults to 0 along the start and end of each spatial
                axis.

            strides: Stride along each spatial axis. If not present, the stride
                defaults to 1 along each spatial axis.

        Returns:
            The value produced by calling the opset-22 ``AveragePool`` op on the
            prepared input with the given attributes.
        """
        pool_schema = get_schema("AveragePool", 22, "")
        average_pool = Op(self, "AveragePool", pool_schema)
        # Attributes are forwarded by keyword, in the same order as the signature.
        attributes = {
            "auto_pad": auto_pad,
            "ceil_mode": ceil_mode,
            "count_include_pad": count_include_pad,
            "dilations": dilations,
            "kernel_shape": kernel_shape,
            "pads": pads,
            "strides": strides,
        }
        return average_pool(*self._prepare_inputs(pool_schema, X), **attributes)

    # Constrained TypeVar used to annotate the `input` parameter of Bernoulli.
    T1_Bernoulli = TypeVar("T1_Bernoulli", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    # Union alias used as the return annotation of Bernoulli.
    T2_Bernoulli: TypeAlias = Union[
        BFLOAT16,
        BOOL,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    ]

    def Bernoulli(
        self,
        input: T1_Bernoulli,
        *,
        dtype: Optional[int] = None,
        seed: Optional[float] = None,
    ) -> T2_Bernoulli:
        r"""[🌐 Bernoulli(22)](https://onnx.ai/onnx/operators/onnx__Bernoulli.html#bernoulli-22 "Online Documentation")

        Samples binary random numbers (0 or 1) from a Bernoulli distribution.
        The input tensor holds the probabilities p (each a value in the range
        [0,1]) used for the draw: an output of 1 is produced with probability p
        and an output of 0 with probability (1-p).

        This operator is non-deterministic and may not produce the same values
        in different implementations (even if a seed is specified).

        Args:
            input: All values in input have to be in the range:[0, 1].

            dtype: The data type for the elements of the output tensor. If not
                specified, the data type of the input tensor is used.

            seed: (Optional) Seed to the random generator; if not specified, one
                is auto generated.

        Returns:
            The value produced by calling the opset-22 ``Bernoulli`` op on the
            prepared input with the given attributes.
        """
        bernoulli_schema = get_schema("Bernoulli", 22, "")
        bernoulli = Op(self, "Bernoulli", bernoulli_schema)
        operands = self._prepare_inputs(bernoulli_schema, input)
        return bernoulli(*operands, dtype=dtype, seed=seed)

    # Constrained TypeVar shared by the X, W, B and return annotations of Conv.
    T_Conv = TypeVar("T_Conv", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Conv(
        self,
        X: T_Conv,
        W: T_Conv,
        B: Optional[T_Conv] = None,
        *,
        auto_pad: str = "NOTSET",
        dilations: Optional[Sequence[int]] = None,
        group: int = 1,
        kernel_shape: Optional[Sequence[int]] = None,
        pads: Optional[Sequence[int]] = None,
        strides: Optional[Sequence[int]] = None,
    ) -> T_Conv:
        r"""[🌐 Conv(22)](https://onnx.ai/onnx/operators/onnx__Conv.html#conv-22 "Online Documentation")

        Takes an input tensor and a filter, and computes the convolution
        output.

        Args:
            X: (differentiable) Input data tensor from the previous layer. It has
                size (N x C x H x W), where N is the batch size, C is the number of
                channels, and H and W are the height and width. Note that this is
                for the 2D image; otherwise the size is (N x C x D1 x D2 ... x Dn).
                Optionally, if dimension denotation is in effect, the operation
                expects the input data tensor to arrive with the dimension
                denotation of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE,
                DATA_FEATURE ...].

            W: (differentiable) The weight tensor used in the convolutions. It has
                size (M x C/group x kH x kW), where C is the number of channels, kH
                and kW are the height and width of the kernel, and M is the number
                of feature maps. For more than 2 dimensions, the kernel shape is
                (M x C/group x k1 x k2 x ... x kn), where (k1 x k2 x ... kn) is the
                dimension of the kernel. Optionally, if dimension denotation is in
                effect, the operation expects the weight tensor to arrive with the
                dimension denotation of [FILTER_OUT_CHANNEL, FILTER_IN_CHANNEL,
                FILTER_SPATIAL, FILTER_SPATIAL ...]. Assuming zero based indices
                for the shape array, X.shape[1] == (W.shape[1] * group) == C and
                W.shape[0] mod G == 0. In other words, FILTER_IN_CHANNEL multiplied
                by the number of groups should be equal to DATA_CHANNEL, and the
                number of feature maps M should be a multiple of the number of
                groups G.

            B: (optional, differentiable) Optional 1D bias to be added to the
                convolution; has size of M.

            auto_pad: Must be one of NOTSET, SAME_UPPER, SAME_LOWER or VALID. The
                default, NOTSET, means explicit padding is used. SAME_UPPER or
                SAME_LOWER pad the input so that `output_shape[i] =
                ceil(input_shape[i] / strides[i])` for each axis `i`. The padding
                is split equally, or almost equally (depending on whether it is
                even or odd), between the two sides. When the padding is an odd
                number, the extra padding goes at the end for SAME_UPPER and at
                the beginning for SAME_LOWER.

            dilations: Dilation value along each spatial axis of the filter. If not
                present, the dilation defaults to 1 along each spatial axis.

            group: Number of groups input channels and output channels are divided
                into.

            kernel_shape: The shape of the convolution kernel. If not present, it
                should be inferred from input W.

            pads: Padding for the beginning and ending along each spatial axis; it
                can take any value greater than or equal to 0. The values
                represent the number of pixels added to the beginning and end part
                of the corresponding axis. The `pads` format should be [x1_begin,
                x2_begin...x1_end, x2_end,...], where xi_begin is the number of
                pixels added at the beginning of axis `i` and xi_end is the number
                of pixels added at the end of axis `i`. This attribute cannot be
                used simultaneously with the auto_pad attribute. If not present,
                the padding defaults to 0 along the start and end of each spatial
                axis.

            strides: Stride along each spatial axis. If not present, the stride
                defaults to 1 along each spatial axis.

        Returns:
            The value produced by calling the opset-22 ``Conv`` op on the
            prepared inputs with the given attributes.
        """
        conv_schema = get_schema("Conv", 22, "")
        conv = Op(self, "Conv", conv_schema)
        # Attributes are forwarded by keyword, in the same order as the signature.
        attributes = {
            "auto_pad": auto_pad,
            "dilations": dilations,
            "group": group,
            "kernel_shape": kernel_shape,
            "pads": pads,
            "strides": strides,
        }
        return conv(*self._prepare_inputs(conv_schema, X, W, B), **attributes)

    # Constrained TypeVar shared by the X, W, B and return annotations of ConvTranspose.
    T_ConvTranspose = TypeVar("T_ConvTranspose", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def ConvTranspose(
        self,
        X: T_ConvTranspose,
        W: T_ConvTranspose,
        B: Optional[T_ConvTranspose] = None,
        *,
        auto_pad: str = "NOTSET",
        dilations: Optional[Sequence[int]] = None,
        group: int = 1,
        kernel_shape: Optional[Sequence[int]] = None,
        output_padding: Optional[Sequence[int]] = None,
        output_shape: Optional[Sequence[int]] = None,
        pads: Optional[Sequence[int]] = None,
        strides: Optional[Sequence[int]] = None,
    ) -> T_ConvTranspose:
        r"""[🌐 ConvTranspose(22)](https://onnx.ai/onnx/operators/onnx__ConvTranspose.html#convtranspose-22 "Online Documentation")

        Takes an input tensor and a filter, and computes the transposed
        convolution output.

        When the pads parameter is provided, the output shape follows from:

          output_shape[i] = stride[i] * (input_size[i] - 1) + output_padding[i] + ((kernel_shape[i] - 1) * dilations[i] + 1) - pads[start_i] - pads[end_i]

        output_shape may instead be given explicitly, in which case the pads
        values are auto generated using these equations:

          total_padding[i] = stride[i] * (input_size[i] - 1) + output_padding[i] + ((kernel_shape[i] - 1) * dilations[i] + 1) - output_shape[i]
          If (auto_pads == SAME_UPPER): pads[start_i] = total_padding[i]/2; pads[end_i] = total_padding[i] - (total_padding[i]/2)
          Else: pads[start_i] = total_padding[i] - (total_padding[i]/2); pads[end_i] = (total_padding[i]/2).

        Args:
            X: (differentiable) Input data tensor from the previous layer. It has
                size (N x C x H x W), where N is the batch size, C is the number of
                channels, and H and W are the height and width. Note that this is
                for the 2D image; otherwise the size is (N x C x D1 x D2 ... x Dn).

            W: (differentiable) The weight tensor used in the convolutions. It has
                size (C x M/group x kH x kW), where C is the number of channels, kH
                and kW are the height and width of the kernel, and M is the number
                of feature maps. For more than 2 dimensions, the weight shape is
                (C x M/group x k1 x k2 x ... x kn), where (k1 x k2 x ... x kn) is
                the dimension of the kernel. The number of channels in the output
                should be equal to W.shape[1] * group (assuming zero based indices
                of the shape array).

            B: (optional, differentiable) Optional 1D bias to be added to the
                convolution; has size of M.

            auto_pad: Must be one of NOTSET, SAME_UPPER, SAME_LOWER or VALID. The
                default, NOTSET, means explicit padding is used. SAME_UPPER or
                SAME_LOWER pad the input so that `output_shape[i] = input_shape[i]
                * strides[i]` for each axis `i`. The padding is split equally, or
                almost equally (depending on whether it is even or odd), between
                the two sides. When the padding is an odd number, the extra
                padding goes at the end for SAME_UPPER and at the beginning for
                SAME_LOWER.

            dilations: Dilation value along each spatial axis of the filter. If not
                present, the dilation defaults to 1 along each spatial axis.

            group: Number of groups input channels and output channels are divided
                into.

            kernel_shape: The shape of the convolution kernel. If not present, it
                should be inferred from input W.

            output_padding: Additional elements added to the side with higher
                coordinate indices in the output. Each padding value in
                "output_padding" must be less than the corresponding
                stride/dilation dimension. By default, this attribute is a zero
                vector. Note that this attribute doesn't directly affect the
                computed output values; it only controls the selection of the
                computed values, so changing it only adds or removes output
                elements. If "output_shape" is explicitly provided,
                "output_padding" does not contribute additional size to
                "output_shape" but participates in the computation of the needed
                padding amount. This is also called adjs or adjustment in some
                frameworks.

            output_shape: The shape of the output can be explicitly set, which
                causes the pads values to be auto generated. If output_shape is
                specified, pads values are ignored. See the description above for
                the equations that generate pads. Note that the output_shape
                attribute value should not include dimensions for batch size and
                channels, which are automatically inferred.

            pads: Padding for the beginning and ending along each spatial axis; it
                can take any value greater than or equal to 0. The values
                represent the number of pixels added to the beginning and end part
                of the corresponding axis. The `pads` format should be [x1_begin,
                x2_begin...x1_end, x2_end,...], where xi_begin is the number of
                pixels added at the beginning of axis `i` and xi_end is the number
                of pixels added at the end of axis `i`. This attribute cannot be
                used simultaneously with the auto_pad attribute. If not present,
                the padding defaults to 0 along the start and end of each spatial
                axis.

            strides: Stride along each spatial axis. If not present, the stride
                defaults to 1 along each spatial axis.

        Returns:
            The value produced by calling the opset-22 ``ConvTranspose`` op on
            the prepared inputs with the given attributes.
        """
        transpose_schema = get_schema("ConvTranspose", 22, "")
        conv_transpose = Op(self, "ConvTranspose", transpose_schema)
        # Attributes are forwarded by keyword, in the same order as the signature.
        attributes = {
            "auto_pad": auto_pad,
            "dilations": dilations,
            "group": group,
            "kernel_shape": kernel_shape,
            "output_padding": output_padding,
            "output_shape": output_shape,
            "pads": pads,
            "strides": strides,
        }
        return conv_transpose(
            *self._prepare_inputs(transpose_schema, X, W, B), **attributes
        )

    # Constrained TypeVar shared by the input and return annotations of Cos.
    T_Cos = TypeVar("T_Cos", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Cos(self, input: T_Cos) -> T_Cos:
        r"""[🌐 Cos(22)](https://onnx.ai/onnx/operators/onnx__Cos.html#cos-22 "Online Documentation")

        Computes the element-wise cosine of the input tensor.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Cos`` op on the
            prepared input.
        """
        cos_schema = get_schema("Cos", 22, "")
        cos = Op(self, "Cos", cos_schema)
        operands = self._prepare_inputs(cos_schema, input)
        return cos(*operands)

    # Constrained TypeVar shared by the input and return annotations of Cosh.
    T_Cosh = TypeVar("T_Cosh", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Cosh(self, input: T_Cosh) -> T_Cosh:
        r"""[🌐 Cosh(22)](https://onnx.ai/onnx/operators/onnx__Cosh.html#cosh-22 "Online Documentation")

        Computes the element-wise hyperbolic cosine of the input tensor.

        Args:
            input: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Cosh`` op on the
            prepared input.
        """
        cosh_schema = get_schema("Cosh", 22, "")
        cosh = Op(self, "Cosh", cosh_schema)
        operands = self._prepare_inputs(cosh_schema, input)
        return cosh(*operands)

    # Constrained TypeVar shared by all tensor parameters and the return annotation of DeformConv.
    T_DeformConv = TypeVar("T_DeformConv", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def DeformConv(
        self,
        X: T_DeformConv,
        W: T_DeformConv,
        offset: T_DeformConv,
        B: Optional[T_DeformConv] = None,
        mask: Optional[T_DeformConv] = None,
        *,
        dilations: Optional[Sequence[int]] = None,
        group: int = 1,
        kernel_shape: Optional[Sequence[int]] = None,
        offset_group: int = 1,
        pads: Optional[Sequence[int]] = None,
        strides: Optional[Sequence[int]] = None,
    ) -> T_DeformConv:
        r"""[🌐 DeformConv(22)](https://onnx.ai/onnx/operators/onnx__DeformConv.html#deformconv-22 "Online Documentation")

        Performs deformable convolution as described in
        https://arxiv.org/abs/1703.06211 and https://arxiv.org/abs/1811.11168.
        The operator specification covers the general N-D case, although most
        common use cases have 2D or 3D data.

        Args:
            X: Input data tensor. For 2D image data it has shape (N, C, H, W),
                where N is the batch size, C is the number of input channels, and H
                and W are the height and width. In general, the shape is (N, C, D1,
                D2, ... , Dn) for n-dimensional data, where D1 to Dn are the
                spatial dimension sizes. Most common use cases have n = 2 or 3.

            W: Weight tensor used in the convolutions. It has shape (oC, C/group,
                kH, kW), where oC is the number of output channels and kH and kW
                are the kernel height and width. For more than 2 dimensions, it
                has shape (oC, C/group, k1, k2, ... , kn).

            offset: Offset tensor denoting the offset for the sampling locations in
                the convolution kernel. It has shape (N, offset_group * kH * kW *
                2, oH, oW) for 2D data or (N, offset_group * k1 * k2 * ... * kn *
                n, o1, o2, ... , on) for nD data. Linear interpolation is used for
                fractional offset values. Sampling locations outside of the padded
                input tensor give zero.

            B: (optional) Optional 1D bias of length oC to be added to the
                convolution. Default is a tensor of zeros.

            mask: (optional) The mask tensor to be applied to each position in the
                convolution kernel. It has shape (N, offset_group * kH * kW, oH,
                oW) for 2D data or (N, offset_group * k1 * k2 * ... * kn * n, o1,
                o2, ... , on) for nD data. Default is a tensor of ones.

            dilations: Dilation value along each spatial axis of the kernel.
                Default is 1 along each axis.

            group: Number of groups the input and output channels, C and oC, are
                divided into. C and oC must both be divisible by group. Default is
                1.

            kernel_shape: Shape of the convolution kernel. If not present, it is
                inferred from the shape of input W.

            offset_group: Number of groups of offset. C must be divisible by
                offset_group. Default is 1.

            pads: Padding for the beginning and end along each spatial axis. The
                values represent the number of pixels added to the beginning and
                end of the corresponding axis and can take any nonnegative value.
                The format should be as follows: [x1_begin, x2_begin, ...,
                x1_end, x2_end, ...], where xi_begin is the number of pixels added
                at the beginning of axis `i` and xi_end is the number of pixels
                added at the end of axis `i`. Default is 0 along each axis.

            strides: Stride along each spatial axis. Default is 1 along each axis.

        Returns:
            The value produced by calling the opset-22 ``DeformConv`` op on the
            prepared inputs with the given attributes.
        """
        deform_schema = get_schema("DeformConv", 22, "")
        deform_conv = Op(self, "DeformConv", deform_schema)
        # Attributes are forwarded by keyword, in the same order as the signature.
        attributes = {
            "dilations": dilations,
            "group": group,
            "kernel_shape": kernel_shape,
            "offset_group": offset_group,
            "pads": pads,
            "strides": strides,
        }
        return deform_conv(
            *self._prepare_inputs(deform_schema, X, W, offset, B, mask), **attributes
        )

    # Constrained TypeVar shared by the X and return annotations of Det.
    T_Det = TypeVar("T_Det", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Det(self, X: T_Det) -> T_Det:
        r"""[🌐 Det(22)](https://onnx.ai/onnx/operators/onnx__Det.html#det-22 "Online Documentation")

        Computes the determinant of a square matrix or of batches of square
        matrices. The single input tensor has shape `[*, M, M]`, where `*` is
        zero or more batch dimensions and the inner-most 2 dimensions form
        square matrices. The output is a tensor of shape `[*]` holding the
        determinants of all input submatrices; e.g., when the input is 2-D,
        the output is a scalar (shape is empty: `[]`).

        Args:
            X: (differentiable) Input tensor

        Returns:
            The value produced by calling the opset-22 ``Det`` op on the
            prepared input.
        """
        det_schema = get_schema("Det", 22, "")
        det = Op(self, "Det", det_schema)
        operands = self._prepare_inputs(det_schema, X)
        return det(*operands)

    # Constrained TypeVar for Dropout's `data` parameter and the first element
    # of its annotated return tuple.
    T_Dropout = TypeVar(
        "T_Dropout",
        BFLOAT16,
        DOUBLE,
        FLOAT,
        FLOAT16,
        FLOAT8E4M3FN,
        FLOAT8E4M3FNUZ,
        FLOAT8E5M2,
        FLOAT8E5M2FNUZ,
    )

    # Constrained TypeVar for Dropout's optional `ratio` parameter; declared
    # separately from T_Dropout, so it is bound independently of `data`.
    T1_Dropout = TypeVar(
        "T1_Dropout",
        BFLOAT16,
        DOUBLE,
        FLOAT,
        FLOAT16,
        FLOAT8E4M3FN,
        FLOAT8E4M3FNUZ,
        FLOAT8E5M2,
        FLOAT8E5M2FNUZ,
    )

    # Alias used for Dropout's `training_mode` parameter and the second element
    # of its annotated return tuple.
    T2_Dropout: TypeAlias = BOOL

    def Dropout(
        self,
        data: T_Dropout,
        ratio: Optional[T1_Dropout] = None,
        training_mode: Optional[T2_Dropout] = None,
        *,
        seed: Optional[int] = None,
    ) -> Tuple[T_Dropout, T2_Dropout]:
        r"""[🌐 Dropout(22)](https://onnx.ai/onnx/operators/onnx__Dropout.html#dropout-22 "Online Documentation")

        Dropout consumes a floating-point input tensor together with two
        optional inputs: ratio (a floating-point scalar) and training_mode (a
        boolean scalar). It yields two tensor outputs: output (a floating-point
        tensor) and mask (an optional `Tensor<bool>`). When `training_mode` is
        true, the output Y is a random dropout of the input. Because this
        Dropout scales the masked input data by the equation below, a trained
        model can be switched to inference mode simply by not passing the
        `training_mode` input or by setting it to false.
        ::

            output = scale * data * mask,


        where
        ::

            scale = 1. / (1. - ratio).


        This operator has **optional** inputs/outputs. See `ONNX <https://github.com/onnx/onnx/blob/master/docs/IR.md>`_ for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted.

        Args:
            data: (differentiable) The input data as Tensor.

            ratio: (optional, non-differentiable) The ratio of random dropout, with
                value in [0, 1). If set to 0, the output is a simple copy of the
                input. If it is non-zero, the output is a random dropout of the
                scaled input, which is typically the case during training. This
                value is optional; if not specified it defaults to 0.5.

            training_mode: (optional, non-differentiable) If set to true, it
                indicates dropout is being used for training. This value is
                optional; unless specified explicitly, it is false. If it is
                false, ratio is ignored and the operation mimics inference mode,
                where nothing is dropped from the input data and, if mask is
                requested as output, it contains all ones.

            seed: (Optional) Seed to the random generator; if not specified, one
                is auto generated.

        Returns:
            The value produced by calling the opset-22 ``Dropout`` op on the
            prepared inputs; annotated as an (output, mask) pair.
        """
        dropout_schema = get_schema("Dropout", 22, "")
        dropout = Op(self, "Dropout", dropout_schema)
        operands = self._prepare_inputs(dropout_schema, data, ratio, training_mode)
        return dropout(*operands, seed=seed)

    # Tensor types accepted for Elu's input `X`; the result has the same type.
    T_Elu = TypeVar("T_Elu", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Elu(self, X: T_Elu, *, alpha: float = 1.0) -> T_Elu:
        r"""[🌐 Elu(22)](https://onnx.ai/onnx/operators/onnx__Elu.html#elu-22 "Online Documentation")


        Applies the ELU function elementwise to one input tensor (Tensor<T>)
        and yields one output tensor (Tensor<T>). The function is
        `f(x) = alpha * (exp(x) - 1.)` for `x < 0` and `f(x) = x` for `x >= 0`.


        Args:
            X: (differentiable) Input tensor

            alpha: Coefficient of ELU.
        """

        elu_schema = get_schema("Elu", 22, "")
        elu_op = Op(self, "Elu", elu_schema)
        prepared = self._prepare_inputs(elu_schema, X)
        return elu_op(*prepared, alpha=alpha)

    # Tensor types accepted for EyeLike's `input` argument.
    T1_EyeLike = TypeVar(
        "T1_EyeLike",
        BFLOAT16,
        BOOL,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    # Tensor types EyeLike may return. This is a plain Union rather than a
    # TypeVar, so the annotation does not tie the result type to the input type.
    T2_EyeLike: TypeAlias = Union[
        BFLOAT16,
        BOOL,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    ]

    def EyeLike(
        self, input: T1_EyeLike, *, dtype: Optional[int] = None, k: int = 0
    ) -> T2_EyeLike:
        r"""[🌐 EyeLike(22)](https://onnx.ai/onnx/operators/onnx__EyeLike.html#eyelike-22 "Online Documentation")


        Produces a 2D tensor (matrix) holding ones on one diagonal and zeros in every
        other position. Only 2D tensors are supported, i.e. input T1 must have rank 2,
        and the output tensor has the same shape as the input tensor. The element type
        of the output is given by the 'dtype' argument; when 'dtype' is not specified,
        the type of the input tensor is used. Ones go on the main diagonal by default,
        and the attribute 'k' selects an upper or lower diagonal instead.
        The 'dtype' argument must be one of the data types listed in the 'DataType'
        enum field of the TensorProto message and must be valid as an output type.


        Args:
            input: 2D input tensor from which the shape, and optionally the type
                information, is copied.

            dtype: (Optional) Element data type of the output tensor. When it is not
                specified, the data type of the input tensor T1 is used.

            k: (Optional) Index of the diagonal that receives the ones. Default is 0.
                With T2 as the output, this op sets T2[i, i+k] = 1. k = 0 fills the
                main diagonal, k > 0 fills an upper diagonal, and k < 0 fills a
                lower diagonal.
        """

        eyelike_schema = get_schema("EyeLike", 22, "")
        eyelike_op = Op(self, "EyeLike", eyelike_schema)
        prepared = self._prepare_inputs(eyelike_schema, input)
        return eyelike_op(*prepared, dtype=dtype, k=k)

    # Tensor types shared by GRU's X, W, R, B and initial_h inputs and by both
    # of its outputs.
    T_GRU = TypeVar("T_GRU", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    # Tensor type of GRU's `sequence_lens` input.
    T1_GRU: TypeAlias = INT32

    def GRU(
        self,
        X: T_GRU,
        W: T_GRU,
        R: T_GRU,
        B: Optional[T_GRU] = None,
        sequence_lens: Optional[T1_GRU] = None,
        initial_h: Optional[T_GRU] = None,
        *,
        activation_alpha: Optional[Sequence[float]] = None,
        activation_beta: Optional[Sequence[float]] = None,
        activations: Optional[Sequence[str]] = None,
        clip: Optional[float] = None,
        direction: str = "forward",
        hidden_size: Optional[int] = None,
        layout: int = 0,
        linear_before_reset: int = 0,
    ) -> Tuple[T_GRU, T_GRU]:
        r"""[🌐 GRU(22)](https://onnx.ai/onnx/operators/onnx__GRU.html#gru-22 "Online Documentation")


        Computes a one-layer GRU. This operator is usually supported through a
        custom implementation such as CuDNN.

        Notation:

        * `X` - input tensor
        * `z` - update gate
        * `r` - reset gate
        * `h` - hidden gate
        * `t` - time step (t-1 refers to the previous time step)
        * `W[zrh]` - W parameter weight matrix of the update, reset, and hidden gates
        * `R[zrh]` - R recurrence weight matrix of the update, reset, and hidden gates
        * `Wb[zrh]` - W bias vectors of the update, reset, and hidden gates
        * `Rb[zrh]` - R bias vectors of the update, reset, and hidden gates
        * `WB[zrh]` - W parameter weight matrix of the backward update, reset, and hidden gates
        * `RB[zrh]` - R recurrence weight matrix of the backward update, reset, and hidden gates
        * `WBb[zrh]` - W bias vectors of the backward update, reset, and hidden gates
        * `RBb[zrh]` - R bias vectors of the backward update, reset, and hidden gates
        * `H` - Hidden state
        * `num_directions` - 2 when direction == bidirectional, otherwise 1

        Activation functions:

        * Relu(x)                - max(0, x)
        * Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})
        * Sigmoid(x)             - 1/(1 + e^{-x})

        NOTE:
          The activation functions listed below are optional

        * Affine(x)              - alpha * x + beta
        * LeakyRelu(x)           - x if x >= 0 else alpha * x
        * ThresholdedRelu(x)     - x if x >= alpha else 0
        * ScaledTanh(x)          - alpha * Tanh(beta * x)
        * HardSigmoid(x)         - min(max(alpha * x + beta, 0), 1)
        * Elu(x)                 - x if x >= 0 else alpha * (e^x - 1)
        * Softsign(x)            - x/(1 + |x|)
        * Softplus(x)            - log(1 + e^x)

        Equations (Default: f=Sigmoid, g=Tanh):

        * zt = f(Xt*(Wz^T) + Ht-1*(Rz^T) + Wbz + Rbz)
        * rt = f(Xt*(Wr^T) + Ht-1*(Rr^T) + Wbr + Rbr)
        * ht = g(Xt*(Wh^T) + (rt (.) Ht-1)*(Rh^T) + Rbh + Wbh) # default, when linear_before_reset = 0
        * ht = g(Xt*(Wh^T) + (rt (.) (Ht-1*(Rh^T) + Rbh)) + Wbh) # when linear_before_reset != 0
        * Ht = (1 - zt) (.) ht + zt (.) Ht-1

        This operator has **optional** inputs/outputs. See `ONNX <https://github.com/onnx/onnx/blob/master/docs/IR.md>`_ for more details about how optional arguments are represented. An empty string may stand in for an actual argument's name to mark a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also simply be omitted.


        Args:
            X: (differentiable) Input sequences packed (and potentially padded) into
                a single 3-D tensor of shape `[seq_length, batch_size, input_size]`.

            W: (differentiable) Weight tensor for the gates: `W[zrh]` concatenated
                with `WB[zrh]` (if bidirectional) along dimension 0. Its shape is
                `[num_directions, 3*hidden_size, input_size]`.

            R: (differentiable) Recurrence weight tensor: `R[zrh]` concatenated with
                `RB[zrh]` (if bidirectional) along dimension 0. Its shape is
                `[num_directions, 3*hidden_size, hidden_size]`.

            B: (optional, differentiable) Bias tensor for the gates:
                `[Wb[zrh], Rb[zrh]]` concatenated with `[WBb[zrh], RBb[zrh]]` (if
                bidirectional) along dimension 0. Its shape is `[num_directions,
                6*hidden_size]`. Optional: assumed to be 0 when not specified.

            sequence_lens: (optional, non-differentiable) Optional tensor giving the
                lengths of the sequences in a batch. When not specified, every
                sequence in the batch is assumed to have length `seq_length`. Its
                shape is `[batch_size]`.

            initial_h: (optional, non-differentiable) Optional initial value of the
                hidden. Assumed to be 0 when not specified. Its shape is
                `[num_directions, batch_size, hidden_size]`.

            activation_alpha: Optional scaling values used by some activation
                functions. The values are consumed in the order of the activation
                functions, for example (f, g, h) in LSTM. Defaults match those of
                the corresponding ONNX operators. For example, with LeakyRelu the
                default alpha is 0.01.

            activation_beta: Optional scaling values used by some activation
                functions. The values are consumed in the order of the activation
                functions, for example (f, g, h) in LSTM. Defaults match those of
                the corresponding ONNX operators.

            activations: A list of 2 (or 4 if bidirectional) activation functions for
                update, reset, and hidden gates. Each one must be among the
                activation functions specified above. Optional: the equations show
                the defaults used when not specified.

            clip: Cell clip threshold. Clipping bounds the elements of a tensor to
                the range [-threshold, +threshold] and is applied to the input of
                activations. No clipping is done when not specified.

            direction: Whether the RNN is forward, reverse, or bidirectional. Must
                be one of forward (default), reverse, or bidirectional.

            hidden_size: Number of neurons in the hidden layer

            layout: Shape format of the inputs X, initial_h and the outputs Y, Y_h.
                With 0, the expected shapes are: X.shape = [seq_length, batch_size,
                input_size], Y.shape = [seq_length, num_directions, batch_size,
                hidden_size], initial_h.shape = Y_h.shape = [num_directions,
                batch_size, hidden_size]. With 1, the expected shapes are: X.shape =
                [batch_size, seq_length, input_size], Y.shape = [batch_size,
                seq_length, num_directions, hidden_size], initial_h.shape =
                Y_h.shape = [batch_size, num_directions, hidden_size].

            linear_before_reset: When computing the output of the hidden gate, apply
                the linear transformation before multiplying by the output of the
                reset gate.
        """

        gru_schema = get_schema("GRU", 22, "")
        gru_op = Op(self, "GRU", gru_schema)
        tensor_inputs = self._prepare_inputs(
            gru_schema, X, W, R, B, sequence_lens, initial_h
        )
        # Attributes are forwarded as keywords, in the same order as the signature.
        attributes = {
            "activation_alpha": activation_alpha,
            "activation_beta": activation_beta,
            "activations": activations,
            "clip": clip,
            "direction": direction,
            "hidden_size": hidden_size,
            "layout": layout,
            "linear_before_reset": linear_before_reset,
        }
        return gru_op(*tensor_inputs, **attributes)

    # Tensor types accepted for GlobalAveragePool's input `X`; the result has the same type.
    T_GlobalAveragePool = TypeVar("T_GlobalAveragePool", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def GlobalAveragePool(self, X: T_GlobalAveragePool) -> T_GlobalAveragePool:
        r"""[🌐 GlobalAveragePool(22)](https://onnx.ai/onnx/operators/onnx__GlobalAveragePool.html#globalaveragepool-22 "Online Documentation")


        GlobalAveragePool takes an input tensor X and average-pools the values
        within each channel. The result equals AveragePool with a kernel size
        equal to the spatial dimension of the input tensor.

        Args:
            X: (differentiable) Input data tensor from the previous operator. For the
                image case the dimensions are (N x C x H x W), where N is the batch
                size, C is the number of channels, and H and W are the height and
                the width of the data. For the non image case the dimensions have
                the form (N x C x D1 x D2 ... Dn), where N is the batch size.
        """

        pool_schema = get_schema("GlobalAveragePool", 22, "")
        pool_op = Op(self, "GlobalAveragePool", pool_schema)
        prepared = self._prepare_inputs(pool_schema, X)
        return pool_op(*prepared)

    # Tensor types accepted for GlobalLpPool's input `X`; the result has the same type.
    T_GlobalLpPool = TypeVar("T_GlobalLpPool", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def GlobalLpPool(self, X: T_GlobalLpPool, *, p: int = 2) -> T_GlobalLpPool:
        r"""[🌐 GlobalLpPool(22)](https://onnx.ai/onnx/operators/onnx__GlobalLpPool.html#globallppool-22 "Online Documentation")


        GlobalLpPool takes an input tensor X and applies Lp pooling to the values
        within each channel. The result equals LpPool with a kernel size
        equal to the spatial dimension of the input tensor.

        Args:
            X: (differentiable) Input data tensor from the previous operator. For the
                image case the dimensions are (N x C x H x W), where N is the batch
                size, C is the number of channels, and H and W are the height and
                the width of the data. For the non image case the dimensions have
                the form (N x C x D1 x D2 ... Dn), where N is the batch size.

            p: p value of the Lp norm used to pool over the input data.
        """

        pool_schema = get_schema("GlobalLpPool", 22, "")
        pool_op = Op(self, "GlobalLpPool", pool_schema)
        prepared = self._prepare_inputs(pool_schema, X)
        return pool_op(*prepared, p=p)

    # Tensor types accepted for GlobalMaxPool's input `X`; the result has the same type.
    T_GlobalMaxPool = TypeVar("T_GlobalMaxPool", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def GlobalMaxPool(self, X: T_GlobalMaxPool) -> T_GlobalMaxPool:
        r"""[🌐 GlobalMaxPool(22)](https://onnx.ai/onnx/operators/onnx__GlobalMaxPool.html#globalmaxpool-22 "Online Documentation")


        GlobalMaxPool takes an input tensor X and max-pools the values
        within each channel. The result equals MaxPool with a kernel size
        equal to the spatial dimension of the input tensor.

        Args:
            X: (differentiable) Input data tensor from the previous operator. For the
                image case the dimensions are (N x C x H x W), where N is the batch
                size, C is the number of channels, and H and W are the height and
                the width of the data. For the non image case the dimensions have
                the form (N x C x D1 x D2 ... Dn), where N is the batch size.
        """

        pool_schema = get_schema("GlobalMaxPool", 22, "")
        pool_op = Op(self, "GlobalMaxPool", pool_schema)
        prepared = self._prepare_inputs(pool_schema, X)
        return pool_op(*prepared)

    # Tensor types accepted for GridSample's input `X`; the result has the same type.
    T1_GridSample = TypeVar(
        "T1_GridSample",
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    # Tensor types accepted for GridSample's `grid` input.
    T2_GridSample = TypeVar("T2_GridSample", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def GridSample(
        self,
        X: T1_GridSample,
        grid: T2_GridSample,
        *,
        align_corners: int = 0,
        mode: str = "linear",
        padding_mode: str = "zeros",
    ) -> T1_GridSample:
        r"""[🌐 GridSample(22)](https://onnx.ai/onnx/operators/onnx__GridSample.html#gridsample-22 "Online Documentation")


        Computes the output `Y` from an input `X` and a flow-field `grid`, using `X` values and pixel locations taken from the `grid`.
        For a spatial input `X` of shape (N, C, H, W), the `grid` has shape (N, H_out, W_out, 2)
        and the output `Y` has shape (N, C, H_out, W_out). For a volumetric input `X` of shape (N, C, D, H, W),
        the `grid` has shape (N, D_out, H_out, W_out, 3) and the output `Y` has shape (N, C, D_out, H_out, W_out).
        More generally, for an input `X` of rank r+2 with shape (N, C, d1, d2, ..., dr),
        the `grid` has shape (N, D1_out, D2_out, ..., Dr_out, r) and the output `Y` has shape (N, C, D1_out, D2_out, ..., Dr_out).

        The tensor `X` holds values at the centers of square pixels (voxels, etc) at locations such as (n, c, d1_in, d2_in, ..., dr_in).
        The (n, d1_out, d2_out, ..., dr_out, :) values of the tensor `grid` are the normalized positions used to interpolate the values
        at the (n, c, d1_out, d2_out, ..., dr_out) locations of the output tensor `Y`, with a specified interpolation method (the mode)
        and a padding mode (for `grid` positions falling outside the 2-dimensional image).

        For example, the values in `grid[n, h_out, w_out, :]` are size-2 vectors giving normalized positions in the 2-dimensional space of `X`.
        They are used to interpolate the output values of `Y[n, c, h_out, w_out]`.

        The GridSample operator is often used for the grid generator and sampler in the
        [Spatial Transformer Networks](https://arxiv.org/abs/1506.02025).
        See also [torch.nn.functional.grid_sample](https://pytorch.org/docs/stable/generated/torch.nn.functional.grid_sample.html).


        Args:
            X: (differentiable) Input tensor of rank r+2 with shape (N, C, D1, D2,
                ..., Dr), where N is the batch size, C is the number of channels,
                and D1, D2, ..., Dr are the spatial dimensions.

            grid: (non-differentiable) Input offset of shape (N, D1_out, D2_out, ...,
                Dr_out, r), where D1_out, D2_out, ..., Dr_out are the spatial
                dimensions of the grid and output, and r is the number of spatial
                dimensions. Grid gives the sampling locations normalized by the
                input spatial dimensions, so most of its values should lie in the
                range of [-1, 1]. For grid values outside the range of [-1, 1], the
                corresponding outputs are handled as defined by padding_mode.
                Following computer vision convention, the coordinates in the
                length-r location vector are listed from the innermost tensor
                dimension to the outermost, the opposite of regular tensor indexing.

            align_corners: With align_corners=1, the extrema (-1 and 1) are taken to
                refer to the center points of the input's corner pixels (voxels,
                etc.). With align_corners=0, they are instead taken to refer to the
                corner points of the input's corner pixels (voxels, etc.), which
                makes the sampling more resolution agnostic.

            mode: Three interpolation modes: linear (default), nearest and cubic. The
                "linear" mode covers linear and N-linear interpolation modes
                depending on the number of spatial dimensions of the input tensor
                (i.e. linear for 1 spatial dimension, bilinear for 2 spatial
                dimensions, etc.). The "cubic" mode likewise covers N-cubic
                interpolation modes following the same rules. The "nearest" mode
                rounds to the nearest even index when the sampling point falls
                halfway between two indices.

            padding_mode: Supported padding modes for outside grid values:
                `zeros`(default), `border`, `reflection`. zeros: use 0 for
                out-of-bound grid locations, border: use border values for
                out-of-bound grid locations, reflection: use values at locations
                reflected by the border for out-of-bound grid locations. If index 0
                represents the margin pixel, the reflected value at index -1 is the
                same as the value at index 1. A location far away from the border
                keeps being reflected until it is in bound. If pixel location
                x = -3.5 reflects by border -1 and becomes x' = 1.5, it then
                reflects by border 1 and becomes x'' = 0.5.
        """

        sample_schema = get_schema("GridSample", 22, "")
        sample_op = Op(self, "GridSample", sample_schema)
        tensor_inputs = self._prepare_inputs(sample_schema, X, grid)
        # Attributes are forwarded as keywords, in the same order as the signature.
        attributes = {
            "align_corners": align_corners,
            "mode": mode,
            "padding_mode": padding_mode,
        }
        return sample_op(*tensor_inputs, **attributes)

    # Tensor types accepted for HardSigmoid's input `X`; the result has the same type.
    T_HardSigmoid = TypeVar("T_HardSigmoid", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def HardSigmoid(
        self, X: T_HardSigmoid, *, alpha: float = 0.20000000298023224, beta: float = 0.5
    ) -> T_HardSigmoid:
        r"""[🌐 HardSigmoid(22)](https://onnx.ai/onnx/operators/onnx__HardSigmoid.html#hardsigmoid-22 "Online Documentation")


        Applies the HardSigmoid function, y = max(0, min(1, alpha * x + beta)),
        elementwise to one input tensor (Tensor<T>) and yields one output
        tensor (Tensor<T>).


        Args:
            X: (differentiable) Input tensor

            alpha: Value of alpha.

            beta: Value of beta.
        """

        sigmoid_schema = get_schema("HardSigmoid", 22, "")
        sigmoid_op = Op(self, "HardSigmoid", sigmoid_schema)
        prepared = self._prepare_inputs(sigmoid_schema, X)
        return sigmoid_op(*prepared, alpha=alpha, beta=beta)

    # Tensor types accepted for HardSwish's input `X`; the result has the same type.
    T_HardSwish = TypeVar("T_HardSwish", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def HardSwish(self, X: T_HardSwish) -> T_HardSwish:
        r"""[🌐 HardSwish(22)](https://onnx.ai/onnx/operators/onnx__HardSwish.html#hardswish-22 "Online Documentation")


        Applies the HardSwish function elementwise to one input tensor (Tensor<T>) and yields
        one output tensor (Tensor<T>). The function is
        y = x * max(0, min(1, alpha * x + beta)) = x * HardSigmoid<alpha, beta>(x),
        where alpha = 1/6 and beta = 0.5.


        Args:
            X: (differentiable) Input tensor
        """

        swish_schema = get_schema("HardSwish", 22, "")
        swish_op = Op(self, "HardSwish", swish_schema)
        prepared = self._prepare_inputs(swish_schema, X)
        return swish_op(*prepared)

    # Tensor types shared by InstanceNormalization's `input`, `scale` and `B`
    # inputs and by its result.
    T_InstanceNormalization = TypeVar(
        "T_InstanceNormalization", BFLOAT16, DOUBLE, FLOAT, FLOAT16
    )

    def InstanceNormalization(
        self,
        input: T_InstanceNormalization,
        scale: T_InstanceNormalization,
        B: T_InstanceNormalization,
        *,
        epsilon: float = 9.999999747378752e-06,
    ) -> T_InstanceNormalization:
        r"""[🌐 InstanceNormalization(22)](https://onnx.ai/onnx/operators/onnx__InstanceNormalization.html#instancenormalization-22 "Online Documentation")


        Performs instance normalization as described in the paper
        https://arxiv.org/abs/1607.08022.

        y = scale * (x - mean) / sqrt(variance + epsilon) + B,
        with mean and variance computed per instance per channel.



        Args:
            input: (differentiable) Input data tensor from the previous operator. For
                the image case the dimensions are (N x C x H x W), where N is the
                batch size, C is the number of channels, and H and W are the height
                and the width of the data. For the non image case the dimensions
                have the form (N x C x D1 x D2 ... Dn), where N is the batch size.

            scale: (differentiable) The input 1-dimensional scale tensor of size C.

            B: (differentiable) The input 1-dimensional bias tensor of size C.

            epsilon: The epsilon value used to avoid division by zero.
        """

        norm_schema = get_schema("InstanceNormalization", 22, "")
        norm_op = Op(self, "InstanceNormalization", norm_schema)
        prepared = self._prepare_inputs(norm_schema, input, scale, B)
        return norm_op(*prepared, epsilon=epsilon)

    # Tensor types shared by LSTM's X, W, R, B, initial_h, initial_c and P
    # inputs and by all three of its outputs.
    T_LSTM = TypeVar("T_LSTM", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    # Tensor type of LSTM's `sequence_lens` input.
    T1_LSTM: TypeAlias = INT32

    def LSTM(
        self,
        X: T_LSTM,
        W: T_LSTM,
        R: T_LSTM,
        B: Optional[T_LSTM] = None,
        sequence_lens: Optional[T1_LSTM] = None,
        initial_h: Optional[T_LSTM] = None,
        initial_c: Optional[T_LSTM] = None,
        P: Optional[T_LSTM] = None,
        *,
        activation_alpha: Optional[Sequence[float]] = None,
        activation_beta: Optional[Sequence[float]] = None,
        activations: Optional[Sequence[str]] = None,
        clip: Optional[float] = None,
        direction: str = "forward",
        hidden_size: Optional[int] = None,
        input_forget: int = 0,
        layout: int = 0,
    ) -> Tuple[T_LSTM, T_LSTM, T_LSTM]:
        r"""[🌐 LSTM(22)](https://onnx.ai/onnx/operators/onnx__LSTM.html#lstm-22 "Online Documentation")


        Computes a one-layer LSTM. This operator is usually supported through a
        custom implementation such as CuDNN.

        Notation:

        * `X` - input tensor
        * `i` - input gate
        * `o` - output gate
        * `f` - forget gate
        * `c` - cell gate
        * `t` - time step (t-1 refers to the previous time step)
        * `W[iofc]` - W parameter weight matrix of the input, output, forget, and cell gates
        * `R[iofc]` - R recurrence weight matrix of the input, output, forget, and cell gates
        * `Wb[iofc]` - W bias vectors of the input, output, forget, and cell gates
        * `Rb[iofc]` - R bias vectors of the input, output, forget, and cell gates
        * `P[iof]`  - P peephole weight vector of the input, output, and forget gates
        * `WB[iofc]` - W parameter weight matrix of the backward input, output, forget, and cell gates
        * `RB[iofc]` - R recurrence weight matrix of the backward input, output, forget, and cell gates
        * `WBb[iofc]` - W bias vectors of the backward input, output, forget, and cell gates
        * `RBb[iofc]` - R bias vectors of the backward input, output, forget, and cell gates
        * `PB[iof]`  - P peephole weight vector of the backward input, output, and forget gates
        * `H` - Hidden state
        * `num_directions` - 2 when direction == bidirectional, otherwise 1

        Activation functions:

        * Relu(x)                - max(0, x)
        * Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})
        * Sigmoid(x)             - 1/(1 + e^{-x})

        NOTE: The activation functions listed below are optional

        * Affine(x)              - alpha*x + beta
        * LeakyRelu(x)           - x if x >= 0 else alpha * x
        * ThresholdedRelu(x)     - x if x >= alpha else 0
        * ScaledTanh(x)          - alpha*Tanh(beta*x)
        * HardSigmoid(x)         - min(max(alpha*x + beta, 0), 1)
        * Elu(x)                 - x if x >= 0 else alpha*(e^x - 1)
        * Softsign(x)            - x/(1 + |x|)
        * Softplus(x)            - log(1 + e^x)

        Equations (Default: f=Sigmoid, g=Tanh, h=Tanh):

        * it = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Pi (.) Ct-1 + Wbi + Rbi)
        * ft = f(Xt*(Wf^T) + Ht-1*(Rf^T) + Pf (.) Ct-1 + Wbf + Rbf)
        * ct = g(Xt*(Wc^T) + Ht-1*(Rc^T) + Wbc + Rbc)
        * Ct = ft (.) Ct-1 + it (.) ct
        * ot = f(Xt*(Wo^T) + Ht-1*(Ro^T) + Po (.) Ct + Wbo + Rbo)
        * Ht = ot (.) h(Ct)

        This operator has **optional** inputs/outputs. See `ONNX <https://github.com/onnx/onnx/blob/master/docs/IR.md>`_ for more details about how optional arguments are represented. An empty string may stand in for an actual argument's name to mark a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also simply be omitted.


        Args:
            X: (differentiable) Input sequences packed (and potentially padded) into
                a single 3-D tensor of shape `[seq_length, batch_size, input_size]`.

            W: (differentiable) Weight tensor for the gates: `W[iofc]` concatenated
                with `WB[iofc]` (if bidirectional) along dimension 0. Its shape is
                `[num_directions, 4*hidden_size, input_size]`.

            R: (differentiable) Recurrence weight tensor: `R[iofc]` concatenated with
                `RB[iofc]` (if bidirectional) along dimension 0. Its shape is
                `[num_directions, 4*hidden_size, hidden_size]`.

            B: (optional, differentiable) The bias tensor for input gate:
                `[Wb[iofc], Rb[iofc]]` concatenated with `[WBb[iofc], RBb[iofc]]`
                (if bidirectional) along dimension 0. Its shape is
                `[num_directions, 8*hidden_size]`. Optional: assumed to be 0 when
                not specified.

            sequence_lens: (optional, non-differentiable) Optional tensor giving the
                lengths of the sequences in a batch. When not specified, every
                sequence in the batch is assumed to have length `seq_length`. Its
                shape is `[batch_size]`.

            initial_h: (optional, non-differentiable) Optional initial value of the
                hidden. Assumed to be 0 when not specified. Its shape is
                `[num_directions, batch_size, hidden_size]`.

            initial_c: (optional, non-differentiable) Optional initial value of the
                cell. Assumed to be 0 when not specified. Its shape is
                `[num_directions, batch_size, hidden_size]`.

            P: (optional, differentiable) Weight tensor for peepholes: `P[iof]`
                concatenated with `PB[iof]` (if bidirectional) along dimension 0.
                Its shape is `[num_directions, 3*hidden_size]`. Optional: assumed
                to be 0 when not specified.

            activation_alpha: Optional scaling values used by some activation
                functions. The values are consumed in the order of the activation
                functions, for example (f, g, h) in LSTM. Defaults match those of
                the corresponding ONNX operators. For example, with LeakyRelu the
                default alpha is 0.01.

            activation_beta: Optional scaling values used by some activation
                functions. The values are consumed in the order of the activation
                functions, for example (f, g, h) in LSTM. Defaults match those of
                the corresponding ONNX operators.

            activations: A list of 3 (or 6 if bidirectional) activation functions for
                input, output, forget, cell, and hidden. Each one must be among
                the activation functions specified above. Optional: the equations
                show the defaults used when not specified.

            clip: Cell clip threshold. Clipping bounds the elements of a tensor to
                the range [-threshold, +threshold] and is applied to the input of
                activations. No clipping is done when not specified.

            direction: Whether the RNN is forward, reverse, or bidirectional. Must
                be one of forward (default), reverse, or bidirectional.

            hidden_size: Number of neurons in the hidden layer

            input_forget: Couple the input and forget gates if 1.

            layout: Shape format of the inputs X, initial_h, initial_c and the
                outputs Y, Y_h, Y_c. With 0, the expected shapes are: X.shape =
                [seq_length, batch_size, input_size], Y.shape = [seq_length,
                num_directions, batch_size, hidden_size], initial_h.shape =
                Y_h.shape = initial_c.shape = Y_c.shape = [num_directions,
                batch_size, hidden_size]. With 1, the expected shapes are: X.shape =
                [batch_size, seq_length, input_size], Y.shape = [batch_size,
                seq_length, num_directions, hidden_size], initial_h.shape =
                Y_h.shape = initial_c.shape = Y_c.shape = [batch_size,
                num_directions, hidden_size].
        """

        lstm_schema = get_schema("LSTM", 22, "")
        lstm_op = Op(self, "LSTM", lstm_schema)
        tensor_inputs = self._prepare_inputs(
            lstm_schema, X, W, R, B, sequence_lens, initial_h, initial_c, P
        )
        # Attributes are forwarded as keywords, in the same order as the signature.
        attributes = {
            "activation_alpha": activation_alpha,
            "activation_beta": activation_beta,
            "activations": activations,
            "clip": clip,
            "direction": direction,
            "hidden_size": hidden_size,
            "input_forget": input_forget,
            "layout": layout,
        }
        return lstm_op(*tensor_inputs, **attributes)

    # Tensor types accepted for LpNormalization's `input`; the result has the same type.
    T_LpNormalization = TypeVar("T_LpNormalization", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def LpNormalization(
        self, input: T_LpNormalization, *, axis: int = -1, p: int = 2
    ) -> T_LpNormalization:
        r"""[🌐 LpNormalization(22)](https://onnx.ai/onnx/operators/onnx__LpNormalization.html#lpnormalization-22 "Online Documentation")


        Applies Lp-normalization to a given matrix along the provided axis.


        Args:
            input: (differentiable) Input matrix

            axis: The axis along which normalization is applied; -1 means the last
                axis.

            p: The order of the normalization; only 1 or 2 are supported.
        """

        norm_schema = get_schema("LpNormalization", 22, "")
        norm_op = Op(self, "LpNormalization", norm_schema)
        prepared = self._prepare_inputs(norm_schema, input)
        return norm_op(*prepared, axis=axis, p=p)

    # Constrained type variable used to annotate LpPool's ``X`` parameter and
    # its return value (BFLOAT16, DOUBLE, FLOAT or FLOAT16).
    T_LpPool = TypeVar("T_LpPool", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def LpPool(
        self,
        X: T_LpPool,
        *,
        auto_pad: str = "NOTSET",
        ceil_mode: int = 0,
        dilations: Optional[Sequence[int]] = None,
        kernel_shape: Sequence[int],
        p: int = 2,
        pads: Optional[Sequence[int]] = None,
        strides: Optional[Sequence[int]] = None,
    ) -> T_LpPool:
        r"""[🌐 LpPool(22)](https://onnx.ai/onnx/operators/onnx__LpPool.html#lppool-22 "Online Documentation")


        LpPool takes an input tensor X and performs Lp pooling over it, driven by
        the kernel sizes, stride sizes, and pad lengths. Lp pooling computes the
        Lp norm over all values of a kernel-sized subset of the input tensor and
        downsamples the data into the output tensor Y for further processing.
        The output spatial shape is:
        ```
        output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - {kernelSpatialShape}) / strides_spatial_shape[i] + 1)
        ```
        or
        ```
        output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - {kernelSpatialShape}) / strides_spatial_shape[i] + 1)
        ```
        if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`.

        `auto_pad` is a DEPRECATED attribute. If it is currently in use, the
        output spatial shape is:
        ```
        VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - {kernelSpatialShape} + 1) / strides_spatial_shape[i])
        SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
        ```
        and, for `SAME_UPPER` or `SAME_LOWER`, the pad shape is:
        ```
        pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + {kernelSpatialShape} - input_spatial_shape[i]
        ```

        NOTE(review): `{kernelSpatialShape}` is reproduced verbatim from the
        generated operator text; it looks like an unexpanded placeholder for the
        kernel extent along axis `i` - confirm against the ONNX documentation.

        Args:
            X: (differentiable) Input data tensor from the previous operator. For
                the image case the dimensions are (N x C x H x W), where N is the
                batch size, C is the number of channels, and H and W are the height
                and the width of the data. For the non-image case the dimensions
                have the form (N x C x D1 x D2 ... Dn), where N is the batch size.

            auto_pad: Must be one of NOTSET, SAME_UPPER, SAME_LOWER or VALID. The
                default, NOTSET, means explicit padding is used. SAME_UPPER or
                SAME_LOWER pad the input so that `output_shape[i] =
                ceil(input_shape[i] / strides[i])` for each axis `i`. The padding
                is split equally, or almost equally (depending on whether it is
                even or odd), between the two sides. When the padding is an odd
                number, the extra padding goes at the end for SAME_UPPER and at
                the beginning for SAME_LOWER.

            ceil_mode: Whether to use ceil or floor (default) to compute the output
                shape.

            dilations: Dilation value along each spatial axis of the filter. If not
                present, the dilation defaults to 1 along each spatial axis.

            kernel_shape: The size of the kernel along each axis.

            p: p value of the Lp norm used to pool over the input data.

            pads: Padding for the beginning and ending along each spatial axis; it
                can take any value greater than or equal to 0. The values represent
                the number of pixels added to the beginning and end part of the
                corresponding axis. The `pads` format should be [x1_begin,
                x2_begin...x1_end, x2_end,...], where xi_begin is the number of
                pixels added at the beginning of axis `i` and xi_end is the number
                of pixels added at the end of axis `i`. This attribute cannot be
                used simultaneously with the auto_pad attribute. If not present,
                the padding defaults to 0 along the start and end of each spatial
                axis.

            strides: Stride along each spatial axis. If not present, the stride
                defaults to 1 along each spatial axis.
        """

        op_schema = get_schema("LpPool", 22, "")
        lp_pool = Op(self, "LpPool", op_schema)
        prepared_inputs = self._prepare_inputs(op_schema, X)
        # Attributes are forwarded by keyword, exactly as named in the signature.
        attributes = {
            "auto_pad": auto_pad,
            "ceil_mode": ceil_mode,
            "dilations": dilations,
            "kernel_shape": kernel_shape,
            "p": p,
            "pads": pads,
            "strides": strides,
        }
        return lp_pool(*prepared_inputs, **attributes)

    # Constrained type variable used to annotate MaxPool's ``X`` parameter and
    # the first element of its return tuple.
    T_MaxPool = TypeVar("T_MaxPool", BFLOAT16, DOUBLE, FLOAT, FLOAT16, INT8, UINT8)

    # Alias (INT64) used to annotate the second element of MaxPool's return tuple.
    I_MaxPool: TypeAlias = INT64

    def MaxPool(
        self,
        X: T_MaxPool,
        *,
        auto_pad: str = "NOTSET",
        ceil_mode: int = 0,
        dilations: Optional[Sequence[int]] = None,
        kernel_shape: Sequence[int],
        pads: Optional[Sequence[int]] = None,
        storage_order: int = 0,
        strides: Optional[Sequence[int]] = None,
    ) -> Tuple[T_MaxPool, I_MaxPool]:
        r"""[🌐 MaxPool(22)](https://onnx.ai/onnx/operators/onnx__MaxPool.html#maxpool-22 "Online Documentation")


        MaxPool takes an input tensor X and performs max pooling over it, driven
        by the kernel sizes, stride sizes, and pad lengths. Max pooling computes
        the max over all values of a kernel-sized subset of the input tensor and
        downsamples the data into the output tensor Y for further processing.
        The output spatial shape is calculated differently depending on whether
        explicit padding (via pads) or auto padding (via auto_pad) is used.
        With explicit padding (https://pytorch.org/docs/stable/generated/torch.nn.MaxPool2d.html?highlight=maxpool#torch.nn.MaxPool2d):
        ```
        output_spatial_shape[i] = floor((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
        ```
        or
        ```
        output_spatial_shape[i] = ceil((input_spatial_shape[i] + pad_shape[i] - dilation[i] * (kernel_shape[i] - 1) - 1) / strides_spatial_shape[i] + 1)
        ```
        if ceil_mode is enabled. `pad_shape[i]` is the sum of pads along axis `i`.
        Sliding windows that would start in the right padded region are ignored.

        `auto_pad` is a DEPRECATED attribute. If it is currently in use, the
        output spatial shape when ceil_mode is enabled is:
        ```
        VALID: output_spatial_shape[i] = ceil((input_spatial_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) + 1) / strides_spatial_shape[i])
        SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides_spatial_shape[i])
        ```
        or, when ceil_mode is disabled (https://www.tensorflow.org/api_docs/python/tf/keras/layers/AveragePooling2D):
        ```
        VALID: output_spatial_shape[i] = floor((input_spatial_shape[i] - ((kernel_spatial_shape[i] - 1) * dilations[i] + 1)) / strides_spatial_shape[i]) + 1
        SAME_UPPER or SAME_LOWER: output_spatial_shape[i] = floor((input_spatial_shape[i] - 1) / strides_spatial_shape[i]) + 1
        ```
        and, for `SAME_UPPER` or `SAME_LOWER`, the pad shape is:
        ```
        pad_shape[i] = (output_spatial_shape[i] - 1) * strides_spatial_shape[i] + ((kernel_spatial_shape[i] - 1) * dilations[i] + 1) - input_spatial_shape[i]
        ```
        The output of each pooling window is the maximum of its elements,
        excluding padding.


        Args:
            X: (differentiable) Input data tensor from the previous operator. For
                the image case the dimensions are (N x C x H x W), where N is the
                batch size, C is the number of channels, and H and W are the height
                and the width of the data. For the non-image case the dimensions
                have the form (N x C x D1 x D2 ... Dn), where N is the batch size.
                Optionally, if dimension denotation is in effect, the operation
                expects the input data tensor to arrive with the dimension
                denotation of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE,
                DATA_FEATURE ...].

            auto_pad: Must be one of NOTSET, SAME_UPPER, SAME_LOWER or VALID. The
                default, NOTSET, means explicit padding is used. SAME_UPPER or
                SAME_LOWER pad the input so that `output_shape[i] =
                ceil(input_shape[i] / strides[i])` for each axis `i`. The padding
                is split equally, or almost equally (depending on whether it is
                even or odd), between the two sides. When the padding is an odd
                number, the extra padding goes at the end for SAME_UPPER and at
                the beginning for SAME_LOWER.

            ceil_mode: Whether to use ceil or floor (default) to compute the output
                shape.

            dilations: Dilation value along each spatial axis of the filter. If not
                present, the dilation defaults to 1 along each spatial axis.

            kernel_shape: The size of the kernel along each axis.

            pads: Padding for the beginning and ending along each spatial axis; it
                can take any value greater than or equal to 0. The values represent
                the number of pixels added to the beginning and end part of the
                corresponding axis. The `pads` format should be [x1_begin,
                x2_begin...x1_end, x2_end,...], where xi_begin is the number of
                pixels added at the beginning of axis `i` and xi_end is the number
                of pixels added at the end of axis `i`. This attribute cannot be
                used simultaneously with the auto_pad attribute. If not present,
                the padding defaults to 0 along the start and end of each spatial
                axis.

            storage_order: The storage order of the tensor. 0 is row major, and 1
                is column major. This attribute is used only to convert an n-tuple
                index value into a single integer value for producing the second
                output.

            strides: Stride along each spatial axis. If not present, the stride
                defaults to 1 along each spatial axis.
        """

        op_schema = get_schema("MaxPool", 22, "")
        max_pool = Op(self, "MaxPool", op_schema)
        prepared_inputs = self._prepare_inputs(op_schema, X)
        # Attributes are forwarded by keyword, exactly as named in the signature.
        attributes = {
            "auto_pad": auto_pad,
            "ceil_mode": ceil_mode,
            "dilations": dilations,
            "kernel_shape": kernel_shape,
            "pads": pads,
            "storage_order": storage_order,
            "strides": strides,
        }
        return max_pool(*prepared_inputs, **attributes)

    # Constrained type variable used to annotate MaxRoiPool's ``X`` and ``rois``
    # parameters and its return value.
    T_MaxRoiPool = TypeVar("T_MaxRoiPool", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def MaxRoiPool(
        self,
        X: T_MaxRoiPool,
        rois: T_MaxRoiPool,
        *,
        pooled_shape: Sequence[int],
        spatial_scale: float = 1.0,
    ) -> T_MaxRoiPool:
        r"""[🌐 MaxRoiPool(22)](https://onnx.ai/onnx/operators/onnx__MaxRoiPool.html#maxroipool-22 "Online Documentation")


        ROI max pool takes an input tensor X together with regions of interest
        (RoIs) and applies max pooling across each RoI, producing a 4-D output
        tensor of shape (num_rois, channels, pooled_shape[0], pooled_shape[1]).

        Args:
            X: (differentiable) Input data tensor from the previous operator. For
                the image case the dimensions are (N x C x H x W), where N is the
                batch size, C is the number of channels, and H and W are the height
                and the width of the data.

            rois: (non-differentiable) RoIs (Regions of Interest) to pool over.
                Should be a 2-D tensor of shape (num_rois, 5) given as [[batch_id,
                x1, y1, x2, y2], ...].

            pooled_shape: ROI pool output shape (height, width).

            spatial_scale: Multiplicative spatial scale factor that translates ROI
                coordinates from their input scale to the scale used when pooling.
        """

        op_schema = get_schema("MaxRoiPool", 22, "")
        max_roi_pool = Op(self, "MaxRoiPool", op_schema)
        prepared_inputs = self._prepare_inputs(op_schema, X, rois)
        # Attributes are forwarded by keyword, exactly as named in the signature.
        attributes = {"pooled_shape": pooled_shape, "spatial_scale": spatial_scale}
        return max_roi_pool(*prepared_inputs, **attributes)

    # Constrained type variable used to annotate MaxUnpool's ``X`` parameter and
    # its return value.
    T1_MaxUnpool = TypeVar("T1_MaxUnpool", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    # Alias (INT64) used to annotate MaxUnpool's ``I`` and ``output_shape`` parameters.
    T2_MaxUnpool: TypeAlias = INT64

    def MaxUnpool(
        self,
        X: T1_MaxUnpool,
        I: T2_MaxUnpool,
        output_shape: Optional[T2_MaxUnpool] = None,
        *,
        kernel_shape: Sequence[int],
        pads: Optional[Sequence[int]] = None,
        strides: Optional[Sequence[int]] = None,
    ) -> T1_MaxUnpool:
        r"""[🌐 MaxUnpool(22)](https://onnx.ai/onnx/operators/onnx__MaxUnpool.html#maxunpool-22 "Online Documentation")


        MaxUnpool essentially computes the partial inverse of the MaxPool op.
        Its input information is typically the output information of a MaxPool
        op. The first input tensor X is the tensor to be unpooled, typically the
        pooled tensor (first output) from MaxPool. The second input tensor, I,
        holds the indices of the (locally maximal) elements corresponding to the
        elements of X; it is typically the second output of the MaxPool op. The
        third (optional) input is a tensor that specifies the output size of the
        unpooling operation.

        The inverse is only 'partial' because every non-maximal value from the
        original input to MaxPool is set to zero in the output of MaxUnpool.
        Pooling the result of an unpooling operation should give back the
        original input to the unpooling op.

        MaxUnpool can produce the same output size for several input sizes, which
        makes the unpooling op ambiguous. The third input, `output_shape`, is
        meant to disambiguate the op and produce an output tensor of
        known/predictable size.

        Besides its inputs, MaxUnpool takes three attributes - kernel_shape,
        strides, and pads - which define the exact unpooling op. They typically
        have the same values as in the pooling op that the unpooling op is trying
        to invert.


        Args:
            X: (differentiable) Input data tensor that has to be unpooled. This
                tensor is typically the first output of the MaxPool op. For the
                image case the dimensions are (N x C x H x W), where N is the batch
                size, C is the number of channels, and H and W are the height and
                the width of the data. For the non-image case the dimensions have
                the form (N x C x D1 x D2 ... Dn), where N is the batch size.
                Optionally, if dimension denotation is in effect, the operation
                expects the input data tensor to arrive with the dimension
                denotation of [DATA_BATCH, DATA_CHANNEL, DATA_FEATURE,
                DATA_FEATURE ...].

            I: (non-differentiable) Input data tensor containing the indices
                corresponding to elements in the first input tensor X. This tensor
                is typically the second output of the MaxPool op. Dimensions must
                be the same as input tensor X. The indices are linear, i.e.
                computed considering the tensor as a flattened 1-D tensor, assuming
                row-major storage. The linear indices should also not consider
                padding, so the values in indices are in the range [0, N x C x D1 x
                ... x Dn).

            output_shape: (optional, non-differentiable) The shape of the output
                can be set explicitly, which causes pads values to be auto
                generated. If 'output_shape' is specified, 'pads' values are
                ignored.

            kernel_shape: The size of the kernel along each axis.

            pads: Padding for the beginning and ending along each spatial axis; it
                can take any value greater than or equal to 0. The values represent
                the number of pixels added to the beginning and end part of the
                corresponding axis. The `pads` format should be [x1_begin,
                x2_begin...x1_end, x2_end,...], where xi_begin is the number of
                pixels added at the beginning of axis `i` and xi_end is the number
                of pixels added at the end of axis `i`. This attribute cannot be
                used simultaneously with the auto_pad attribute. If not present,
                the padding defaults to 0 along the start and end of each spatial
                axis.

            strides: Stride along each spatial axis. If not present, the stride
                defaults to 1 along each spatial axis.
        """

        op_schema = get_schema("MaxUnpool", 22, "")
        max_unpool = Op(self, "MaxUnpool", op_schema)
        prepared_inputs = self._prepare_inputs(op_schema, X, I, output_shape)
        # Attributes are forwarded by keyword, exactly as named in the signature.
        attributes = {"kernel_shape": kernel_shape, "pads": pads, "strides": strides}
        return max_unpool(*prepared_inputs, **attributes)

    # Constrained type variable used to annotate Mish's ``X`` parameter and its
    # return value.
    T_Mish = TypeVar("T_Mish", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Mish(self, X: T_Mish) -> T_Mish:
        r"""[🌐 Mish(22)](https://onnx.ai/onnx/operators/onnx__Mish.html#mish-22 "Online Documentation")


        Mish: A Self Regularized Non-Monotonic Neural Activation Function.

        Applies the unit element-wise to the input tensor X using the formula:

        ::

            mish(x) = x * tanh(softplus(x)) = x * tanh(ln(1 + e^{x}))


        Args:
            X: (differentiable) Input tensor.
        """

        op_schema = get_schema("Mish", 22, "")
        mish = Op(self, "Mish", op_schema)
        prepared_inputs = self._prepare_inputs(op_schema, X)
        return mish(*prepared_inputs)

    # Constrained type variable used to annotate Multinomial's ``input`` parameter.
    T1_Multinomial = TypeVar("T1_Multinomial", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    # Alias (INT32 or INT64) used as Multinomial's return annotation.
    T2_Multinomial: TypeAlias = Union[INT32, INT64]

    def Multinomial(
        self,
        input: T1_Multinomial,
        *,
        dtype: int = 6,
        sample_size: int = 1,
        seed: Optional[float] = None,
    ) -> T2_Multinomial:
        r"""[🌐 Multinomial(22)](https://onnx.ai/onnx/operators/onnx__Multinomial.html#multinomial-22 "Online Documentation")


        Generates a tensor of samples drawn from a multinomial distribution,
        according to the probabilities of each of the possible outcomes.


        Args:
            input: Input tensor with shape [batch_size, class_size], where
                class_size is the number of all possible outcomes. Each value along
                the axis zero represents the unnormalized log-probability of each
                corresponding outcome in a batch.

            dtype: (Optional) The data type for the elements of the output tensor;
                if not specified, int32 is used.

            sample_size: Number of times to sample.

            seed: (Optional) Seed to the random generator; if not specified, one is
                auto generated.
        """

        op_schema = get_schema("Multinomial", 22, "")
        multinomial = Op(self, "Multinomial", op_schema)
        prepared_inputs = self._prepare_inputs(op_schema, input)
        # Attributes are forwarded by keyword, exactly as named in the signature.
        attributes = {"dtype": dtype, "sample_size": sample_size, "seed": seed}
        return multinomial(*prepared_inputs, **attributes)

    # Constrained type variable used to annotate NegativeLogLikelihoodLoss's
    # ``input`` and optional ``weight`` parameters and its return value.
    T_NegativeLogLikelihoodLoss = TypeVar(
        "T_NegativeLogLikelihoodLoss", BFLOAT16, DOUBLE, FLOAT, FLOAT16
    )

    # Constrained type variable (INT32 or INT64) used to annotate the ``target`` parameter.
    Tind_NegativeLogLikelihoodLoss = TypeVar("Tind_NegativeLogLikelihoodLoss", INT32, INT64)

    def NegativeLogLikelihoodLoss(
        self,
        input: T_NegativeLogLikelihoodLoss,
        target: Tind_NegativeLogLikelihoodLoss,
        weight: Optional[T_NegativeLogLikelihoodLoss] = None,
        *,
        ignore_index: Optional[int] = None,
        reduction: str = "mean",
    ) -> T_NegativeLogLikelihoodLoss:
        r"""[🌐 NegativeLogLikelihoodLoss(22)](https://onnx.ai/onnx/operators/onnx__NegativeLogLikelihoodLoss.html#negativeloglikelihoodloss-22 "Online Documentation")


        Computes the (weighted) negative log likelihood loss.
        The "input" tensor has shape (N, C, d1, d2, ..., dk) where k >= 0 and
        contains log-probabilities for input[n, :, d_1, d_2,..., d_k] being in a
        class of [0, C).
        The "target" input tensor has shape (N, d1, d2, ..., dk). It encodes class
        labels (one of C classes), or it may contain a special value (indicated by
        the attribute ignore_index) for N x d1 x d2 x ... x dk samples.
        The loss value for input[n, :, d_1, d_2,...d_k] being classified as class
        c = target[n][d_1][d_2]...[d_k] is computed as:

        ::

            loss[n][d_1][d_2]...[d_k] = -input[n][c][d_1][d_2]...[d_k].



        When the optional "weight" is provided, the sample loss is calculated as:

        ::

            loss[n][d_1][d_2]...[d_k] = -input[n][c][d_1][d_2]...[d_k] * weight[c].



        The loss is zero when the target value equals ignore_index:

        ::

            loss[n][d_1][d_2]...[d_k] = 0, when target[n][d_1][d_2]...[d_k] = ignore_index



        If the "reduction" attribute is set to "none", the operator's output is
        the above loss with shape (N, d1, d2, ..., dk).
        If the "reduction" attribute is set to "mean" (the default attribute
        value), the output loss is (weight) averaged:

        ::

            mean(loss), if "weight" is not provided,



        or, if weight is provided,

        ::

            sum(loss) / sum(weight[target[n][d_1][d_2]...[d_k]]), for all samples.



        If the "reduction" attribute is set to "sum", the output is a scalar:
        `sum(loss)`.

        See also https://pytorch.org/docs/stable/nn.html#torch.nn.NLLLoss.

        Example 1:

        ::

            // negative log likelihood loss, "none" reduction
            N, C, d1 = 2, 3, 2
            input = [[[1.0, 2.0], [2.0, 2.0], [3.0, 2.0]],
                      [[0.0, 1.0], [2.0, 2.0], [1.0, 2]]]
            target = [[2, 1], [0, 2]]

            loss = np.zeros((N, d1))
            for n in range(N):
                for d_1 in range(d1):
                    c = target[n][d_1]
                    loss[n][d_1] = -input[n][c][d_1]

            // print(loss)
            // [[-3. -2.]
            //  [-0. -2.]]



        Example 2:

        ::

            // weighted negative log likelihood loss, sum reduction
            N, C, d1 = 2, 3, 2
            input = [[[1.0, 2.0], [2.0, 2.0], [3.0, 2.0]],
                    [[0.0, 1.0], [2.0, 2.0], [1.0, 2]]]
            target = [[2, 1], [0, 2]]
            weight = [0.2, 0.3, 0.1]
            loss = np.zeros((N, d1))
            for n in range(N):
                for d_1 in range(d1):
                    c = target[n][d_1]
                    loss[n][d_1] = -input[n][c][d_1] * weight[c]

            loss = np.sum(loss)
            // print(loss)
            // -1.1



        Example 3:

        ::

            // weighted negative log likelihood loss, mean reduction
            N, C, d1 = 2, 3, 2
            input = [[[1.0, 2.0], [2.0, 2.0], [3.0, 2.0]],
                    [[0.0, 1.0], [2.0, 2.0], [1.0, 2]]]
            target = [[2, 1], [0, 2]]
            weight = [0.2, 0.3, 0.1]
            loss = np.zeros((N, d1))
            weight_total = 0
            for n in range(N):
                for d_1 in range(d1):
                    c = target[n][d_1]
                    loss[n][d_1] = -input[n][c][d_1] * weight[c]
                    weight_total = weight_total + weight[c]

            loss = np.sum(loss) / weight_total
            // print(loss)
            // -1.57




        Args:
            input: (differentiable) Input tensor of shape (N, C) or (N, C, d1, d2,
                ..., dk).

            target: (non-differentiable) Target tensor of shape (N) or (N, d1, d2,
                ..., dk). Target element values shall be in the range [0, C). If
                ignore_index is specified, it may have a value outside [0, C), and
                the target values should either be in the range [0, C) or have the
                value ignore_index.

            weight: (optional, non-differentiable) Optional rescaling weight
                tensor. If given, it has to be a tensor of size C. Otherwise, it is
                treated as if having all ones.

            ignore_index: Specifies a target value that is ignored and does not
                contribute to the input gradient. It's an optional value.

            reduction: Type of reduction to apply to the loss: none, sum, mean
                (default). 'none': the output is the loss for each sample. 'sum':
                the output will be summed. 'mean': the sum of the output will be
                divided by the sum of applied weights.
        """

        op_schema = get_schema("NegativeLogLikelihoodLoss", 22, "")
        nll_loss = Op(self, "NegativeLogLikelihoodLoss", op_schema)
        prepared_inputs = self._prepare_inputs(op_schema, input, target, weight)
        # Attributes are forwarded by keyword, exactly as named in the signature.
        attributes = {"ignore_index": ignore_index, "reduction": reduction}
        return nll_loss(*prepared_inputs, **attributes)

    # Constrained type variable used to annotate RNN's ``X``, ``W``, ``R``, ``B``
    # and ``initial_h`` parameters and both elements of its return tuple.
    T_RNN = TypeVar("T_RNN", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    # Alias (INT32) used to annotate RNN's ``sequence_lens`` parameter.
    T1_RNN: TypeAlias = INT32

    def RNN(
        self,
        X: T_RNN,
        W: T_RNN,
        R: T_RNN,
        B: Optional[T_RNN] = None,
        sequence_lens: Optional[T1_RNN] = None,
        initial_h: Optional[T_RNN] = None,
        *,
        activation_alpha: Optional[Sequence[float]] = None,
        activation_beta: Optional[Sequence[float]] = None,
        activations: Sequence[str] = ("Tanh", "Tanh"),
        clip: Optional[float] = None,
        direction: str = "forward",
        hidden_size: Optional[int] = None,
        layout: int = 0,
    ) -> Tuple[T_RNN, T_RNN]:
        r"""[🌐 RNN(22)](https://onnx.ai/onnx/operators/onnx__RNN.html#rnn-22 "Online Documentation")


        Computes a one-layer simple RNN. This operator is usually supported
        via some custom implementation such as CuDNN.

        Notations:

        * `X` - input tensor
        * `i` - input gate
        * `t` - time step (t-1 means previous time step)
        * `Wi` - W parameter weight matrix for input gate
        * `Ri` - R recurrence weight matrix for input gate
        * `Wbi` - W parameter bias vector for input gate
        * `Rbi` - R parameter bias vector for input gate
        * `WBi` - W parameter weight matrix for backward input gate
        * `RBi` - R recurrence weight matrix for backward input gate
        * `WBbi` - WR bias vectors for backward input gate
        * `RBbi` - RR bias vectors for backward input gate
        * `H` - Hidden state
        * `num_directions` - 2 if direction == bidirectional else 1

        Activation functions:

        * Relu(x)                - max(0, x)
        * Tanh(x)                - (1 - e^{-2x})/(1 + e^{-2x})
        * Sigmoid(x)             - 1/(1 + e^{-x})

        NOTE: The following are optional

        * Affine(x)              - alpha*x + beta
        * LeakyRelu(x)           - x if x >= 0 else alpha * x
        * ThresholdedRelu(x)     - x if x >= alpha else 0
        * ScaledTanh(x)          - alpha*Tanh(beta*x)
        * HardSigmoid(x)         - min(max(alpha*x + beta, 0), 1)
        * Elu(x)                 - x if x >= 0 else alpha*(e^x - 1)
        * Softsign(x)            - x/(1 + |x|)
        * Softplus(x)            - log(1 + e^x)

        Equations (Default: f=Tanh):

        * Ht = f(Xt*(Wi^T) + Ht-1*(Ri^T) + Wbi + Rbi)

        This operator has **optional** inputs/outputs. See `ONNX <https://github.com/onnx/onnx/blob/master/docs/IR.md>`_ for more details about the representation of optional arguments. An empty string may be used in the place of an actual argument's name to indicate a missing argument. Trailing optional arguments (those not followed by an argument that is present) may also be simply omitted.


        Args:
            X: (differentiable) The input sequences packed (and potentially padded)
                into one 3-D tensor with the shape of `[seq_length, batch_size,
                input_size]`.

            W: (differentiable) The weight tensor for input gate. Concatenation of
                `Wi` and `WBi` (if bidirectional). The tensor has shape
                `[num_directions, hidden_size, input_size]`.

            R: (differentiable) The recurrence weight tensor. Concatenation of `Ri`
                and `RBi` (if bidirectional). The tensor has shape
                `[num_directions, hidden_size, hidden_size]`.

            B: (optional, differentiable) The bias tensor for input gate.
                Concatenation of `[Wbi, Rbi]` and `[WBbi, RBbi]` (if
                bidirectional). The tensor has shape `[num_directions,
                2*hidden_size]`. Optional: if not specified, assumed to be 0.

            sequence_lens: (optional, non-differentiable) Optional tensor
                specifying lengths of the sequences in a batch. If not specified,
                all sequences in the batch are assumed to have length
                `seq_length`. It has shape `[batch_size]`.

            initial_h: (optional, non-differentiable) Optional initial value of the
                hidden. If not specified, assumed to be 0. It has shape
                `[num_directions, batch_size, hidden_size]`.

            activation_alpha: Optional scaling values used by some activation
                functions. The values are consumed in the order of activation
                functions, for example (f, g, h) in LSTM. Default values are the
                same as of corresponding ONNX operators. For example, with
                LeakyRelu the default alpha is 0.01.

            activation_beta: Optional scaling values used by some activation
                functions. The values are consumed in the order of activation
                functions, for example (f, g, h) in LSTM. Default values are the
                same as of corresponding ONNX operators.

            activations: One (or two if bidirectional) activation function for the
                input gate. The activation function must be one of the activation
                functions specified above. Optional: default `Tanh` if not
                specified.

            clip: Cell clip threshold. Clipping bounds the elements of a tensor in
                the range of [-threshold, +threshold] and is applied to the input
                of activations. No clip if not specified.

            direction: Specify if the RNN is forward, reverse, or bidirectional.
                Must be one of forward (default), reverse, or bidirectional.

            hidden_size: Number of neurons in the hidden layer.

            layout: The shape format of inputs X, initial_h and outputs Y, Y_h. If
                0, the following shapes are expected: X.shape = [seq_length,
                batch_size, input_size], Y.shape = [seq_length, num_directions,
                batch_size, hidden_size], initial_h.shape = Y_h.shape =
                [num_directions, batch_size, hidden_size]. If 1, the following
                shapes are expected: X.shape = [batch_size, seq_length,
                input_size], Y.shape = [batch_size, seq_length, num_directions,
                hidden_size], initial_h.shape = Y_h.shape = [batch_size,
                num_directions, hidden_size].
        """

        op_schema = get_schema("RNN", 22, "")
        rnn = Op(self, "RNN", op_schema)
        prepared_inputs = self._prepare_inputs(
            op_schema, X, W, R, B, sequence_lens, initial_h
        )
        # Attributes are forwarded by keyword, exactly as named in the signature.
        attributes = {
            "activation_alpha": activation_alpha,
            "activation_beta": activation_beta,
            "activations": activations,
            "clip": clip,
            "direction": direction,
            "hidden_size": hidden_size,
            "layout": layout,
        }
        return rnn(*prepared_inputs, **attributes)

    # Tensor element types RandomNormal may return (used as its return annotation).
    T_RandomNormal: TypeAlias = Union[BFLOAT16, DOUBLE, FLOAT, FLOAT16]

    def RandomNormal(
        self,
        *,
        dtype: int = 1,
        mean: float = 0.0,
        scale: float = 1.0,
        seed: Optional[float] = None,
        shape: Sequence[int],
    ) -> T_RandomNormal:
        r"""[🌐 RandomNormal(22)](https://onnx.ai/onnx/operators/onnx__RandomNormal.html#randomnormal-22 "Online Documentation")


        Produce a tensor of the requested `shape` whose values are sampled from a
        normal distribution parameterised by `mean` and `scale`.

        The element type is chosen through the 'dtype' argument, which must be one
        of the values of the 'DataType' enum field in the TensorProto message.


        Args:
            dtype: Data type of the output tensor's elements. Default is
                TensorProto::FLOAT.

            mean: Mean of the normal distribution.

            scale: Standard deviation of the normal distribution.

            seed: (Optional) Seed for the random generator; one is auto-generated
                when not specified.

            shape: Shape of the output tensor.
        """

        op_schema = get_schema("RandomNormal", 22, "")
        random_normal = Op(self, "RandomNormal", op_schema)
        # No tensor inputs are passed here: everything goes in as an attribute.
        attributes = {
            "dtype": dtype,
            "mean": mean,
            "scale": scale,
            "seed": seed,
            "shape": shape,
        }
        return random_normal(**attributes)

    # Constrained TypeVar annotating the `input` tensor of RandomNormalLike.
    T1_RandomNormalLike = TypeVar(
        "T1_RandomNormalLike",
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    # Tensor element types RandomNormalLike may return (used as its return annotation).
    T2_RandomNormalLike: TypeAlias = Union[BFLOAT16, DOUBLE, FLOAT, FLOAT16]

    def RandomNormalLike(
        self,
        input: T1_RandomNormalLike,
        *,
        dtype: Optional[int] = None,
        mean: float = 0.0,
        scale: float = 1.0,
        seed: Optional[float] = None,
    ) -> T2_RandomNormalLike:
        r"""[🌐 RandomNormalLike(22)](https://onnx.ai/onnx/operators/onnx__RandomNormalLike.html#randomnormallike-22 "Online Documentation")


        Produce a tensor of normally distributed random values. The output takes
        its shape from the input tensor, while `mean` and `scale` set the
        parameters of the distribution.

        The element type comes from the 'dtype' argument when given and is copied
        from the input tensor otherwise. 'dtype' must be one of the values of the
        'DataType' enum field in the TensorProto message, and be valid as an
        output type.


        Args:
            input: Tensor whose shape (and optionally type) is copied to the output.

            dtype: (Optional) Data type of the output tensor's elements; the input
                tensor's data type is used when not specified.

            mean: Mean of the normal distribution.

            scale: Standard deviation of the normal distribution.

            seed: (Optional) Seed for the random generator; one is auto-generated
                when not specified.
        """

        op_schema = get_schema("RandomNormalLike", 22, "")
        random_normal_like = Op(self, "RandomNormalLike", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, input)
        attributes = {
            "dtype": dtype,
            "mean": mean,
            "scale": scale,
            "seed": seed,
        }
        return random_normal_like(*prepared_inputs, **attributes)

    # Tensor element types RandomUniform may return (used as its return annotation).
    T_RandomUniform: TypeAlias = Union[BFLOAT16, DOUBLE, FLOAT, FLOAT16]

    def RandomUniform(
        self,
        *,
        dtype: int = 1,
        high: float = 1.0,
        low: float = 0.0,
        seed: Optional[float] = None,
        shape: Sequence[int],
    ) -> T_RandomUniform:
        r"""[🌐 RandomUniform(22)](https://onnx.ai/onnx/operators/onnx__RandomUniform.html#randomuniform-22 "Online Documentation")


        Produce a tensor of the requested `shape` whose values are sampled from a
        uniform distribution over the range given by `low` and `high`.

        The element type is chosen through the 'dtype' argument, which must be one
        of the values of the 'DataType' enum field in the TensorProto message.


        Args:
            dtype: Data type of the output tensor's elements. If not specified,
                default is TensorProto::FLOAT.

            high: Upper boundary of the output values.

            low: Lower boundary of the output values.

            seed: (Optional) Seed for the random generator; one is auto-generated
                when not specified.

            shape: Shape of the output tensor.
        """

        op_schema = get_schema("RandomUniform", 22, "")
        random_uniform = Op(self, "RandomUniform", op_schema)
        # No tensor inputs are passed here: everything goes in as an attribute.
        attributes = {
            "dtype": dtype,
            "high": high,
            "low": low,
            "seed": seed,
            "shape": shape,
        }
        return random_uniform(**attributes)

    # Constrained TypeVar annotating the `input` tensor of RandomUniformLike.
    T1_RandomUniformLike = TypeVar(
        "T1_RandomUniformLike",
        BFLOAT16,
        BOOL,
        COMPLEX128,
        COMPLEX64,
        DOUBLE,
        FLOAT,
        FLOAT16,
        INT16,
        INT32,
        INT64,
        INT8,
        STRING,
        UINT16,
        UINT32,
        UINT64,
        UINT8,
    )

    # Tensor element types RandomUniformLike may return (used as its return annotation).
    T2_RandomUniformLike: TypeAlias = Union[BFLOAT16, DOUBLE, FLOAT, FLOAT16]

    def RandomUniformLike(
        self,
        input: T1_RandomUniformLike,
        *,
        dtype: Optional[int] = None,
        high: float = 1.0,
        low: float = 0.0,
        seed: Optional[float] = None,
    ) -> T2_RandomUniformLike:
        r"""[🌐 RandomUniformLike(22)](https://onnx.ai/onnx/operators/onnx__RandomUniformLike.html#randomuniformlike-22 "Online Documentation")


        Produce a tensor of uniformly distributed random values. The output takes
        its shape from the input tensor, while `low` and `high` set the
        parameters of the distribution.

        The element type comes from the 'dtype' argument when given and is copied
        from the input tensor otherwise. 'dtype' must be one of the values of the
        'DataType' enum field in the TensorProto message and be valid as an
        output type.


        Args:
            input: Tensor whose shape (and optionally type) is copied to the output.

            dtype: (Optional) Data type of the output tensor's elements; the input
                tensor's data type is used when not specified.

            high: Upper boundary of the output values.

            low: Lower boundary of the output values.

            seed: (Optional) Seed for the random generator; one is auto-generated
                when not specified.
        """

        op_schema = get_schema("RandomUniformLike", 22, "")
        random_uniform_like = Op(self, "RandomUniformLike", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, input)
        attributes = {
            "dtype": dtype,
            "high": high,
            "low": low,
            "seed": seed,
        }
        return random_uniform_like(*prepared_inputs, **attributes)

    # Constrained TypeVar shared by RoiAlign's `X`, `rois` and return annotations.
    T1_RoiAlign = TypeVar("T1_RoiAlign", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    # Type of RoiAlign's `batch_indices` input.
    T2_RoiAlign: TypeAlias = INT64

    def RoiAlign(
        self,
        X: T1_RoiAlign,
        rois: T1_RoiAlign,
        batch_indices: T2_RoiAlign,
        *,
        coordinate_transformation_mode: str = "half_pixel",
        mode: str = "avg",
        output_height: int = 1,
        output_width: int = 1,
        sampling_ratio: int = 0,
        spatial_scale: float = 1.0,
    ) -> T1_RoiAlign:
        r"""[🌐 RoiAlign(22)](https://onnx.ai/onnx/operators/onnx__RoiAlign.html#roialign-22 "Online Documentation")


        Region of Interest (RoI) align operation, as described in the
        [Mask R-CNN paper](https://arxiv.org/abs/1703.06870).
        Given an input tensor X and a set of regions of interest (rois), pooling
        is applied across each RoI, yielding a 4-D tensor of shape
        (num_rois, C, output_height, output_width).

        RoiAlign was proposed to avoid misalignment by removing the
        quantizations done when mapping the original image to the feature map
        and the feature map to the RoI feature; within each ROI bin, the values
        at the sampled locations are computed directly through bilinear
        interpolation.


        Args:
            X: Input data tensor from the previous operator; 4-D feature map of shape
                (N, C, H, W), where N is the batch size, C is the number of channels,
                and H and W are the height and the width of the data.

            rois: RoIs (Regions of Interest) to pool over; 2-D input of shape
                (num_rois, 4) given as [[x1, y1, x2, y2], ...]. Coordinates are
                expressed in the coordinate system of the input image. Each
                coordinate set has a 1:1 correspondence with the 'batch_indices'
                input.

            batch_indices: 1-D tensor of shape (num_rois,) in which each element is
                the index of the corresponding image in the batch.

            coordinate_transformation_mode: Allowed values are 'half_pixel' and
                'output_half_pixel'. 'half_pixel' shifts the input coordinates by
                -0.5 (the recommended behavior). 'output_half_pixel' omits the
                pixel shift for the input (use this for a backward-compatible
                behavior).

            mode: The pooling method. Two modes are supported: 'avg' and 'max'.
                Default is 'avg'.

            output_height: default 1; Pooled output Y's height.

            output_width: default 1; Pooled output Y's width.

            sampling_ratio: Number of sampling points in the interpolation grid used
                to compute the output value of each pooled output bin. If > 0, then
                exactly sampling_ratio x sampling_ratio grid points are used. If
                == 0, then an adaptive number of grid points are used (computed as
                ceil(roi_width / output_width), and likewise for height). Default
                is 0.

            spatial_scale: Multiplicative spatial scale factor to translate ROI
                coordinates from their input spatial scale to the scale used when
                pooling, i.e., spatial scale of the input feature map X relative to
                the input image. Default is 1.0f.
        """

        op_schema = get_schema("RoiAlign", 22, "")
        roi_align = Op(self, "RoiAlign", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, X, rois, batch_indices)
        attributes = {
            "coordinate_transformation_mode": coordinate_transformation_mode,
            "mode": mode,
            "output_height": output_height,
            "output_width": output_width,
            "sampling_ratio": sampling_ratio,
            "spatial_scale": spatial_scale,
        }
        return roi_align(*prepared_inputs, **attributes)

    # Constrained TypeVar shared by Round's input and return annotations.
    T_Round = TypeVar("T_Round", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Round(self, X: T_Round) -> T_Round:
        r"""[🌐 Round(22)](https://onnx.ai/onnx/operators/onnx__Round.html#round-22 "Online Documentation")


        Round each element of the input tensor to its nearest integer.
        Halves are rounded to the nearest even integer.
        If an input x is integral, +0, -0, NaN, or infinite, x itself is returned.
        The output tensor has the same shape and type as the input.

        Examples:
        ::

            round([0.9]) = [1.0]
            round([2.5]) = [2.0]
            round([2.3]) = [2.0]
            round([1.5]) = [2.0]
            round([-4.5]) = [-4.0]


        Args:
            X: (non-differentiable) Input tensor
        """

        op_schema = get_schema("Round", 22, "")
        round_op = Op(self, "Round", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, X)
        return round_op(*prepared_inputs)

    # Constrained TypeVar shared by Selu's input and return annotations.
    T_Selu = TypeVar("T_Selu", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Selu(
        self,
        X: T_Selu,
        *,
        alpha: float = 1.6732631921768188,
        gamma: float = 1.0507010221481323,
    ) -> T_Selu:
        r"""[🌐 Selu(22)](https://onnx.ai/onnx/operators/onnx__Selu.html#selu-22 "Online Documentation")


        Apply the scaled exponential linear unit function elementwise to one
        input (Tensor<T>), producing one output (Tensor<T>):
        `y = gamma * (alpha * e^x - alpha) for x <= 0`, `y = gamma * x for x > 0`.


        Args:
            X: (differentiable) Input tensor

            alpha: Coefficient of SELU; defaults to 1.67326319217681884765625
                (i.e., float32 approximation of
                1.6732632423543772848170429916717).

            gamma: Coefficient of SELU; defaults to 1.05070102214813232421875
                (i.e., float32 approximation of
                1.0507009873554804934193349852946).
        """

        op_schema = get_schema("Selu", 22, "")
        selu_op = Op(self, "Selu", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, X)
        attributes = {"alpha": alpha, "gamma": gamma}
        return selu_op(*prepared_inputs, **attributes)

    # Constrained TypeVar shared by Sin's input and return annotations.
    T_Sin = TypeVar("T_Sin", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Sin(self, input: T_Sin) -> T_Sin:
        r"""[🌐 Sin(22)](https://onnx.ai/onnx/operators/onnx__Sin.html#sin-22 "Online Documentation")


        Compute the element-wise sine of the input tensor.


        Args:
            input: (differentiable) Input tensor
        """

        op_schema = get_schema("Sin", 22, "")
        sin_op = Op(self, "Sin", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, input)
        return sin_op(*prepared_inputs)

    # Constrained TypeVar shared by Sinh's input and return annotations.
    T_Sinh = TypeVar("T_Sinh", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Sinh(self, input: T_Sinh) -> T_Sinh:
        r"""[🌐 Sinh(22)](https://onnx.ai/onnx/operators/onnx__Sinh.html#sinh-22 "Online Documentation")


        Compute the element-wise hyperbolic sine of the input tensor.


        Args:
            input: (differentiable) Input tensor
        """

        op_schema = get_schema("Sinh", 22, "")
        sinh_op = Op(self, "Sinh", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, input)
        return sinh_op(*prepared_inputs)

    # Constrained TypeVar shared by Softplus's input and return annotations.
    T_Softplus = TypeVar("T_Softplus", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Softplus(self, X: T_Softplus) -> T_Softplus:
        r"""[🌐 Softplus(22)](https://onnx.ai/onnx/operators/onnx__Softplus.html#softplus-22 "Online Documentation")


        Apply the softplus function, y = ln(exp(x) + 1), elementwise to one
        input (Tensor<T>), producing one output (Tensor<T>).


        Args:
            X: (differentiable) Input tensor
        """

        op_schema = get_schema("Softplus", 22, "")
        softplus_op = Op(self, "Softplus", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, X)
        return softplus_op(*prepared_inputs)

    # Constrained TypeVar shared by Softsign's input and return annotations.
    T_Softsign = TypeVar("T_Softsign", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Softsign(self, input: T_Softsign) -> T_Softsign:
        r"""[🌐 Softsign(22)](https://onnx.ai/onnx/operators/onnx__Softsign.html#softsign-22 "Online Documentation")


        Compute the element-wise softsign, x/(1+|x|), of the input tensor.


        Args:
            input: (differentiable) Input tensor
        """

        op_schema = get_schema("Softsign", 22, "")
        softsign_op = Op(self, "Softsign", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, input)
        return softsign_op(*prepared_inputs)

    # Constrained TypeVar shared by Tan's input and return annotations.
    T_Tan = TypeVar("T_Tan", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def Tan(self, input: T_Tan) -> T_Tan:
        r"""[🌐 Tan(22)](https://onnx.ai/onnx/operators/onnx__Tan.html#tan-22 "Online Documentation")


        Compute the element-wise tangent of the input tensor.


        Args:
            input: (differentiable) Input tensor
        """

        op_schema = get_schema("Tan", 22, "")
        tan_op = Op(self, "Tan", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, input)
        return tan_op(*prepared_inputs)

    # Constrained TypeVar shared by ThresholdedRelu's input and return annotations.
    T_ThresholdedRelu = TypeVar("T_ThresholdedRelu", BFLOAT16, DOUBLE, FLOAT, FLOAT16)

    def ThresholdedRelu(
        self, X: T_ThresholdedRelu, *, alpha: float = 1.0
    ) -> T_ThresholdedRelu:
        r"""[🌐 ThresholdedRelu(22)](https://onnx.ai/onnx/operators/onnx__ThresholdedRelu.html#thresholdedrelu-22 "Online Documentation")


        Apply the thresholded rectified linear function elementwise to one
        input (Tensor<T>), producing one output (Tensor<T>):
        y = x for x > alpha, y = 0 otherwise.


        Args:
            X: (differentiable) Input tensor

            alpha: Threshold value
        """

        op_schema = get_schema("ThresholdedRelu", 22, "")
        thresholded_relu = Op(self, "ThresholdedRelu", op_schema)
        # Positional inputs are routed through _prepare_inputs before the call.
        prepared_inputs = self._prepare_inputs(op_schema, X)
        return thresholded_relu(*prepared_inputs, alpha=alpha)
