# Automatically generated by Numbast Static Binding Generator
# Generator Information:
# Ast_canopy version: 0.3.0
# Numbast version: 0.3.0
# Generation command: /home/wangm/numbast/numbast/src/numbast/__main__.py --cfg-path configs/cuda_bf16.yml --output-dir numba_cuda/numba/cuda/_internal
# Static binding generator parameters: {'cfg_path': 'configs/cuda_bf16.yml', 'output_dir': 'numba_cuda/numba/cuda/', 'entry_point': None, 'retain': None, 'types': None, 'datamodels': None, 'compute_capability': None, 'run_ruff_format': True}
# Config file path (relative to the path of the generated binding): ../../../../configs/cuda_bf16.yml
# Cudatoolkit version: (12, 8)
# Default CUDA_HOME path: /home/wangm/micromamba/envs/numbast


# Imports:
import io
import operator
import os

import numba
from llvmlite import ir
from numba import types
from numba.core.datamodel import PrimitiveModel, StructModel
from numba.core.extending import (
    lower_cast,
    make_attribute_wrapper,
    register_model,
)
from numba.core.typing import signature
from numba.core.typing.templates import AttributeTemplate, ConcreteTemplate
from numba.cuda import CUSource, declare_device
from numba.cuda.cudadecl import register, register_attr, register_global
from numba.cuda.cudaimpl import lower
from numba.extending import as_numba_type
from numba.types import (
    CPointer,
    Function,
    Number,
    Type,
    bool_,
    float16,
    float32,
    float64,
    int8,
    int16,
    int32,
    int64,
    uint8,
    uint16,
    uint32,
    uint64,
)

# Setups:

# Shim Stream:


class _KeyedStringIO(io.StringIO):
    """``io.StringIO`` that can skip writes whose key was already seen.

    Each call to :meth:`write_with_key` records its key; a later call with
    the same key writes nothing. Plain ``write`` calls are not tracked.
    """

    def __init__(self, *arg, **kwarg):
        # Keyword arguments must be forwarded with ``**``. Unpacking the
        # dict with a single ``*`` would pass its *keys* positionally, e.g.
        # ``initial_value="abc"`` would become the text "initial_value".
        super().__init__(*arg, **kwarg)
        self._keys = set()

    def write_with_key(self, key: str, value: str):
        """Write ``value`` unless ``key`` has been written before.

        Returns ``None`` in both cases.
        """
        if key in self._keys:
            return
        self._keys.add(key)
        self.write(value)

    def reset(self):
        """Forget all recorded keys and move the stream position to 0.

        The buffer is not truncated; later writes overwrite from the start.
        """
        self._keys.clear()
        self.seek(0)


# The shim source starts with the (currently empty) defines, a newline and
# the include of the wrapped header. Further text is appended to the same
# stream later through ``write_with_key``.
shim_defines = ""
shim_include = "#include <cuda_bf16.h>"
shim_prefix = "\n".join((shim_defines, shim_include))

shim_stream = _KeyedStringIO()
shim_stream.write(shim_prefix)

# CUSource is handed the stream object itself, not a snapshot of its text.
shim_obj = CUSource(shim_stream)

# Enums:


# Structs:


# Typing for unnamed1401637
class _type_class_unnamed1401637(Type):
    """Numba type named ``unnamed1401637``.

    Elsewhere in this file a pointer to this type is passed where the shim
    expects ``__nv_bfloat16_raw*``.
    """

    def __init__(self):
        super(_type_class_unnamed1401637, self).__init__(name="unnamed1401637")
        # Layout facts recorded on the type object; ``alignof_`` is read
        # back with ``getattr`` by the lowering code when storing values.
        self.bitwidth = 16
        self.alignof_ = 2


# Single instance of the Numba type, shared by all signatures below.
_type_unnamed1401637 = _type_class_unnamed1401637()


# Python-facing handle for the struct; it carries the Numba type as
# ``_nbtype`` and is mapped to that type for ``as_numba_type``.
unnamed1401637 = type(
    "unnamed1401637",
    (),
    dict(_nbtype=_type_unnamed1401637),
)

as_numba_type.register(unnamed1401637, _type_unnamed1401637)


@register_model(_type_class_unnamed1401637)
class _model_unnamed1401637(StructModel):
    """Struct data model for ``unnamed1401637``: one ``uint16`` field ``x``."""

    def __init__(self, dmm, fe_type):
        super().__init__(dmm, fe_type, [("x", uint16)])


@register_attr
class _attr_typing_unnamed1401637(AttributeTemplate):
    """Attribute typing for ``unnamed1401637``: ``.x`` is ``uint16``."""

    # NOTE(review): the key is the Python-facing class, not the Numba type
    # instance; confirm this is the key attribute resolution looks up.
    key = unnamed1401637

    def resolve_x(self, obj):
        """Return the Numba type of the ``x`` attribute."""
        return uint16


# Expose struct member "x" as attribute "x" on values of this type.
make_attribute_wrapper(_type_class_unnamed1401637, "x", "x")


@register
class _ctor_template_unnamed1401637(ConcreteTemplate):
    """Constructor typing template for ``unnamed1401637``.

    The case list is empty, so no constructor signature is declared here.
    """

    key = unnamed1401637
    cases = list()


# Make the Python-facing name resolvable as a typed global function.
register_global(unnamed1401637, Function(_ctor_template_unnamed1401637))


# Typing for unnamed1401746
class _type_class_unnamed1401746(Type):
    """Numba type named ``unnamed1401746``."""

    def __init__(self):
        super(_type_class_unnamed1401746, self).__init__(name="unnamed1401746")
        # Layout facts recorded on the type object for later lookups.
        self.bitwidth = 32
        self.alignof_ = 4


# Single instance of the Numba type, shared by everything below.
_type_unnamed1401746 = _type_class_unnamed1401746()


# Python-facing handle for the struct; it carries the Numba type as
# ``_nbtype`` and is mapped to that type for ``as_numba_type``.
unnamed1401746 = type(
    "unnamed1401746",
    (),
    dict(_nbtype=_type_unnamed1401746),
)

as_numba_type.register(unnamed1401746, _type_unnamed1401746)


@register_model(_type_class_unnamed1401746)
class _model_unnamed1401746(StructModel):
    """Struct data model for ``unnamed1401746``: ``uint16`` fields x, y."""

    def __init__(self, dmm, fe_type):
        super().__init__(dmm, fe_type, [("x", uint16), ("y", uint16)])


@register_attr
class _attr_typing_unnamed1401746(AttributeTemplate):
    """Attribute typing for ``unnamed1401746``: ``.x``/``.y`` are ``uint16``."""

    # NOTE(review): the key is the Python-facing class, not the Numba type
    # instance; confirm this is the key attribute resolution looks up.
    key = unnamed1401746

    def resolve_x(self, obj):
        """Return the Numba type of the ``x`` attribute."""
        return uint16

    def resolve_y(self, obj):
        """Return the Numba type of the ``y`` attribute."""
        return uint16


# Expose struct members "x" and "y" under the same attribute names.
make_attribute_wrapper(_type_class_unnamed1401746, "x", "x")


make_attribute_wrapper(_type_class_unnamed1401746, "y", "y")


@register
class _ctor_template_unnamed1401746(ConcreteTemplate):
    """Constructor typing template for ``unnamed1401746``.

    The case list is empty, so no constructor signature is declared here.
    """

    key = unnamed1401746
    cases = list()


# Make the Python-facing name resolvable as a typed global function.
register_global(unnamed1401746, Function(_ctor_template_unnamed1401746))


# Typing for __nv_bfloat16
class _type_class___nv_bfloat16(Number):
    """Numba ``Number`` subtype named ``__nv_bfloat16``."""

    def __init__(self):
        super(_type_class___nv_bfloat16, self).__init__(name="__nv_bfloat16")
        # ``bitwidth`` sizes the backing integer in the data model;
        # ``alignof_`` is read back by the lowering code for loads/stores.
        self.bitwidth = 16
        self.alignof_ = 2


# Single instance of the Numba type, shared by all signatures below.
_type___nv_bfloat16 = _type_class___nv_bfloat16()


# Python-facing handle for the struct; it carries the Numba type as
# ``_nbtype`` and is mapped to that type for ``as_numba_type``.
__nv_bfloat16 = type(
    "__nv_bfloat16",
    (),
    dict(_nbtype=_type___nv_bfloat16),
)

as_numba_type.register(__nv_bfloat16, _type___nv_bfloat16)


@register_model(_type_class___nv_bfloat16)
class _model___nv_bfloat16(PrimitiveModel):
    """Primitive data model: an LLVM integer of ``fe_type.bitwidth`` bits."""

    def __init__(self, dmm, fe_type):
        super().__init__(dmm, fe_type, ir.IntType(fe_type.bitwidth))


def _lower___nv_bfloat16_void(shim_stream, shim_obj):
    """Register the lowering of the zero-argument ``__nv_bfloat16()`` call.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_1"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_1(int &ignore, __nv_bfloat16 *self ) {
        new (self) __nv_bfloat16();
        return 0;
    }
        """

    # Numba-side view of the shim: (pointer to result) -> int32.
    shim_sig = signature(int32, CPointer(_type___nv_bfloat16))
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0):
        return shim_ctor(arg_0)

    @lower(__nv_bfloat16)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_void(shim_stream, shim_obj)


def _lower___nv_bfloat16__type_unnamed1401637(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(unnamed1401637)``.

    The shim receives the argument as ``__nv_bfloat16_raw*``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_2"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_2(int &ignore, __nv_bfloat16 *self , __nv_bfloat16_raw* hr) {
        new (self) __nv_bfloat16(*hr);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32,
        CPointer(_type___nv_bfloat16),
        CPointer(_type_unnamed1401637),
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, _type_unnamed1401637)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16__type_unnamed1401637(shim_stream, shim_obj)


def _lower___nv_bfloat16_float16(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(float16)``.

    The shim receives the argument as ``__half*``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_3"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_3(int &ignore, __nv_bfloat16 *self , __half* f) {
        new (self) __nv_bfloat16(*f);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(float16)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, float16)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_float16(shim_stream, shim_obj)


def _lower___nv_bfloat16_float32(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(float32)``.

    The shim receives the argument as ``float*``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_4"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_4(int &ignore, __nv_bfloat16 *self , float* f) {
        new (self) __nv_bfloat16(*f);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(float32)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, float32)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_float32(shim_stream, shim_obj)


def _lower___nv_bfloat16_float64(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(float64)``.

    The shim receives the argument as ``double*``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_5"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_5(int &ignore, __nv_bfloat16 *self , double* f) {
        new (self) __nv_bfloat16(*f);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(float64)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, float64)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_float64(shim_stream, shim_obj)


def _lower___nv_bfloat16_int16(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(int16)``.

    The shim receives the argument as ``short*``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_6"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_6(int &ignore, __nv_bfloat16 *self , short* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(int16)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, int16)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_int16(shim_stream, shim_obj)


def _lower___nv_bfloat16_uint16(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(uint16)``.

    The shim receives the argument as ``unsigned short*``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_7"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_7(int &ignore, __nv_bfloat16 *self , unsigned short* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(uint16)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, uint16)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_uint16(shim_stream, shim_obj)


def _lower___nv_bfloat16_int32(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(int32)``.

    The shim receives the argument as ``int*``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_8"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_8(int &ignore, __nv_bfloat16 *self , int* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(int32)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, int32)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_int32(shim_stream, shim_obj)


def _lower___nv_bfloat16_uint32(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(uint32)``.

    The shim receives the argument as ``unsigned int*``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_9"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_9(int &ignore, __nv_bfloat16 *self , unsigned int* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(uint32)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, uint32)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_uint32(shim_stream, shim_obj)


def _lower___nv_bfloat16_int64(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(int64)`` (``long`` shim).

    The shim receives the argument as ``long*``. A later function in this
    file has the same name and also registers ``(__nv_bfloat16, int64)``,
    using a ``long long*`` shim.
    NOTE(review): confirm which of the two registrations takes effect.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_10"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_10(int &ignore, __nv_bfloat16 *self , long* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(int64)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, int64)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_int64(shim_stream, shim_obj)


def _lower___nv_bfloat16_uint64(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(uint64)`` (``unsigned long``).

    The shim receives the argument as ``unsigned long*``. A later function
    in this file has the same name and also registers
    ``(__nv_bfloat16, uint64)``, using an ``unsigned long long*`` shim.
    NOTE(review): confirm which of the two registrations takes effect.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_11"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_11(int &ignore, __nv_bfloat16 *self , unsigned long* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(uint64)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, uint64)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_uint64(shim_stream, shim_obj)


def _lower___nv_bfloat16_int64(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(int64)`` (``long long`` shim).

    The shim receives the argument as ``long long*``. This rebinds the name
    of an earlier function in this file that registers the same
    ``(__nv_bfloat16, int64)`` pair with a ``long*`` shim.
    NOTE(review): confirm which of the two registrations takes effect.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_12"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_12(int &ignore, __nv_bfloat16 *self , long long* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(int64)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, int64)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_int64(shim_stream, shim_obj)


def _lower___nv_bfloat16_uint64(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat16(uint64)`` (``unsigned long long``).

    The shim receives the argument as ``unsigned long long*``. This rebinds
    the name of an earlier function in this file that registers the same
    ``(__nv_bfloat16, uint64)`` pair with an ``unsigned long*`` shim.
    NOTE(review): confirm which of the two registrations takes effect.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_symbol = "____nv_bfloat16____nv_bfloat16_13"
    shim_source = """
    extern "C" __device__ int
    ____nv_bfloat16____nv_bfloat16_13(int &ignore, __nv_bfloat16 *self , unsigned long long* val) {
        new (self) __nv_bfloat16(*val);
        return 0;
    }
        """

    # Numba-side view of the shim: (result ptr, argument ptr) -> int32.
    shim_sig = signature(
        int32, CPointer(_type___nv_bfloat16), CPointer(uint64)
    )
    shim_ctor = declare_device(shim_symbol, shim_sig)

    def __nv_bfloat16_device_caller(arg_0, arg_1):
        return shim_ctor(arg_0, arg_1)

    @lower(__nv_bfloat16, uint64)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_source)

        # Stack slot the shim constructs the object into.
        result_slot = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Arguments travel by pointer: allocate all slots, then spill.
        arg_slots = []
        for arg_ty in sig.args:
            arg_slots.append(builder.alloca(context.get_value_type(arg_ty)))
        for slot, arg_ty, value in zip(arg_slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        context.compile_internal(
            builder,
            __nv_bfloat16_device_caller,
            shim_sig,
            (result_slot, *arg_slots),
        )
        return builder.load(
            result_slot, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )


_lower___nv_bfloat16_uint64(shim_stream, shim_obj)


@register
class _ctor_template___nv_bfloat16(ConcreteTemplate):
    """Constructor typing template for ``__nv_bfloat16``.

    Declares the zero-argument form followed by one single-argument form
    per accepted argument type. ``int64`` and ``uint64`` each appear twice,
    matching the two lowering functions defined for each in this file.
    """

    # Looked up through globals() because a bare ``__nv_bfloat16`` inside a
    # class body would be subject to private-name mangling.
    key = globals()["__nv_bfloat16"]
    cases = [
        signature(_type___nv_bfloat16),
        *(
            signature(_type___nv_bfloat16, argty)
            for argty in (
                _type_unnamed1401637,
                float16,
                float32,
                float64,
                int16,
                uint16,
                int32,
                uint32,
                int64,
                uint64,
                int64,
                uint64,
            )
        ),
    ]


# Make the Python-facing name resolvable as a typed global function.
register_global(__nv_bfloat16, Function(_ctor_template___nv_bfloat16))


def _from___nv_bfloat16_to__type_unnamed1401637_lower(shim_stream, shim_obj):
    """Register the cast ``__nv_bfloat16`` -> ``unnamed1401637``.

    The cast is lowered as a call to a C++ shim that invokes
    ``operator __nv_bfloat16_raw()`` on a pointer to the value. A later
    function in this file has the same name and registers the same cast
    pair with shim symbol suffix ``_2``.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator___nv_bfloat16_raw_1(__nv_bfloat16_raw &retval, __nv_bfloat16 *self) {
        retval = self->operator __nv_bfloat16_raw();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator___nv_bfloat16_raw_1",
        _type_unnamed1401637(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, _type_unnamed1401637)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator___nv_bfloat16_raw_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so spill the value to a slot.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # The type object defines ``alignof_`` (there is no ``align``
        # attribute), matching the lookup used by the constructors.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                _type_unnamed1401637,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to__type_unnamed1401637_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to__type_unnamed1401637_lower(shim_stream, shim_obj):
    """Register the cast ``__nv_bfloat16`` -> ``unnamed1401637`` (shim ``_2``).

    The cast is lowered as a call to a C++ shim that invokes
    ``operator __nv_bfloat16_raw()`` on a pointer to the value. This
    rebinds the name of an earlier function in this file that registers
    the same cast pair with shim symbol suffix ``_1``.
    NOTE(review): confirm which of the two registrations takes effect.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator___nv_bfloat16_raw_2(__nv_bfloat16_raw &retval, __nv_bfloat16 *self) {
        retval = self->operator __nv_bfloat16_raw();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator___nv_bfloat16_raw_2",
        _type_unnamed1401637(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, _type_unnamed1401637)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator___nv_bfloat16_raw_2", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so spill the value to a slot.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # The type object defines ``alignof_`` (there is no ``align``
        # attribute), matching the lookup used by the constructors.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                _type_unnamed1401637,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to__type_unnamed1401637_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_float32_lower(shim_stream, shim_obj):
    """Register the cast ``__nv_bfloat16`` -> ``float32``.

    The cast is lowered as a call to a C++ shim that invokes
    ``operator float()`` on a pointer to the value.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_float_1(float &retval, __nv_bfloat16 *self) {
        retval = self->operator float();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_float_1",
        float32(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, float32)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_float_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so spill the value to a slot.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # The type object defines ``alignof_`` (there is no ``align``
        # attribute), matching the lookup used by the constructors.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                float32,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_float32_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int8_lower(shim_stream, shim_obj):
    """Register the cast ``__nv_bfloat16`` -> ``int8`` (``signed char``).

    The cast is lowered as a call to a C++ shim that invokes
    ``operator signed char()`` on a pointer to the value. A later function
    in this file reuses this name for the ``char`` conversion operator.

    Parameters:
        shim_stream: receives the C++ shim text through ``write_with_key``.
        shim_obj: handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_signed_char_1(signed char &retval, __nv_bfloat16 *self) {
        retval = self->operator signed char();
        return 0;
    }
        """

    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_signed_char_1",
        int8(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int8)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_signed_char_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so spill the value to a slot.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # The type object defines ``alignof_`` (there is no ``align``
        # attribute), matching the lookup used by the constructors.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int8,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int8_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint8_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``uint8`` cast.

    The cast is routed through a C++ shim that calls
    ``operator unsigned char`` on the value.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_unsigned_char_1(unsigned char &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned char();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_unsigned_char_1",
        uint8(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint8)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_unsigned_char_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint8,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint8_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int8_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``int8`` cast.

    The cast is routed through a C++ shim that calls ``operator char`` on
    the value.

    NOTE(review): this reuses both the function name and the
    (``__nv_bfloat16``, ``int8``) cast key of the ``operator signed char``
    variant defined earlier in the file; presumably this later registration
    is the one that takes effect -- confirm.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_char_1(char &retval, __nv_bfloat16 *self) {
        retval = self->operator char();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_char_1",
        int8(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int8)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_char_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int8,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int8_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int16_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``int16`` cast.

    The cast is routed through a C++ shim that calls ``operator short`` on
    the value.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_short_1(short &retval, __nv_bfloat16 *self) {
        retval = self->operator short();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_short_1",
        int16(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int16)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_short_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int16,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int16_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint16_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``uint16`` cast.

    The cast is routed through a C++ shim that calls
    ``operator unsigned short`` on the value.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_unsigned_short_1(unsigned short &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned short();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_unsigned_short_1",
        uint16(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint16)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_unsigned_short_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint16,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint16_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int32_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``int32`` cast.

    The cast is routed through a C++ shim that calls ``operator int`` on the
    value.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_int_1(int &retval, __nv_bfloat16 *self) {
        retval = self->operator int();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_int_1",
        int32(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int32)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_int_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int32,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int32_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint32_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``uint32`` cast.

    The cast is routed through a C++ shim that calls
    ``operator unsigned int`` on the value.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_unsigned_int_1(unsigned int &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned int();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_unsigned_int_1",
        uint32(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint32)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_unsigned_int_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint32,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint32_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int64_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``int64`` cast.

    The cast is routed through a C++ shim that calls ``operator long`` on
    the value.

    NOTE(review): C++ ``long`` is declared here as ``int64``, which assumes
    a 64-bit ``long`` -- confirm for the target platforms.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_long_1(long &retval, __nv_bfloat16 *self) {
        retval = self->operator long();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_long_1",
        int64(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int64)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_long_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int64,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int64_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint64_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``uint64`` cast.

    The cast is routed through a C++ shim that calls
    ``operator unsigned long`` on the value.

    NOTE(review): C++ ``unsigned long`` is declared here as ``uint64``,
    which assumes a 64-bit ``long`` -- confirm for the target platforms.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_unsigned_long_1(unsigned long &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned long();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_unsigned_long_1",
        uint64(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint64)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_unsigned_long_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint64,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint64_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_int64_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``int64`` cast.

    The cast is routed through a C++ shim that calls ``operator long long``
    on the value.

    NOTE(review): this reuses both the function name and the
    (``__nv_bfloat16``, ``int64``) cast key of the ``operator long`` variant
    defined earlier in the file; presumably this later registration is the
    one that takes effect -- confirm.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_long_long_1(long long &retval, __nv_bfloat16 *self) {
        retval = self->operator long long();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_long_long_1",
        int64(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, int64)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_long_long_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                int64,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_int64_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_uint64_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``uint64`` cast.

    The cast is routed through a C++ shim that calls
    ``operator unsigned long long`` on the value.

    NOTE(review): this reuses both the function name and the
    (``__nv_bfloat16``, ``uint64``) cast key of the ``operator unsigned
    long`` variant defined earlier in the file; presumably this later
    registration is the one that takes effect -- confirm.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_unsigned_long_long_1(unsigned long long &retval, __nv_bfloat16 *self) {
        retval = self->operator unsigned long long();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_unsigned_long_long_1",
        uint64(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, uint64)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_unsigned_long_long_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                uint64,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_uint64_lower(shim_stream, shim_obj)


def _from___nv_bfloat16_to_bool__lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat16`` -> ``bool_`` cast.

    The cast is routed through a C++ shim that calls ``operator bool`` on
    the value.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat16_operator_bool_1(bool &retval, __nv_bfloat16 *self) {
        retval = self->operator bool();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat16 = declare_device(
        "____nv_bfloat16_operator_bool_1",
        bool_(
            CPointer(_type___nv_bfloat16),
        ),
    )

    def _conversion_op_caller___nv_bfloat16(arg):
        return _op_decl___nv_bfloat16(arg)

    @lower_cast(_type___nv_bfloat16, bool_)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat16_operator_bool_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat16), name="selfptr"
        )
        # Read the alignment from ``alignof_``, the attribute the other
        # lowerers in this file use. The previous "align" lookup did not
        # match the attribute the generated type classes set, so it always
        # fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat16, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat16,
            signature(
                bool_,
                CPointer(_type___nv_bfloat16),
            ),
            (ptr,),
        )


_from___nv_bfloat16_to_bool__lower(shim_stream, shim_obj)


# Typing for __nv_bfloat162
class _type_class___nv_bfloat162(Type):
    """Numba type class named ``__nv_bfloat162``.

    Instances carry the layout facts the lowering code reads: ``alignof_``
    (alignment in bytes) and ``bitwidth`` (size in bits).
    """

    def __init__(self):
        super().__init__(name="__nv_bfloat162")
        # 4 bytes wide, 4-byte aligned.
        self.bitwidth = 32
        self.alignof_ = 4


_type___nv_bfloat162 = _type_class___nv_bfloat162()


# Make Python API for struct
__nv_bfloat162 = type("__nv_bfloat162", (), {"_nbtype": _type___nv_bfloat162})

as_numba_type.register(__nv_bfloat162, _type___nv_bfloat162)


@register_model(_type_class___nv_bfloat162)
class _model___nv_bfloat162(StructModel):
    """Data model: a struct with two ``__nv_bfloat16`` members, ``x`` then ``y``."""

    def __init__(self, dmm, fe_type):
        component = _type___nv_bfloat16
        super().__init__(dmm, fe_type, [("x", component), ("y", component)])


@register_attr
class _attr_typing___nv_bfloat162(AttributeTemplate):
    """Attribute typing: ``x`` and ``y`` both resolve to ``__nv_bfloat16``."""

    # Looked up through globals() because a bare ``__nv_bfloat162`` inside a
    # class body would be name-mangled by Python.
    # NOTE(review): the key is the Python-side API class rather than the
    # Numba type class -- confirm this is the intended lookup key.
    key = globals()["__nv_bfloat162"]

    def _component_type(self):
        # Both members share the same element type.
        return _type___nv_bfloat16

    def resolve_x(self, obj):
        return self._component_type()

    def resolve_y(self, obj):
        return self._component_type()


make_attribute_wrapper(_type_class___nv_bfloat162, "x", "x")


make_attribute_wrapper(_type_class___nv_bfloat162, "y", "y")


def _lower___nv_bfloat162_void(shim_stream, shim_obj):
    """Register the lowering of the zero-argument ``__nv_bfloat162()`` constructor.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the constructor is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the constructor is lowered.
    """
    shim_symbol = "____nv_bfloat162____nv_bfloat162_1"
    # Shim: placement-constructs the object at ``self`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat162____nv_bfloat162_1(int &ignore, __nv_bfloat162 *self ) {
        new (self) __nv_bfloat162();
        return 0;
    }
        """

    self_ptr_ty = CPointer(_type___nv_bfloat162)
    _ctor_decl___nv_bfloat162 = declare_device(shim_symbol, int32(self_ptr_ty))

    def __nv_bfloat162_device_caller(arg_0):
        return _ctor_decl___nv_bfloat162(arg_0)

    caller_sig = signature(int32, self_ptr_ty)

    @lower(__nv_bfloat162)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # Stack slot the shim constructs the object into.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )

        # Allocate every argument slot first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        # The shim's int status is not used; the result is read from selfptr.
        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            caller_sig,
            (selfptr, *slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(selfptr, align=self_align)


_lower___nv_bfloat162_void(shim_stream, shim_obj)


def _lower___nv_bfloat162__type___nv_bfloat162(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat162(__nv_bfloat162)`` (shim ``_2``).

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the constructor is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the constructor is lowered.
    """
    shim_symbol = "____nv_bfloat162____nv_bfloat162_2"
    # Shim: placement-constructs ``*self`` from ``*src`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat162____nv_bfloat162_2(int &ignore, __nv_bfloat162 *self , __nv_bfloat162* src) {
        new (self) __nv_bfloat162(*src);
        return 0;
    }
        """

    bf162_ptr = CPointer(_type___nv_bfloat162)
    _ctor_decl___nv_bfloat162 = declare_device(
        shim_symbol, int32(bf162_ptr, bf162_ptr)
    )

    def __nv_bfloat162_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat162(arg_0, arg_1)

    caller_sig = signature(int32, bf162_ptr, bf162_ptr)

    @lower(__nv_bfloat162, _type___nv_bfloat162)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # Stack slot the shim constructs the object into.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )

        # Allocate every argument slot first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        # The shim's int status is not used; the result is read from selfptr.
        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            caller_sig,
            (selfptr, *slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(selfptr, align=self_align)


_lower___nv_bfloat162__type___nv_bfloat162(shim_stream, shim_obj)


def _lower___nv_bfloat162__type___nv_bfloat16__type___nv_bfloat16(
    shim_stream, shim_obj
):
    """Register the lowering of ``__nv_bfloat162(__nv_bfloat16, __nv_bfloat16)``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the constructor is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the constructor is lowered.
    """
    shim_symbol = "____nv_bfloat162____nv_bfloat162_3"
    # Shim: placement-constructs ``*self`` from ``(*a, *b)`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat162____nv_bfloat162_3(int &ignore, __nv_bfloat162 *self , __nv_bfloat16* a, __nv_bfloat16* b) {
        new (self) __nv_bfloat162(*a, *b);
        return 0;
    }
        """

    self_ptr_ty = CPointer(_type___nv_bfloat162)
    bf16_ptr = CPointer(_type___nv_bfloat16)
    _ctor_decl___nv_bfloat162 = declare_device(
        shim_symbol, int32(self_ptr_ty, bf16_ptr, bf16_ptr)
    )

    def __nv_bfloat162_device_caller(arg_0, arg_1, arg_2):
        return _ctor_decl___nv_bfloat162(arg_0, arg_1, arg_2)

    caller_sig = signature(int32, self_ptr_ty, bf16_ptr, bf16_ptr)

    @lower(__nv_bfloat162, _type___nv_bfloat16, _type___nv_bfloat16)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # Stack slot the shim constructs the object into.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )

        # Allocate every argument slot first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        # The shim's int status is not used; the result is read from selfptr.
        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            caller_sig,
            (selfptr, *slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(selfptr, align=self_align)


_lower___nv_bfloat162__type___nv_bfloat16__type___nv_bfloat16(
    shim_stream, shim_obj
)


def _lower___nv_bfloat162__type___nv_bfloat162(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat162(__nv_bfloat162)`` (shim ``_4``).

    This has the same name, argument types and shim body as the ``_2``
    variant defined earlier in the file; only the shim symbol differs.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the constructor is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the constructor is lowered.
    """
    shim_symbol = "____nv_bfloat162____nv_bfloat162_4"
    # Shim: placement-constructs ``*self`` from ``*src`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat162____nv_bfloat162_4(int &ignore, __nv_bfloat162 *self , __nv_bfloat162* src) {
        new (self) __nv_bfloat162(*src);
        return 0;
    }
        """

    bf162_ptr = CPointer(_type___nv_bfloat162)
    _ctor_decl___nv_bfloat162 = declare_device(
        shim_symbol, int32(bf162_ptr, bf162_ptr)
    )

    def __nv_bfloat162_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat162(arg_0, arg_1)

    caller_sig = signature(int32, bf162_ptr, bf162_ptr)

    @lower(__nv_bfloat162, _type___nv_bfloat162)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # Stack slot the shim constructs the object into.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )

        # Allocate every argument slot first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        # The shim's int status is not used; the result is read from selfptr.
        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            caller_sig,
            (selfptr, *slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(selfptr, align=self_align)


_lower___nv_bfloat162__type___nv_bfloat162(shim_stream, shim_obj)


def _lower___nv_bfloat162__type_unnamed1401746(shim_stream, shim_obj):
    """Register the lowering of ``__nv_bfloat162`` built from ``__nv_bfloat162_raw``.

    The argument's Numba type is ``_type_unnamed1401746``; the shim receives
    it as ``__nv_bfloat162_raw*``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the constructor is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the constructor is lowered.
    """
    shim_symbol = "____nv_bfloat162____nv_bfloat162_5"
    # Shim: placement-constructs ``*self`` from ``*h2r`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat162____nv_bfloat162_5(int &ignore, __nv_bfloat162 *self , __nv_bfloat162_raw* h2r) {
        new (self) __nv_bfloat162(*h2r);
        return 0;
    }
        """

    self_ptr_ty = CPointer(_type___nv_bfloat162)
    raw_ptr_ty = CPointer(_type_unnamed1401746)
    _ctor_decl___nv_bfloat162 = declare_device(
        shim_symbol, int32(self_ptr_ty, raw_ptr_ty)
    )

    def __nv_bfloat162_device_caller(arg_0, arg_1):
        return _ctor_decl___nv_bfloat162(arg_0, arg_1)

    caller_sig = signature(int32, self_ptr_ty, raw_ptr_ty)

    @lower(__nv_bfloat162, _type_unnamed1401746)
    def ctor_impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # Stack slot the shim constructs the object into.
        selfptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )

        # Allocate every argument slot first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        # The shim's int status is not used; the result is read from selfptr.
        context.compile_internal(
            builder,
            __nv_bfloat162_device_caller,
            caller_sig,
            (selfptr, *slots),
        )
        self_align = getattr(_type___nv_bfloat162, "alignof_", None)
        return builder.load(selfptr, align=self_align)


_lower___nv_bfloat162__type_unnamed1401746(shim_stream, shim_obj)


@register
class _ctor_template___nv_bfloat162(ConcreteTemplate):
    """Typing template listing the accepted ``__nv_bfloat162`` constructor signatures."""

    # Looked up through globals() because a bare ``__nv_bfloat162`` inside a
    # class body would be name-mangled by Python.
    key = globals()["__nv_bfloat162"]
    # One entry per generated constructor lowering, in generation order
    # (the single-``__nv_bfloat162`` form appears twice, as generated).
    cases = [
        signature(_type___nv_bfloat162, *params)
        for params in (
            (),
            (_type___nv_bfloat162,),
            (_type___nv_bfloat16, _type___nv_bfloat16),
            (_type___nv_bfloat162,),
            (_type_unnamed1401746,),
        )
    ]


register_global(__nv_bfloat162, Function(_ctor_template___nv_bfloat162))


def _from___nv_bfloat162_to__type_unnamed1401746_lower(shim_stream, shim_obj):
    """Register the lowering of the ``__nv_bfloat162`` -> ``__nv_bfloat162_raw`` cast.

    The target's Numba type is ``_type_unnamed1401746``. The cast is routed
    through a C++ shim that calls ``operator __nv_bfloat162_raw`` on the
    value.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the cast is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the cast is lowered.
    """
    # Shim: stores the converted value in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    ____nv_bfloat162_operator___nv_bfloat162_raw_1(__nv_bfloat162_raw &retval, __nv_bfloat162 *self) {
        retval = self->operator __nv_bfloat162_raw();
        return 0;
    }
        """

    # Device-function declaration matching the shim's symbol name.
    _op_decl___nv_bfloat162 = declare_device(
        "____nv_bfloat162_operator___nv_bfloat162_raw_1",
        _type_unnamed1401746(
            CPointer(_type___nv_bfloat162),
        ),
    )

    def _conversion_op_caller___nv_bfloat162(arg):
        return _op_decl___nv_bfloat162(arg)

    @lower_cast(_type___nv_bfloat162, _type_unnamed1401746)
    def impl(context, builder, fromty, toty, value):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(
            "____nv_bfloat162_operator___nv_bfloat162_raw_1", shim_raw_str
        )
        # The shim takes ``self`` by pointer, so stage the value in a stack
        # slot first.
        ptr = builder.alloca(
            context.get_value_type(_type___nv_bfloat162), name="selfptr"
        )
        # ``_type___nv_bfloat162`` stores its alignment under ``alignof_``
        # (the constructor lowerers read the same name). It has no "align"
        # attribute, so the previous lookup always fell back to None.
        builder.store(
            value, ptr, align=getattr(_type___nv_bfloat162, "alignof_", None)
        )

        return context.compile_internal(
            builder,
            _conversion_op_caller___nv_bfloat162,
            signature(
                _type_unnamed1401746,
                CPointer(_type___nv_bfloat162),
            ),
            (ptr,),
        )


_from___nv_bfloat162_to__type_unnamed1401746_lower(shim_stream, shim_obj)


# Functions:


def make_bfloat162():
    """Host-side placeholder for the CUDA ``make_bfloat162`` function.

    Calling it from Python does nothing; the function object is what the
    ``@lower(make_bfloat162, ...)`` registration in this file is keyed on.
    """
    return None


def _make_bfloat162_1_lower(shim_stream, shim_obj):
    """Register the lowering of ``make_bfloat162(__nv_bfloat16, __nv_bfloat16)``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the call is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the call is lowered.
    """
    shim_symbol = "make_bfloat162_1"
    # Shim: stores ``make_bfloat162(*x, *y)`` in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    make_bfloat162_1(__nv_bfloat162 &retval , __nv_bfloat16* x, __nv_bfloat16* y) {
        retval = make_bfloat162(*x, *y);
        return 0;
    }
        """

    bf16_ptr = CPointer(_type___nv_bfloat16)
    make_bfloat162_1 = declare_device(
        shim_symbol, _type___nv_bfloat162(bf16_ptr, bf16_ptr)
    )

    def make_bfloat162_1_caller(arg_0, arg_1):
        return make_bfloat162_1(arg_0, arg_1)

    caller_sig = signature(_type___nv_bfloat162, bf16_ptr, bf16_ptr)

    @lower(make_bfloat162, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # The shim takes its arguments by pointer: allocate one stack slot
        # per argument first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        return context.compile_internal(
            builder, make_bfloat162_1_caller, caller_sig, slots
        )


_make_bfloat162_1_lower(shim_stream, shim_obj)


def htrunc():
    """Host-side placeholder for the CUDA ``htrunc`` function.

    Calling it from Python does nothing; the function object is what the
    ``@lower(htrunc, ...)`` registration in this file is keyed on.
    """
    return None


def _htrunc_1_lower(shim_stream, shim_obj):
    """Register the lowering of ``htrunc(__nv_bfloat16)``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the call is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the call is lowered.
    """
    shim_symbol = "htrunc_1"
    # Shim: stores ``htrunc(*h)`` in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    htrunc_1(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = htrunc(*h);
        return 0;
    }
        """

    bf16_ptr = CPointer(_type___nv_bfloat16)
    htrunc_1 = declare_device(shim_symbol, _type___nv_bfloat16(bf16_ptr))

    def htrunc_1_caller(arg_0):
        return htrunc_1(arg_0)

    caller_sig = signature(_type___nv_bfloat16, bf16_ptr)

    @lower(htrunc, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # The shim takes its arguments by pointer: allocate one stack slot
        # per argument first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        return context.compile_internal(
            builder, htrunc_1_caller, caller_sig, slots
        )


_htrunc_1_lower(shim_stream, shim_obj)


def hceil():
    """Host-side placeholder for the CUDA ``hceil`` function.

    Calling it from Python does nothing; the function object is what the
    ``@lower(hceil, ...)`` registration in this file is keyed on.
    """
    return None


def _hceil_1_lower(shim_stream, shim_obj):
    """Register the lowering of ``hceil(__nv_bfloat16)``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the call is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the call is lowered.
    """
    shim_symbol = "hceil_1"
    # Shim: stores ``hceil(*h)`` in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    hceil_1(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = hceil(*h);
        return 0;
    }
        """

    bf16_ptr = CPointer(_type___nv_bfloat16)
    hceil_1 = declare_device(shim_symbol, _type___nv_bfloat16(bf16_ptr))

    def hceil_1_caller(arg_0):
        return hceil_1(arg_0)

    caller_sig = signature(_type___nv_bfloat16, bf16_ptr)

    @lower(hceil, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # The shim takes its arguments by pointer: allocate one stack slot
        # per argument first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        return context.compile_internal(
            builder, hceil_1_caller, caller_sig, slots
        )


_hceil_1_lower(shim_stream, shim_obj)


def hfloor():
    """Host-side placeholder for the CUDA ``hfloor`` function.

    Calling it from Python does nothing; the function object is what the
    ``@lower(hfloor, ...)`` registration in this file is keyed on.
    """
    return None


def _hfloor_1_lower(shim_stream, shim_obj):
    """Register the lowering of ``hfloor(__nv_bfloat16)``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the call is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the call is lowered.
    """
    shim_symbol = "hfloor_1"
    # Shim: stores ``hfloor(*h)`` in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    hfloor_1(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = hfloor(*h);
        return 0;
    }
        """

    bf16_ptr = CPointer(_type___nv_bfloat16)
    hfloor_1 = declare_device(shim_symbol, _type___nv_bfloat16(bf16_ptr))

    def hfloor_1_caller(arg_0):
        return hfloor_1(arg_0)

    caller_sig = signature(_type___nv_bfloat16, bf16_ptr)

    @lower(hfloor, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # The shim takes its arguments by pointer: allocate one stack slot
        # per argument first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        return context.compile_internal(
            builder, hfloor_1_caller, caller_sig, slots
        )


_hfloor_1_lower(shim_stream, shim_obj)


def hrint():
    """Host-side placeholder for the CUDA ``hrint`` function.

    Calling it from Python does nothing; the function object is what the
    ``@lower(hrint, ...)`` registration in this file is keyed on.
    """
    return None


def _hrint_1_lower(shim_stream, shim_obj):
    """Register the lowering of ``hrint(__nv_bfloat16)``.

    Args:
        shim_stream: Receives the shim source through ``write_with_key``
            each time the call is lowered.
        shim_obj: Passed to ``add_linking_file`` on the active code library
            each time the call is lowered.
    """
    shim_symbol = "hrint_1"
    # Shim: stores ``hrint(*h)`` in ``retval`` and returns 0.
    shim_raw_str = """
    extern "C" __device__ int
    hrint_1(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = hrint(*h);
        return 0;
    }
        """

    bf16_ptr = CPointer(_type___nv_bfloat16)
    hrint_1 = declare_device(shim_symbol, _type___nv_bfloat16(bf16_ptr))

    def hrint_1_caller(arg_0):
        return hrint_1(arg_0)

    caller_sig = signature(_type___nv_bfloat16, bf16_ptr)

    @lower(hrint, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_symbol, shim_raw_str)

        # The shim takes its arguments by pointer: allocate one stack slot
        # per argument first, then fill them in order.
        slots = []
        for argty in sig.args:
            slots.append(builder.alloca(context.get_value_type(argty)))
        for slot, argty, val in zip(slots, sig.args, args):
            builder.store(val, slot, align=getattr(argty, "alignof_", None))

        return context.compile_internal(
            builder, hrint_1_caller, caller_sig, slots
        )


_hrint_1_lower(shim_stream, shim_obj)


def h2trunc():
    """Empty Python stand-in for ``h2trunc``; used as the ``@lower`` key."""


def _h2trunc_1_lower(shim_stream, shim_obj):
    """Register the ``h2trunc`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2trunc_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2trunc_1(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = h2trunc(*h);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2trunc_1 = declare_device("h2trunc_1", shim_sig)

    def h2trunc_1_caller(arg_0):
        return h2trunc_1(arg_0)

    @lower(h2trunc, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2trunc_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2trunc_1_caller, shim_sig, slots
        )


# Register the h2trunc lowering at module import time.
_h2trunc_1_lower(shim_stream, shim_obj)


def h2ceil():
    """Empty Python stand-in for ``h2ceil``; used as the ``@lower`` key."""


def _h2ceil_1_lower(shim_stream, shim_obj):
    """Register the ``h2ceil`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2ceil_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2ceil_1(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = h2ceil(*h);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2ceil_1 = declare_device("h2ceil_1", shim_sig)

    def h2ceil_1_caller(arg_0):
        return h2ceil_1(arg_0)

    @lower(h2ceil, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2ceil_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2ceil_1_caller, shim_sig, slots
        )


# Register the h2ceil lowering at module import time.
_h2ceil_1_lower(shim_stream, shim_obj)


def h2floor():
    """Empty Python stand-in for ``h2floor``; used as the ``@lower`` key."""


def _h2floor_1_lower(shim_stream, shim_obj):
    """Register the ``h2floor`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2floor_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2floor_1(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = h2floor(*h);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2floor_1 = declare_device("h2floor_1", shim_sig)

    def h2floor_1_caller(arg_0):
        return h2floor_1(arg_0)

    @lower(h2floor, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2floor_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2floor_1_caller, shim_sig, slots
        )


# Register the h2floor lowering at module import time.
_h2floor_1_lower(shim_stream, shim_obj)


def h2rint():
    """Empty Python stand-in for ``h2rint``; used as the ``@lower`` key."""


def _h2rint_1_lower(shim_stream, shim_obj):
    """Register the ``h2rint`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2rint_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2rint_1(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = h2rint(*h);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2rint_1 = declare_device("h2rint_1", shim_sig)

    def h2rint_1_caller(arg_0):
        return h2rint_1(arg_0)

    @lower(h2rint, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2rint_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2rint_1_caller, shim_sig, slots
        )


# Register the h2rint lowering at module import time.
_h2rint_1_lower(shim_stream, shim_obj)


def hsqrt():
    """Empty Python stand-in for ``hsqrt``; used as the ``@lower`` key."""


def _hsqrt_1_lower(shim_stream, shim_obj):
    """Register the ``hsqrt`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hsqrt_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hsqrt_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hsqrt(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hsqrt_1 = declare_device("hsqrt_1", shim_sig)

    def hsqrt_1_caller(arg_0):
        return hsqrt_1(arg_0)

    @lower(hsqrt, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hsqrt_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hsqrt_1_caller, shim_sig, slots
        )


# Register the hsqrt lowering at module import time.
_hsqrt_1_lower(shim_stream, shim_obj)


def hrsqrt():
    """Empty Python stand-in for ``hrsqrt``; used as the ``@lower`` key."""


def _hrsqrt_1_lower(shim_stream, shim_obj):
    """Register the ``hrsqrt`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hrsqrt_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hrsqrt_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hrsqrt(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hrsqrt_1 = declare_device("hrsqrt_1", shim_sig)

    def hrsqrt_1_caller(arg_0):
        return hrsqrt_1(arg_0)

    @lower(hrsqrt, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hrsqrt_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hrsqrt_1_caller, shim_sig, slots
        )


# Register the hrsqrt lowering at module import time.
_hrsqrt_1_lower(shim_stream, shim_obj)


def hrcp():
    """Empty Python stand-in for ``hrcp``; used as the ``@lower`` key."""


def _hrcp_1_lower(shim_stream, shim_obj):
    """Register the ``hrcp`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hrcp_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hrcp_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hrcp(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hrcp_1 = declare_device("hrcp_1", shim_sig)

    def hrcp_1_caller(arg_0):
        return hrcp_1(arg_0)

    @lower(hrcp, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hrcp_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hrcp_1_caller, shim_sig, slots
        )


# Register the hrcp lowering at module import time.
_hrcp_1_lower(shim_stream, shim_obj)


def hlog():
    """Empty Python stand-in for ``hlog``; used as the ``@lower`` key."""


def _hlog_1_lower(shim_stream, shim_obj):
    """Register the ``hlog`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hlog_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hlog_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hlog(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hlog_1 = declare_device("hlog_1", shim_sig)

    def hlog_1_caller(arg_0):
        return hlog_1(arg_0)

    @lower(hlog, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hlog_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hlog_1_caller, shim_sig, slots
        )


# Register the hlog lowering at module import time.
_hlog_1_lower(shim_stream, shim_obj)


def hlog2():
    """Empty Python stand-in for ``hlog2``; used as the ``@lower`` key."""


def _hlog2_1_lower(shim_stream, shim_obj):
    """Register the ``hlog2`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hlog2_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hlog2_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hlog2(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hlog2_1 = declare_device("hlog2_1", shim_sig)

    def hlog2_1_caller(arg_0):
        return hlog2_1(arg_0)

    @lower(hlog2, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hlog2_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hlog2_1_caller, shim_sig, slots
        )


# Register the hlog2 lowering at module import time.
_hlog2_1_lower(shim_stream, shim_obj)


def hlog10():
    """Empty Python stand-in for ``hlog10``; used as the ``@lower`` key."""


def _hlog10_1_lower(shim_stream, shim_obj):
    """Register the ``hlog10`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hlog10_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hlog10_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hlog10(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hlog10_1 = declare_device("hlog10_1", shim_sig)

    def hlog10_1_caller(arg_0):
        return hlog10_1(arg_0)

    @lower(hlog10, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hlog10_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hlog10_1_caller, shim_sig, slots
        )


# Register the hlog10 lowering at module import time.
_hlog10_1_lower(shim_stream, shim_obj)


def hexp():
    """Empty Python stand-in for ``hexp``; used as the ``@lower`` key."""


def _hexp_1_lower(shim_stream, shim_obj):
    """Register the ``hexp`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hexp_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hexp_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hexp(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hexp_1 = declare_device("hexp_1", shim_sig)

    def hexp_1_caller(arg_0):
        return hexp_1(arg_0)

    @lower(hexp, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hexp_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hexp_1_caller, shim_sig, slots
        )


# Register the hexp lowering at module import time.
_hexp_1_lower(shim_stream, shim_obj)


def htanh_approx():
    """Empty Python stand-in for ``htanh_approx``; the ``@lower`` key."""


def _htanh_approx_1_lower(shim_stream, shim_obj):
    """Register the ``htanh_approx`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"htanh_approx_1"``) and calls
    the shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    htanh_approx_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = htanh_approx(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    htanh_approx_1 = declare_device("htanh_approx_1", shim_sig)

    def htanh_approx_1_caller(arg_0):
        return htanh_approx_1(arg_0)

    @lower(htanh_approx, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("htanh_approx_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, htanh_approx_1_caller, shim_sig, slots
        )


# Register the htanh_approx lowering at module import time.
_htanh_approx_1_lower(shim_stream, shim_obj)


def h2tanh_approx():
    """Empty Python stand-in for ``h2tanh_approx``; the ``@lower`` key."""


def _h2tanh_approx_1_lower(shim_stream, shim_obj):
    """Register the ``h2tanh_approx`` lowering for ``__nv_bfloat162``.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2tanh_approx_1"``) and calls
    the shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2tanh_approx_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2tanh_approx(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2tanh_approx_1 = declare_device("h2tanh_approx_1", shim_sig)

    def h2tanh_approx_1_caller(arg_0):
        return h2tanh_approx_1(arg_0)

    @lower(h2tanh_approx, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2tanh_approx_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2tanh_approx_1_caller, shim_sig, slots
        )


# Register the h2tanh_approx lowering at module import time.
_h2tanh_approx_1_lower(shim_stream, shim_obj)


def htanh():
    """Empty Python stand-in for ``htanh``; used as the ``@lower`` key."""


def _htanh_1_lower(shim_stream, shim_obj):
    """Register the ``htanh`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"htanh_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    htanh_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = htanh(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    htanh_1 = declare_device("htanh_1", shim_sig)

    def htanh_1_caller(arg_0):
        return htanh_1(arg_0)

    @lower(htanh, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("htanh_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, htanh_1_caller, shim_sig, slots
        )


# Register the htanh lowering at module import time.
_htanh_1_lower(shim_stream, shim_obj)


def h2tanh():
    """Empty Python stand-in for ``h2tanh``; used as the ``@lower`` key."""


def _h2tanh_1_lower(shim_stream, shim_obj):
    """Register the ``h2tanh`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2tanh_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2tanh_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2tanh(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2tanh_1 = declare_device("h2tanh_1", shim_sig)

    def h2tanh_1_caller(arg_0):
        return h2tanh_1(arg_0)

    @lower(h2tanh, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2tanh_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2tanh_1_caller, shim_sig, slots
        )


# Register the h2tanh lowering at module import time.
_h2tanh_1_lower(shim_stream, shim_obj)


def hexp2():
    """Empty Python stand-in for ``hexp2``; used as the ``@lower`` key."""


def _hexp2_1_lower(shim_stream, shim_obj):
    """Register the ``hexp2`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hexp2_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hexp2_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hexp2(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hexp2_1 = declare_device("hexp2_1", shim_sig)

    def hexp2_1_caller(arg_0):
        return hexp2_1(arg_0)

    @lower(hexp2, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hexp2_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hexp2_1_caller, shim_sig, slots
        )


# Register the hexp2 lowering at module import time.
_hexp2_1_lower(shim_stream, shim_obj)


def hexp10():
    """Empty Python stand-in for ``hexp10``; used as the ``@lower`` key."""


def _hexp10_1_lower(shim_stream, shim_obj):
    """Register the ``hexp10`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hexp10_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hexp10_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hexp10(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hexp10_1 = declare_device("hexp10_1", shim_sig)

    def hexp10_1_caller(arg_0):
        return hexp10_1(arg_0)

    @lower(hexp10, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hexp10_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hexp10_1_caller, shim_sig, slots
        )


# Register the hexp10 lowering at module import time.
_hexp10_1_lower(shim_stream, shim_obj)


def hcos():
    """Empty Python stand-in for ``hcos``; used as the ``@lower`` key."""


def _hcos_1_lower(shim_stream, shim_obj):
    """Register the ``hcos`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hcos_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hcos_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hcos(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hcos_1 = declare_device("hcos_1", shim_sig)

    def hcos_1_caller(arg_0):
        return hcos_1(arg_0)

    @lower(hcos, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hcos_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hcos_1_caller, shim_sig, slots
        )


# Register the hcos lowering at module import time.
_hcos_1_lower(shim_stream, shim_obj)


def hsin():
    """Empty Python stand-in for ``hsin``; used as the ``@lower`` key."""


def _hsin_1_lower(shim_stream, shim_obj):
    """Register the ``hsin`` lowering for a ``__nv_bfloat16`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"hsin_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    hsin_1(__nv_bfloat16 &retval , __nv_bfloat16* a) {
        retval = hsin(*a);
        return 0;
    }
        """

    bf16 = _type___nv_bfloat16
    # Numba-side declaration of the shim: returns bf16, takes a pointer.
    shim_sig = signature(bf16, CPointer(bf16))
    hsin_1 = declare_device("hsin_1", shim_sig)

    def hsin_1_caller(arg_0):
        return hsin_1(arg_0)

    @lower(hsin, bf16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("hsin_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, hsin_1_caller, shim_sig, slots
        )


# Register the hsin lowering at module import time.
_hsin_1_lower(shim_stream, shim_obj)


def h2sqrt():
    """Empty Python stand-in for ``h2sqrt``; used as the ``@lower`` key."""


def _h2sqrt_1_lower(shim_stream, shim_obj):
    """Register the ``h2sqrt`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2sqrt_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2sqrt_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2sqrt(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2sqrt_1 = declare_device("h2sqrt_1", shim_sig)

    def h2sqrt_1_caller(arg_0):
        return h2sqrt_1(arg_0)

    @lower(h2sqrt, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2sqrt_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2sqrt_1_caller, shim_sig, slots
        )


# Register the h2sqrt lowering at module import time.
_h2sqrt_1_lower(shim_stream, shim_obj)


def h2rsqrt():
    """Empty Python stand-in for ``h2rsqrt``; used as the ``@lower`` key."""


def _h2rsqrt_1_lower(shim_stream, shim_obj):
    """Register the ``h2rsqrt`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2rsqrt_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2rsqrt_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2rsqrt(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2rsqrt_1 = declare_device("h2rsqrt_1", shim_sig)

    def h2rsqrt_1_caller(arg_0):
        return h2rsqrt_1(arg_0)

    @lower(h2rsqrt, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2rsqrt_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2rsqrt_1_caller, shim_sig, slots
        )


# Register the h2rsqrt lowering at module import time.
_h2rsqrt_1_lower(shim_stream, shim_obj)


def h2rcp():
    """Empty Python stand-in for ``h2rcp``; used as the ``@lower`` key."""


def _h2rcp_1_lower(shim_stream, shim_obj):
    """Register the ``h2rcp`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2rcp_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2rcp_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2rcp(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2rcp_1 = declare_device("h2rcp_1", shim_sig)

    def h2rcp_1_caller(arg_0):
        return h2rcp_1(arg_0)

    @lower(h2rcp, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2rcp_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2rcp_1_caller, shim_sig, slots
        )


# Register the h2rcp lowering at module import time.
_h2rcp_1_lower(shim_stream, shim_obj)


def h2log():
    """Empty Python stand-in for ``h2log``; used as the ``@lower`` key."""


def _h2log_1_lower(shim_stream, shim_obj):
    """Register the ``h2log`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2log_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2log_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2log(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2log_1 = declare_device("h2log_1", shim_sig)

    def h2log_1_caller(arg_0):
        return h2log_1(arg_0)

    @lower(h2log, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2log_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2log_1_caller, shim_sig, slots
        )


# Register the h2log lowering at module import time.
_h2log_1_lower(shim_stream, shim_obj)


def h2log2():
    """Empty Python stand-in for ``h2log2``; used as the ``@lower`` key."""


def _h2log2_1_lower(shim_stream, shim_obj):
    """Register the ``h2log2`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2log2_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2log2_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2log2(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2log2_1 = declare_device("h2log2_1", shim_sig)

    def h2log2_1_caller(arg_0):
        return h2log2_1(arg_0)

    @lower(h2log2, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2log2_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2log2_1_caller, shim_sig, slots
        )


# Register the h2log2 lowering at module import time.
_h2log2_1_lower(shim_stream, shim_obj)


def h2log10():
    """Empty Python stand-in for ``h2log10``; used as the ``@lower`` key."""


def _h2log10_1_lower(shim_stream, shim_obj):
    """Register the ``h2log10`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2log10_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2log10_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2log10(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2log10_1 = declare_device("h2log10_1", shim_sig)

    def h2log10_1_caller(arg_0):
        return h2log10_1(arg_0)

    @lower(h2log10, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2log10_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2log10_1_caller, shim_sig, slots
        )


# Register the h2log10 lowering at module import time.
_h2log10_1_lower(shim_stream, shim_obj)


def h2exp():
    """Empty Python stand-in for ``h2exp``; used as the ``@lower`` key."""


def _h2exp_1_lower(shim_stream, shim_obj):
    """Register the ``h2exp`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2exp_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2exp_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2exp(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2exp_1 = declare_device("h2exp_1", shim_sig)

    def h2exp_1_caller(arg_0):
        return h2exp_1(arg_0)

    @lower(h2exp, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2exp_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2exp_1_caller, shim_sig, slots
        )


# Register the h2exp lowering at module import time.
_h2exp_1_lower(shim_stream, shim_obj)


def h2exp2():
    """Empty Python stand-in for ``h2exp2``; used as the ``@lower`` key."""


def _h2exp2_1_lower(shim_stream, shim_obj):
    """Register the ``h2exp2`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2exp2_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2exp2_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2exp2(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2exp2_1 = declare_device("h2exp2_1", shim_sig)

    def h2exp2_1_caller(arg_0):
        return h2exp2_1(arg_0)

    @lower(h2exp2, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2exp2_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2exp2_1_caller, shim_sig, slots
        )


# Register the h2exp2 lowering at module import time.
_h2exp2_1_lower(shim_stream, shim_obj)


def h2exp10():
    """Empty Python stand-in for ``h2exp10``; used as the ``@lower`` key."""


def _h2exp10_1_lower(shim_stream, shim_obj):
    """Register the ``h2exp10`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2exp10_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2exp10_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2exp10(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2exp10_1 = declare_device("h2exp10_1", shim_sig)

    def h2exp10_1_caller(arg_0):
        return h2exp10_1(arg_0)

    @lower(h2exp10, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2exp10_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2exp10_1_caller, shim_sig, slots
        )


# Register the h2exp10 lowering at module import time.
_h2exp10_1_lower(shim_stream, shim_obj)


def h2cos():
    """Empty Python stand-in for ``h2cos``; used as the ``@lower`` key."""


def _h2cos_1_lower(shim_stream, shim_obj):
    """Register the ``h2cos`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2cos_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2cos_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2cos(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2cos_1 = declare_device("h2cos_1", shim_sig)

    def h2cos_1_caller(arg_0):
        return h2cos_1(arg_0)

    @lower(h2cos, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2cos_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2cos_1_caller, shim_sig, slots
        )


# Register the h2cos lowering at module import time.
_h2cos_1_lower(shim_stream, shim_obj)


def h2sin():
    """Empty Python stand-in for ``h2sin``; used as the ``@lower`` key."""


def _h2sin_1_lower(shim_stream, shim_obj):
    """Register the ``h2sin`` lowering for a ``__nv_bfloat162`` argument.

    The lowering links ``shim_obj``, writes the C++ shim below into
    ``shim_stream`` (at most once, keyed by ``"h2sin_1"``) and calls the
    shim with a pointer to the argument.
    """
    shim_raw_str = """
    extern "C" __device__ int
    h2sin_1(__nv_bfloat162 &retval , __nv_bfloat162* a) {
        retval = h2sin(*a);
        return 0;
    }
        """

    bf16x2 = _type___nv_bfloat162
    # Numba-side declaration of the shim: returns bf16x2, takes a pointer.
    shim_sig = signature(bf16x2, CPointer(bf16x2))
    h2sin_1 = declare_device("h2sin_1", shim_sig)

    def h2sin_1_caller(arg_0):
        return h2sin_1(arg_0)

    @lower(h2sin, bf16x2)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("h2sin_1", shim_raw_str)
        # Store every argument in its own stack slot so it can go by pointer.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, h2sin_1_caller, shim_sig, slots
        )


# Register the h2sin lowering at module import time.
_h2sin_1_lower(shim_stream, shim_obj)


def atomicAdd():
    """Placeholder whose function object keys the ``@lower(atomicAdd, ...)`` registrations.

    The body is intentionally empty: calling it does nothing and returns
    ``None``.
    """
    return None


def _atomicAdd_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``atomicAdd(__nv_bfloat162*, __nv_bfloat162)``.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    atomicAdd_1(__nv_bfloat162 &retval , __nv_bfloat162 ** address, __nv_bfloat162* val) {
        retval = atomicAdd(*address, *val);
        return 0;
    }
        """

    shim_name = "atomicAdd_1"
    value_ptr = CPointer(_type___nv_bfloat162)
    # The address argument is itself a pointer, so the shim sees a
    # pointer-to-pointer once it has been spilled to a slot.
    address_ptr = CPointer(value_ptr)
    atomicAdd_1 = declare_device(
        shim_name, _type___nv_bfloat162(address_ptr, value_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat162, address_ptr, value_ptr)

    def atomicAdd_1_caller(arg_0, arg_1):
        return atomicAdd_1(arg_0, arg_1)

    @lower(atomicAdd, CPointer(_type___nv_bfloat162), _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # Copy every argument into its own alloca'd slot (all slots are
        # created before any store) and pass the slot pointers on.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, atomicAdd_1_caller, caller_sig, slots
        )


_atomicAdd_1_lower(shim_stream, shim_obj)


def _atomicAdd_2_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``atomicAdd(__nv_bfloat16*, __nv_bfloat16)``.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    atomicAdd_2(__nv_bfloat16 &retval , __nv_bfloat16 ** address, __nv_bfloat16* val) {
        retval = atomicAdd(*address, *val);
        return 0;
    }
        """

    shim_name = "atomicAdd_2"
    value_ptr = CPointer(_type___nv_bfloat16)
    # The address argument is itself a pointer, so the shim sees a
    # pointer-to-pointer once it has been spilled to a slot.
    address_ptr = CPointer(value_ptr)
    atomicAdd_2 = declare_device(
        shim_name, _type___nv_bfloat16(address_ptr, value_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, address_ptr, value_ptr)

    def atomicAdd_2_caller(arg_0, arg_1):
        return atomicAdd_2(arg_0, arg_1)

    @lower(atomicAdd, CPointer(_type___nv_bfloat16), _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # Copy every argument into its own alloca'd slot (all slots are
        # created before any store) and pass the slot pointers on.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, atomicAdd_2_caller, caller_sig, slots
        )


_atomicAdd_2_lower(shim_stream, shim_obj)


def _operator_add_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.add`` on ``__nv_bfloat16`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_add_1(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator+(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_add_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_add_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def operator_add_1_caller(arg_0, arg_1):
        return operator_add_1(arg_0, arg_1)

    @lower(operator.add, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_add_1_caller, caller_sig, slots
        )


_operator_add_1_lower(shim_stream, shim_obj)


def _operator_sub_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.sub`` on ``__nv_bfloat16`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_sub_1(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator-(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_sub_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_sub_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def operator_sub_1_caller(arg_0, arg_1):
        return operator_sub_1(arg_0, arg_1)

    @lower(operator.sub, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_sub_1_caller, caller_sig, slots
        )


_operator_sub_1_lower(shim_stream, shim_obj)


def _operator_mul_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.mul`` on ``__nv_bfloat16`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_mul_1(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator*(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_mul_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_mul_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def operator_mul_1_caller(arg_0, arg_1):
        return operator_mul_1(arg_0, arg_1)

    @lower(operator.mul, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_mul_1_caller, caller_sig, slots
        )


_operator_mul_1_lower(shim_stream, shim_obj)


def _operator_truediv_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.truediv`` on ``__nv_bfloat16`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_truediv_1(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator/(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_truediv_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_truediv_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def operator_truediv_1_caller(arg_0, arg_1):
        return operator_truediv_1(arg_0, arg_1)

    @lower(operator.truediv, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_truediv_1_caller, caller_sig, slots
        )


_operator_truediv_1_lower(shim_stream, shim_obj)


def _operator_iadd_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.iadd`` on ``__nv_bfloat16`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_iadd_1(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator+=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_iadd_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_iadd_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def operator_iadd_1_caller(arg_0, arg_1):
        return operator_iadd_1(arg_0, arg_1)

    @lower(operator.iadd, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # Operands are copied into alloca'd slots (all slots created before
        # any store); the shim's ``+=`` acts on the slot copy of the left
        # operand and the result comes back by value.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_iadd_1_caller, caller_sig, slots
        )


_operator_iadd_1_lower(shim_stream, shim_obj)


def _operator_isub_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.isub`` on ``__nv_bfloat16`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_isub_1(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator-=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_isub_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_isub_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def operator_isub_1_caller(arg_0, arg_1):
        return operator_isub_1(arg_0, arg_1)

    @lower(operator.isub, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # Operands are copied into alloca'd slots (all slots created before
        # any store); the shim's ``-=`` acts on the slot copy of the left
        # operand and the result comes back by value.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_isub_1_caller, caller_sig, slots
        )


_operator_isub_1_lower(shim_stream, shim_obj)


def _operator_imul_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.imul`` on ``__nv_bfloat16`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_imul_1(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator*=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_imul_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_imul_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def operator_imul_1_caller(arg_0, arg_1):
        return operator_imul_1(arg_0, arg_1)

    @lower(operator.imul, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # Operands are copied into alloca'd slots (all slots created before
        # any store); the shim's ``*=`` acts on the slot copy of the left
        # operand and the result comes back by value.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_imul_1_caller, caller_sig, slots
        )


_operator_imul_1_lower(shim_stream, shim_obj)


def _operator_itruediv_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.itruediv`` on ``__nv_bfloat16`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_itruediv_1(__nv_bfloat16 &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator/=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_itruediv_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_itruediv_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr, operand_ptr)

    def operator_itruediv_1_caller(arg_0, arg_1):
        return operator_itruediv_1(arg_0, arg_1)

    @lower(operator.itruediv, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # Operands are copied into alloca'd slots (all slots created before
        # any store); the shim's ``/=`` acts on the slot copy of the left
        # operand and the result comes back by value.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_itruediv_1_caller, caller_sig, slots
        )


_operator_itruediv_1_lower(shim_stream, shim_obj)


def _operator_pos_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for unary ``operator.pos`` on ``__nv_bfloat16``.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_pos_1(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = operator+(*h);
        return 0;
    }
        """

    shim_name = "operator_pos_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_pos_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr)

    def operator_pos_1_caller(arg_0):
        return operator_pos_1(arg_0)

    @lower(operator.pos, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes a pointer, so the operand is first copied into an
        # alloca'd slot (slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_pos_1_caller, caller_sig, slots
        )


_operator_pos_1_lower(shim_stream, shim_obj)


def _operator_neg_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for unary ``operator.neg`` on ``__nv_bfloat16``.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_neg_1(__nv_bfloat16 &retval , __nv_bfloat16* h) {
        retval = operator-(*h);
        return 0;
    }
        """

    shim_name = "operator_neg_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_neg_1 = declare_device(
        shim_name, _type___nv_bfloat16(operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat16, operand_ptr)

    def operator_neg_1_caller(arg_0):
        return operator_neg_1(arg_0)

    @lower(operator.neg, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes a pointer, so the operand is first copied into an
        # alloca'd slot (slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_neg_1_caller, caller_sig, slots
        )


_operator_neg_1_lower(shim_stream, shim_obj)


def _operator_eq_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.eq`` on ``__nv_bfloat16`` operands.

    The registered shim yields a ``bool_`` result.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_eq_1(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator==(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_eq_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_eq_1 = declare_device(shim_name, bool_(operand_ptr, operand_ptr))
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(bool_, operand_ptr, operand_ptr)

    def operator_eq_1_caller(arg_0, arg_1):
        return operator_eq_1(arg_0, arg_1)

    @lower(operator.eq, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_eq_1_caller, caller_sig, slots
        )


_operator_eq_1_lower(shim_stream, shim_obj)


def _operator_ne_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.ne`` on ``__nv_bfloat16`` operands.

    The registered shim yields a ``bool_`` result.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_ne_1(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator!=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_ne_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_ne_1 = declare_device(shim_name, bool_(operand_ptr, operand_ptr))
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(bool_, operand_ptr, operand_ptr)

    def operator_ne_1_caller(arg_0, arg_1):
        return operator_ne_1(arg_0, arg_1)

    @lower(operator.ne, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_ne_1_caller, caller_sig, slots
        )


_operator_ne_1_lower(shim_stream, shim_obj)


def _operator_gt_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.gt`` on ``__nv_bfloat16`` operands.

    The registered shim yields a ``bool_`` result.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_gt_1(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator>(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_gt_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_gt_1 = declare_device(shim_name, bool_(operand_ptr, operand_ptr))
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(bool_, operand_ptr, operand_ptr)

    def operator_gt_1_caller(arg_0, arg_1):
        return operator_gt_1(arg_0, arg_1)

    @lower(operator.gt, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_gt_1_caller, caller_sig, slots
        )


_operator_gt_1_lower(shim_stream, shim_obj)


def _operator_lt_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.lt`` on ``__nv_bfloat16`` operands.

    The registered shim yields a ``bool_`` result.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_lt_1(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator<(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_lt_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_lt_1 = declare_device(shim_name, bool_(operand_ptr, operand_ptr))
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(bool_, operand_ptr, operand_ptr)

    def operator_lt_1_caller(arg_0, arg_1):
        return operator_lt_1(arg_0, arg_1)

    @lower(operator.lt, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_lt_1_caller, caller_sig, slots
        )


_operator_lt_1_lower(shim_stream, shim_obj)


def _operator_ge_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.ge`` on ``__nv_bfloat16`` operands.

    The registered shim yields a ``bool_`` result.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_ge_1(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator>=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_ge_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_ge_1 = declare_device(shim_name, bool_(operand_ptr, operand_ptr))
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(bool_, operand_ptr, operand_ptr)

    def operator_ge_1_caller(arg_0, arg_1):
        return operator_ge_1(arg_0, arg_1)

    @lower(operator.ge, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_ge_1_caller, caller_sig, slots
        )


_operator_ge_1_lower(shim_stream, shim_obj)


def _operator_le_1_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.le`` on ``__nv_bfloat16`` operands.

    The registered shim yields a ``bool_`` result.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_le_1(bool &retval , __nv_bfloat16* lh, __nv_bfloat16* rh) {
        retval = operator<=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_le_1"
    operand_ptr = CPointer(_type___nv_bfloat16)
    operator_le_1 = declare_device(shim_name, bool_(operand_ptr, operand_ptr))
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(bool_, operand_ptr, operand_ptr)

    def operator_le_1_caller(arg_0, arg_1):
        return operator_le_1(arg_0, arg_1)

    @lower(operator.le, _type___nv_bfloat16, _type___nv_bfloat16)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_le_1_caller, caller_sig, slots
        )


_operator_le_1_lower(shim_stream, shim_obj)


def _operator_add_2_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.add`` on ``__nv_bfloat162`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_add_2(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator+(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_add_2"
    operand_ptr = CPointer(_type___nv_bfloat162)
    operator_add_2 = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def operator_add_2_caller(arg_0, arg_1):
        return operator_add_2(arg_0, arg_1)

    @lower(operator.add, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_add_2_caller, caller_sig, slots
        )


_operator_add_2_lower(shim_stream, shim_obj)


def _operator_sub_2_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.sub`` on ``__nv_bfloat162`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_sub_2(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator-(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_sub_2"
    operand_ptr = CPointer(_type___nv_bfloat162)
    operator_sub_2 = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def operator_sub_2_caller(arg_0, arg_1):
        return operator_sub_2(arg_0, arg_1)

    @lower(operator.sub, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_sub_2_caller, caller_sig, slots
        )


_operator_sub_2_lower(shim_stream, shim_obj)


def _operator_mul_2_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.mul`` on ``__nv_bfloat162`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_mul_2(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator*(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_mul_2"
    operand_ptr = CPointer(_type___nv_bfloat162)
    operator_mul_2 = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def operator_mul_2_caller(arg_0, arg_1):
        return operator_mul_2(arg_0, arg_1)

    @lower(operator.mul, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_mul_2_caller, caller_sig, slots
        )


_operator_mul_2_lower(shim_stream, shim_obj)


def _operator_truediv_2_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.truediv`` on ``__nv_bfloat162`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_truediv_2(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator/(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_truediv_2"
    operand_ptr = CPointer(_type___nv_bfloat162)
    operator_truediv_2 = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def operator_truediv_2_caller(arg_0, arg_1):
        return operator_truediv_2(arg_0, arg_1)

    @lower(operator.truediv, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # The shim takes pointers, so every operand is first copied into
        # its own alloca'd slot (all slots are created before any store).
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_truediv_2_caller, caller_sig, slots
        )


_operator_truediv_2_lower(shim_stream, shim_obj)


def _operator_iadd_2_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.iadd`` on ``__nv_bfloat162`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_iadd_2(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator+=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_iadd_2"
    operand_ptr = CPointer(_type___nv_bfloat162)
    operator_iadd_2 = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def operator_iadd_2_caller(arg_0, arg_1):
        return operator_iadd_2(arg_0, arg_1)

    @lower(operator.iadd, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # Operands are copied into alloca'd slots (all slots created before
        # any store); the shim's ``+=`` acts on the slot copy of the left
        # operand and the result comes back by value.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_iadd_2_caller, caller_sig, slots
        )


_operator_iadd_2_lower(shim_stream, shim_obj)


def _operator_isub_2_lower(shim_stream, shim_obj):
    """Install the CUDA lowering for ``operator.isub`` on ``__nv_bfloat162`` operands.

    Args:
        shim_stream: Receives the C shim source through ``write_with_key``
            each time the lowering is invoked.
        shim_obj: Handed to ``add_linking_file`` on the active code library.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_isub_2(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator-=(*lh, *rh);
        return 0;
    }
        """

    shim_name = "operator_isub_2"
    operand_ptr = CPointer(_type___nv_bfloat162)
    operator_isub_2 = declare_device(
        shim_name, _type___nv_bfloat162(operand_ptr, operand_ptr)
    )
    # Signature under which the Python-level caller below is compiled.
    caller_sig = signature(_type___nv_bfloat162, operand_ptr, operand_ptr)

    def operator_isub_2_caller(arg_0, arg_1):
        return operator_isub_2(arg_0, arg_1)

    @lower(operator.isub, _type___nv_bfloat162, _type___nv_bfloat162)
    def impl(context, builder, sig, args):
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key(shim_name, shim_raw_str)
        # Operands are copied into alloca'd slots (all slots created before
        # any store); the shim's ``-=`` acts on the slot copy of the left
        # operand and the result comes back by value.
        slots = [
            builder.alloca(context.get_value_type(arg_ty))
            for arg_ty in sig.args
        ]
        for slot, arg_ty, value in zip(slots, sig.args, args):
            builder.store(value, slot, align=getattr(arg_ty, "alignof_", None))

        return context.compile_internal(
            builder, operator_isub_2_caller, caller_sig, slots
        )


_operator_isub_2_lower(shim_stream, shim_obj)


def _operator_imul_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.imul`` for ``_type___nv_bfloat162``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_imul_2(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator*=(*lh, *rh);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; operands travel by pointer.
    operator_imul_2 = declare_device(
        "operator_imul_2", bf162(bf162_ptr, bf162_ptr)
    )

    def operator_imul_2_caller(arg_0, arg_1):
        return operator_imul_2(arg_0, arg_1)

    @lower(operator.imul, bf162, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_imul_2", shim_raw_str)

        # Reserve one stack slot per operand, then spill the values so the
        # caller can be compiled against pointer arguments.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bf162, bf162_ptr, bf162_ptr)
        return context.compile_internal(
            builder, operator_imul_2_caller, caller_sig, slots
        )


_operator_imul_2_lower(shim_stream, shim_obj)


def _operator_itruediv_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.itruediv`` for ``_type___nv_bfloat162``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_itruediv_2(__nv_bfloat162 &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator/=(*lh, *rh);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; operands travel by pointer.
    operator_itruediv_2 = declare_device(
        "operator_itruediv_2", bf162(bf162_ptr, bf162_ptr)
    )

    def operator_itruediv_2_caller(arg_0, arg_1):
        return operator_itruediv_2(arg_0, arg_1)

    @lower(operator.itruediv, bf162, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_itruediv_2", shim_raw_str)

        # Reserve one stack slot per operand, then spill the values so the
        # caller can be compiled against pointer arguments.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bf162, bf162_ptr, bf162_ptr)
        return context.compile_internal(
            builder, operator_itruediv_2_caller, caller_sig, slots
        )


_operator_itruediv_2_lower(shim_stream, shim_obj)


def _operator_pos_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.pos`` for ``_type___nv_bfloat162``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_pos_2(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = operator+(*h);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; the operand travels by pointer.
    operator_pos_2 = declare_device("operator_pos_2", bf162(bf162_ptr))

    def operator_pos_2_caller(arg_0):
        return operator_pos_2(arg_0)

    @lower(operator.pos, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_pos_2", shim_raw_str)

        # Reserve a stack slot for the operand, then spill the value so the
        # caller can be compiled against a pointer argument.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bf162, bf162_ptr)
        return context.compile_internal(
            builder, operator_pos_2_caller, caller_sig, slots
        )


_operator_pos_2_lower(shim_stream, shim_obj)


def _operator_neg_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.neg`` for ``_type___nv_bfloat162``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_neg_2(__nv_bfloat162 &retval , __nv_bfloat162* h) {
        retval = operator-(*h);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; the operand travels by pointer.
    operator_neg_2 = declare_device("operator_neg_2", bf162(bf162_ptr))

    def operator_neg_2_caller(arg_0):
        return operator_neg_2(arg_0)

    @lower(operator.neg, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_neg_2", shim_raw_str)

        # Reserve a stack slot for the operand, then spill the value so the
        # caller can be compiled against a pointer argument.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bf162, bf162_ptr)
        return context.compile_internal(
            builder, operator_neg_2_caller, caller_sig, slots
        )


_operator_neg_2_lower(shim_stream, shim_obj)


def _operator_eq_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.eq`` for ``_type___nv_bfloat162``.

    The declared result type is ``bool_``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_eq_2(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator==(*lh, *rh);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; operands travel by pointer.
    operator_eq_2 = declare_device(
        "operator_eq_2", bool_(bf162_ptr, bf162_ptr)
    )

    def operator_eq_2_caller(arg_0, arg_1):
        return operator_eq_2(arg_0, arg_1)

    @lower(operator.eq, bf162, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_eq_2", shim_raw_str)

        # Reserve one stack slot per operand, then spill the values so the
        # caller can be compiled against pointer arguments.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bool_, bf162_ptr, bf162_ptr)
        return context.compile_internal(
            builder, operator_eq_2_caller, caller_sig, slots
        )


_operator_eq_2_lower(shim_stream, shim_obj)


def _operator_ne_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.ne`` for ``_type___nv_bfloat162``.

    The declared result type is ``bool_``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_ne_2(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator!=(*lh, *rh);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; operands travel by pointer.
    operator_ne_2 = declare_device(
        "operator_ne_2", bool_(bf162_ptr, bf162_ptr)
    )

    def operator_ne_2_caller(arg_0, arg_1):
        return operator_ne_2(arg_0, arg_1)

    @lower(operator.ne, bf162, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_ne_2", shim_raw_str)

        # Reserve one stack slot per operand, then spill the values so the
        # caller can be compiled against pointer arguments.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bool_, bf162_ptr, bf162_ptr)
        return context.compile_internal(
            builder, operator_ne_2_caller, caller_sig, slots
        )


_operator_ne_2_lower(shim_stream, shim_obj)


def _operator_gt_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.gt`` for ``_type___nv_bfloat162``.

    The declared result type is ``bool_``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_gt_2(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator>(*lh, *rh);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; operands travel by pointer.
    operator_gt_2 = declare_device(
        "operator_gt_2", bool_(bf162_ptr, bf162_ptr)
    )

    def operator_gt_2_caller(arg_0, arg_1):
        return operator_gt_2(arg_0, arg_1)

    @lower(operator.gt, bf162, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_gt_2", shim_raw_str)

        # Reserve one stack slot per operand, then spill the values so the
        # caller can be compiled against pointer arguments.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bool_, bf162_ptr, bf162_ptr)
        return context.compile_internal(
            builder, operator_gt_2_caller, caller_sig, slots
        )


_operator_gt_2_lower(shim_stream, shim_obj)


def _operator_lt_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.lt`` for ``_type___nv_bfloat162``.

    The declared result type is ``bool_``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_lt_2(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator<(*lh, *rh);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; operands travel by pointer.
    operator_lt_2 = declare_device(
        "operator_lt_2", bool_(bf162_ptr, bf162_ptr)
    )

    def operator_lt_2_caller(arg_0, arg_1):
        return operator_lt_2(arg_0, arg_1)

    @lower(operator.lt, bf162, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_lt_2", shim_raw_str)

        # Reserve one stack slot per operand, then spill the values so the
        # caller can be compiled against pointer arguments.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bool_, bf162_ptr, bf162_ptr)
        return context.compile_internal(
            builder, operator_lt_2_caller, caller_sig, slots
        )


_operator_lt_2_lower(shim_stream, shim_obj)


def _operator_ge_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.ge`` for ``_type___nv_bfloat162``.

    The declared result type is ``bool_``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_ge_2(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator>=(*lh, *rh);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; operands travel by pointer.
    operator_ge_2 = declare_device(
        "operator_ge_2", bool_(bf162_ptr, bf162_ptr)
    )

    def operator_ge_2_caller(arg_0, arg_1):
        return operator_ge_2(arg_0, arg_1)

    @lower(operator.ge, bf162, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_ge_2", shim_raw_str)

        # Reserve one stack slot per operand, then spill the values so the
        # caller can be compiled against pointer arguments.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bool_, bf162_ptr, bf162_ptr)
        return context.compile_internal(
            builder, operator_ge_2_caller, caller_sig, slots
        )


_operator_ge_2_lower(shim_stream, shim_obj)


def _operator_le_2_lower(shim_stream, shim_obj):
    """Register the lowering of ``operator.le`` for ``_type___nv_bfloat162``.

    The declared result type is ``bool_``.

    Args:
        shim_stream: Stream that receives the C++ shim source through
            ``write_with_key`` each time the lowering runs.
        shim_obj: Object added to the active code library as a linking file
            each time the lowering runs.
    """
    shim_raw_str = """
    extern "C" __device__ int
    operator_le_2(bool &retval , __nv_bfloat162* lh, __nv_bfloat162* rh) {
        retval = operator<=(*lh, *rh);
        return 0;
    }
        """

    bf162 = _type___nv_bfloat162
    bf162_ptr = CPointer(bf162)

    # Device-function declaration for the shim; operands travel by pointer.
    operator_le_2 = declare_device(
        "operator_le_2", bool_(bf162_ptr, bf162_ptr)
    )

    def operator_le_2_caller(arg_0, arg_1):
        return operator_le_2(arg_0, arg_1)

    @lower(operator.le, bf162, bf162)
    def impl(context, builder, sig, args):
        # Attach the shim object and hand its source to the stream by key.
        context.active_code_library.add_linking_file(shim_obj)
        shim_stream.write_with_key("operator_le_2", shim_raw_str)

        # Reserve one stack slot per operand, then spill the values so the
        # caller can be compiled against pointer arguments.
        slots = [
            builder.alloca(context.get_value_type(operand_ty))
            for operand_ty in sig.args
        ]
        for slot, operand_ty, value in zip(slots, sig.args, args):
            alignment = getattr(operand_ty, "alignof_", None)
            builder.store(value, slot, align=alignment)

        caller_sig = signature(bool_, bf162_ptr, bf162_ptr)
        return context.compile_internal(
            builder, operator_le_2_caller, caller_sig, slots
        )


_operator_le_2_lower(shim_stream, shim_obj)


@register
class _typing_make_bfloat162(ConcreteTemplate):
    """Typing template keyed on the module-level ``make_bfloat162`` object.

    The single case takes two ``_type___nv_bfloat16`` arguments and yields a
    ``_type___nv_bfloat162``.
    """

    key = make_bfloat162
    cases = [
        signature(
            _type___nv_bfloat162,
            _type___nv_bfloat16,
            _type___nv_bfloat16,
        ),
    ]


# Register the ``make_bfloat162`` global with a function type built from the
# template above.
register_global(make_bfloat162, Function(_typing_make_bfloat162))


@register
class _typing_htrunc(ConcreteTemplate):
    """Typing template keyed on the module-level ``htrunc`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = htrunc
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``htrunc`` global with a function type built from the template.
register_global(htrunc, Function(_typing_htrunc))


@register
class _typing_hceil(ConcreteTemplate):
    """Typing template keyed on the module-level ``hceil`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hceil
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hceil`` global with a function type built from the template.
register_global(hceil, Function(_typing_hceil))


@register
class _typing_hfloor(ConcreteTemplate):
    """Typing template keyed on the module-level ``hfloor`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hfloor
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hfloor`` global with a function type built from the template.
register_global(hfloor, Function(_typing_hfloor))


@register
class _typing_hrint(ConcreteTemplate):
    """Typing template keyed on the module-level ``hrint`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hrint
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hrint`` global with a function type built from the template.
register_global(hrint, Function(_typing_hrint))


@register
class _typing_h2trunc(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2trunc`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2trunc
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2trunc`` global with a function type built from the template.
register_global(h2trunc, Function(_typing_h2trunc))


@register
class _typing_h2ceil(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2ceil`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2ceil
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2ceil`` global with a function type built from the template.
register_global(h2ceil, Function(_typing_h2ceil))


@register
class _typing_h2floor(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2floor`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2floor
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2floor`` global with a function type built from the template.
register_global(h2floor, Function(_typing_h2floor))


@register
class _typing_h2rint(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2rint`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2rint
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2rint`` global with a function type built from the template.
register_global(h2rint, Function(_typing_h2rint))


@register
class _typing_hsqrt(ConcreteTemplate):
    """Typing template keyed on the module-level ``hsqrt`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hsqrt
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hsqrt`` global with a function type built from the template.
register_global(hsqrt, Function(_typing_hsqrt))


@register
class _typing_hrsqrt(ConcreteTemplate):
    """Typing template keyed on the module-level ``hrsqrt`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hrsqrt
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hrsqrt`` global with a function type built from the template.
register_global(hrsqrt, Function(_typing_hrsqrt))


@register
class _typing_hrcp(ConcreteTemplate):
    """Typing template keyed on the module-level ``hrcp`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hrcp
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hrcp`` global with a function type built from the template.
register_global(hrcp, Function(_typing_hrcp))


@register
class _typing_hlog(ConcreteTemplate):
    """Typing template keyed on the module-level ``hlog`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hlog
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hlog`` global with a function type built from the template.
register_global(hlog, Function(_typing_hlog))


@register
class _typing_hlog2(ConcreteTemplate):
    """Typing template keyed on the module-level ``hlog2`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hlog2
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hlog2`` global with a function type built from the template.
register_global(hlog2, Function(_typing_hlog2))


@register
class _typing_hlog10(ConcreteTemplate):
    """Typing template keyed on the module-level ``hlog10`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hlog10
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hlog10`` global with a function type built from the template.
register_global(hlog10, Function(_typing_hlog10))


@register
class _typing_hexp(ConcreteTemplate):
    """Typing template keyed on the module-level ``hexp`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hexp
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hexp`` global with a function type built from the template.
register_global(hexp, Function(_typing_hexp))


@register
class _typing_htanh_approx(ConcreteTemplate):
    """Typing template keyed on the module-level ``htanh_approx`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = htanh_approx
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``htanh_approx`` global with a function type built from the
# template above.
register_global(htanh_approx, Function(_typing_htanh_approx))


@register
class _typing_h2tanh_approx(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2tanh_approx`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2tanh_approx
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2tanh_approx`` global with a function type built from the
# template above.
register_global(h2tanh_approx, Function(_typing_h2tanh_approx))


@register
class _typing_htanh(ConcreteTemplate):
    """Typing template keyed on the module-level ``htanh`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = htanh
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``htanh`` global with a function type built from the template.
register_global(htanh, Function(_typing_htanh))


@register
class _typing_h2tanh(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2tanh`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2tanh
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2tanh`` global with a function type built from the template.
register_global(h2tanh, Function(_typing_h2tanh))


@register
class _typing_hexp2(ConcreteTemplate):
    """Typing template keyed on the module-level ``hexp2`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hexp2
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hexp2`` global with a function type built from the template.
register_global(hexp2, Function(_typing_hexp2))


@register
class _typing_hexp10(ConcreteTemplate):
    """Typing template keyed on the module-level ``hexp10`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hexp10
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hexp10`` global with a function type built from the template.
register_global(hexp10, Function(_typing_hexp10))


@register
class _typing_hcos(ConcreteTemplate):
    """Typing template keyed on the module-level ``hcos`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hcos
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hcos`` global with a function type built from the template.
register_global(hcos, Function(_typing_hcos))


@register
class _typing_hsin(ConcreteTemplate):
    """Typing template keyed on the module-level ``hsin`` object.

    The single case maps one ``_type___nv_bfloat16`` argument to a
    ``_type___nv_bfloat16`` result.
    """

    key = hsin
    cases = [
        signature(_type___nv_bfloat16, _type___nv_bfloat16),
    ]


# Register the ``hsin`` global with a function type built from the template.
register_global(hsin, Function(_typing_hsin))


@register
class _typing_h2sqrt(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2sqrt`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2sqrt
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2sqrt`` global with a function type built from the template.
register_global(h2sqrt, Function(_typing_h2sqrt))


@register
class _typing_h2rsqrt(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2rsqrt`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2rsqrt
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2rsqrt`` global with a function type built from the template.
register_global(h2rsqrt, Function(_typing_h2rsqrt))


@register
class _typing_h2rcp(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2rcp`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2rcp
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2rcp`` global with a function type built from the template.
register_global(h2rcp, Function(_typing_h2rcp))


@register
class _typing_h2log(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2log`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2log
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2log`` global with a function type built from the template.
register_global(h2log, Function(_typing_h2log))


@register
class _typing_h2log2(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2log2`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2log2
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2log2`` global with a function type built from the template.
register_global(h2log2, Function(_typing_h2log2))


@register
class _typing_h2log10(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2log10`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2log10
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2log10`` global with a function type built from the template.
register_global(h2log10, Function(_typing_h2log10))


@register
class _typing_h2exp(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2exp`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2exp
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2exp`` global with a function type built from the template.
register_global(h2exp, Function(_typing_h2exp))


@register
class _typing_h2exp2(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2exp2`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2exp2
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2exp2`` global with a function type built from the template.
register_global(h2exp2, Function(_typing_h2exp2))


@register
class _typing_h2exp10(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2exp10`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2exp10
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2exp10`` global with a function type built from the template.
register_global(h2exp10, Function(_typing_h2exp10))


@register
class _typing_h2cos(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2cos`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2cos
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2cos`` global with a function type built from the template.
register_global(h2cos, Function(_typing_h2cos))


@register
class _typing_h2sin(ConcreteTemplate):
    """Typing template keyed on the module-level ``h2sin`` object.

    The single case maps one ``_type___nv_bfloat162`` argument to a
    ``_type___nv_bfloat162`` result.
    """

    key = h2sin
    cases = [
        signature(_type___nv_bfloat162, _type___nv_bfloat162),
    ]


# Register the ``h2sin`` global with a function type built from the template.
register_global(h2sin, Function(_typing_h2sin))


@register
class _typing_atomicAdd(ConcreteTemplate):
    """Typing template keyed on the module-level ``atomicAdd`` object.

    Two cases, listed first for ``_type___nv_bfloat162`` and then for
    ``_type___nv_bfloat16``: each takes a ``CPointer`` to the type plus a
    value of the type, and yields a result of the same type.
    """

    key = atomicAdd
    cases = [
        signature(ty, CPointer(ty), ty)
        for ty in (_type___nv_bfloat162, _type___nv_bfloat16)
    ]


# Register the ``atomicAdd`` global with a function type built from the
# template above.
register_global(atomicAdd, Function(_typing_atomicAdd))


@register_global(operator.add)
class _typing_operator_add(ConcreteTemplate):
    """Typing cases for ``operator.add`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a result of the same type.
    """

    cases = [
        signature(ty, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.sub)
class _typing_operator_sub(ConcreteTemplate):
    """Typing cases for ``operator.sub`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a result of the same type.
    """

    cases = [
        signature(ty, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.mul)
class _typing_operator_mul(ConcreteTemplate):
    """Typing cases for ``operator.mul`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a result of the same type.
    """

    cases = [
        signature(ty, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.truediv)
class _typing_operator_truediv(ConcreteTemplate):
    """Typing cases for ``operator.truediv`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a result of the same type.
    """

    cases = [
        signature(ty, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.iadd)
class _typing_operator_iadd(ConcreteTemplate):
    """Typing cases for ``operator.iadd`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a result of the same type.
    """

    cases = [
        signature(ty, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.isub)
class _typing_operator_isub(ConcreteTemplate):
    """Typing cases for ``operator.isub`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a result of the same type.
    """

    cases = [
        signature(ty, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.imul)
class _typing_operator_imul(ConcreteTemplate):
    """Typing cases for ``operator.imul`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a result of the same type.
    """

    cases = [
        signature(ty, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.itruediv)
class _typing_operator_itruediv(ConcreteTemplate):
    """Typing cases for ``operator.itruediv`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a result of the same type.
    """

    cases = [
        signature(ty, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.pos)
class _typing_operator_pos(ConcreteTemplate):
    """Typing cases for ``operator.pos`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: one
    operand of the type produces a result of the same type.
    """

    cases = [
        signature(ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.neg)
class _typing_operator_neg(ConcreteTemplate):
    """Typing cases for ``operator.neg`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: one
    operand of the type produces a result of the same type.
    """

    cases = [
        signature(ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.eq)
class _typing_operator_eq(ConcreteTemplate):
    """Typing cases for ``operator.eq`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a ``bool_`` result.
    """

    cases = [
        signature(bool_, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.ne)
class _typing_operator_ne(ConcreteTemplate):
    """Typing cases for ``operator.ne`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a ``bool_`` result.
    """

    cases = [
        signature(bool_, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.gt)
class _typing_operator_gt(ConcreteTemplate):
    """Typing cases for ``operator.gt`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a ``bool_`` result.
    """

    cases = [
        signature(bool_, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.lt)
class _typing_operator_lt(ConcreteTemplate):
    """Typing cases for ``operator.lt`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a ``bool_`` result.
    """

    cases = [
        signature(bool_, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.ge)
class _typing_operator_ge(ConcreteTemplate):
    """Typing cases for ``operator.ge`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a ``bool_`` result.
    """

    cases = [
        signature(bool_, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


@register_global(operator.le)
class _typing_operator_le(ConcreteTemplate):
    """Typing cases for ``operator.le`` on the bfloat16 types.

    For ``_type___nv_bfloat16`` and then ``_type___nv_bfloat162``: two
    operands of the type produce a ``bool_`` result.
    """

    cases = [
        signature(bool_, ty, ty)
        for ty in (_type___nv_bfloat16, _type___nv_bfloat162)
    ]


# Aliases:
# Extra module-level names bound to objects expected to be defined earlier in
# this file.
# NOTE(review): the ``unnamed…`` targets look like generator-assigned names
# for anonymous declarations — confirm against the generator before relying
# on the numeric suffixes staying stable across regenerations.
__nv_bfloat16_raw = unnamed1401637
__nv_bfloat162_raw = unnamed1401746
# Spellings of the two bfloat16 bindings without the leading double underscore.
nv_bfloat16 = __nv_bfloat16
nv_bfloat162 = __nv_bfloat162
