Commit 1fb0017a, authored by dugupeiwen.

Commit message: init 0.58

(no parent commits)
"""
Expose top-level symbols that are safe for import *
"""
import platform
import re
import sys
import warnings
# ---------------------- WARNING WARNING WARNING ----------------------------
# THIS MUST RUN FIRST, DO NOT MOVE... SEE DOCSTRING IN _ensure_critical_deps
def _ensure_critical_deps():
"""
Make sure the Python, NumPy and SciPy present are supported versions.
This has to be done _before_ importing anything from Numba such that
incompatible versions can be reported to the user. If this occurs _after_
importing things from Numba and there's an issue in e.g. a Numba c-ext, a
SystemError might have occurred which prevents reporting the likely cause of
the problem (incompatible versions of critical dependencies).
"""
#NOTE THIS CODE SHOULD NOT IMPORT ANYTHING FROM NUMBA!
def extract_version(mod):
return tuple(map(int, mod.__version__.split('.')[:2]))
PYVERSION = sys.version_info[:2]
if PYVERSION < (3, 8):
msg = ("Numba needs Python 3.8 or greater. Got Python "
f"{PYVERSION[0]}.{PYVERSION[1]}.")
raise ImportError(msg)
import numpy as np
numpy_version = extract_version(np)
if numpy_version < (1, 22):
msg = (f"Numba needs NumPy 1.22 or greater. Got NumPy "
f"{numpy_version[0]}.{numpy_version[1]}.")
raise ImportError(msg)
elif numpy_version > (1, 26):
raise ImportError("Numba needs NumPy 1.26 or less")
try:
import scipy
except ImportError:
pass
else:
sp_version = extract_version(scipy)
if sp_version < (1, 0):
msg = ("Numba requires SciPy version 1.0 or greater. Got SciPy "
f"{scipy.__version__}.")
raise ImportError(msg)
_ensure_critical_deps()
# END DO NOT MOVE
# ---------------------- WARNING WARNING WARNING ----------------------------
from ._version import get_versions
from numba.misc.init_utils import generate_version_info
__version__ = get_versions()['version']
version_info = generate_version_info(__version__)
del get_versions
del generate_version_info
from numba.core import config
from numba.core import types, errors
# Re-export typeof
from numba.misc.special import (
typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init,
literally, literal_unroll,
)
# Re-export error classes
from numba.core.errors import *
# Re-export types itself
import numba.core.types as types
# Re-export all type names
from numba.core.types import *
# Re-export decorators
from numba.core.decorators import (cfunc, generated_jit, jit, njit, stencil,
jit_module)
# Re-export vectorize decorators and the thread layer querying function
from numba.np.ufunc import (vectorize, guvectorize, threading_layer,
get_num_threads, set_num_threads,
set_parallel_chunksize, get_parallel_chunksize,
get_thread_id)
# Re-export Numpy helpers
from numba.np.numpy_support import carray, farray, from_dtype
# Re-export experimental
from numba import experimental
# Initialize withcontexts
import numba.core.withcontexts
from numba.core.withcontexts import objmode_context as objmode
from numba.core.withcontexts import parallel_chunksize
# Initialize target extensions
import numba.core.target_extension
# Initialize typed containers
import numba.typed
# Keep this for backward compatibility.
def test(argv, **kwds):
    """
    Run the Numba test suite.

    Kept at the top level for backward compatibility.  *argv* and **kwds
    are forwarded unchanged to ``numba.testing._runtests.main`` and its
    result is returned.
    """
    # Import the test machinery lazily: pulling in `unittest` and other test
    # dependencies here would slow down every plain `import numba`.
    from numba.testing import _runtests
    return _runtests.main(argv, **kwds)
__all__ = """
cfunc
from_dtype
guvectorize
jit
experimental
njit
stencil
jit_module
typeof
prange
gdb
gdb_breakpoint
gdb_init
vectorize
objmode
literal_unroll
get_num_threads
set_num_threads
set_parallel_chunksize
get_parallel_chunksize
parallel_chunksize
""".split() + types.__all__ + errors.__all__
# Minimum supported versions of llvmlite and of the LLVM it is built against.
_min_llvmlite_version = (0, 41, 0)
_min_llvm_version = (14, 0, 0)


def _ensure_llvm():
    """
    Make sure llvmlite is importable, recent enough, built against a
    supported LLVM, and able to JIT-execute on this machine.
    """
    import warnings
    import llvmlite

    # Consider only the major.minor.bugfix components; ignore any trailing
    # rc/dev qualifiers on the version string.
    match = re.match(r'(\d+)\.(\d+).(\d+)', llvmlite.__version__)
    if match is None:
        # Version string in an unexpected shape; warn rather than fail.
        warnings.warn("llvmlite version format not recognized!")
    else:
        installed = tuple(int(group) for group in match.groups())
        if installed < _min_llvmlite_version:
            raise ImportError(
                "Numba requires at least version %d.%d.%d of llvmlite.\n"
                "Installed version is %s.\n"
                "Please update llvmlite." %
                (_min_llvmlite_version + (llvmlite.__version__,)))

    from llvmlite.binding import llvm_version_info, check_jit_execution
    if llvm_version_info < _min_llvm_version:
        raise ImportError(
            "Numba requires at least version %d.%d.%d of LLVM.\n"
            "Installed llvmlite is built against version %d.%d.%d.\n"
            "Please update llvmlite." %
            (_min_llvm_version + llvm_version_info))
    check_jit_execution()
def _try_enable_svml():
    """
    Try to enable SVML if configuration permits use and the library is found.

    Returns True when the SVML shared library was loaded AND the LLVM in use
    reports SVML support, False otherwise.  On success the
    '-vector-library=SVML' LLVM option is set as a side effect.
    """
    if config.DISABLE_INTEL_SVML:
        return False
    try:
        if sys.platform.startswith('linux'):
            llvmlite.binding.load_library_permanently("libsvml.so")
        elif sys.platform.startswith('darwin'):
            llvmlite.binding.load_library_permanently("libsvml.dylib")
        elif sys.platform.startswith('win'):
            llvmlite.binding.load_library_permanently("svml_dispmd")
        else:
            # Unknown platform: no known SVML library name to load.
            return False
        # The SVML library is loaded, therefore SVML *could* be supported.
        # Now see if LLVM has been compiled with the SVML support patch.
        # If llvmlite has the checking function `has_svml` and it returns
        # True, then LLVM was compiled with SVML support and the setup
        # for SVML can proceed. We err on the side of caution and if the
        # checking function is missing, regardless of that being fine for
        # most 0.23.{0,1} llvmlite instances (i.e. conda or pip installed),
        # we assume that SVML was not compiled in. llvmlite 0.23.2 is a
        # bugfix release with the checking function present that will always
        # produce correct behaviour. For context see: #3006.
        try:
            if not llvmlite.binding.targets.has_svml():
                # has detection function, but no svml compiled in, therefore
                # disable SVML
                return False
        except AttributeError:
            if platform.machine() == 'x86_64' and config.DEBUG:
                msg = ("SVML was found but llvmlite >= 0.23.2 is "
                       "needed to support it.")
                warnings.warn(msg)
            # does not have detection function, cannot detect reliably,
            # disable SVML.
            return False
        # All is well, detection function present and reports SVML is
        # compiled in, set the vector library to SVML.
        llvmlite.binding.set_option('SVML', '-vector-library=SVML')
        return True
    except Exception:
        # Was a bare `except:`; narrowed to Exception so that
        # KeyboardInterrupt/SystemExit during import are not swallowed.
        # This remains deliberately best-effort: any loading failure just
        # means "no SVML".
        if platform.machine() == 'x86_64' and config.DEBUG:
            warnings.warn("SVML was not found/could not be loaded.")
        return False
# Validate llvmlite/LLVM before importing llvmlite below.
_ensure_llvm()

# we know llvmlite is working as the above tests passed, import it now as SVML
# needs to mutate runtime options (sets the `-vector-library`).
import llvmlite

"""
Is set to True if Intel SVML is in use.
"""
# NOTE: the bare string above is a no-op at runtime; it only annotates the
# assignment that follows.
config.USING_SVML = _try_enable_svml()

# ---------------------- WARNING WARNING WARNING ----------------------------
# The following imports occur below here (SVML init) because somewhere in their
# import sequence they have a `@njit` wrapped function. This triggers too early
# a bind to the underlying LLVM libraries which then irretrievably sets the LLVM
# SVML state to "no SVML". See https://github.com/numba/numba/issues/4689 for
# context.
# ---------------------- WARNING WARNING WARNING ----------------------------

"""Expose Numba command via ``python -m numba``."""
# NOTE(review): the docstring and the lines below appear to belong to
# numba/__main__.py; this file looks like a concatenated dump -- confirm the
# intended file boundary before editing further.
import sys

from numba.misc.numba_entry import main

if __name__ == '__main__':
    sys.exit(main())
#ifndef NUMBA_ARYSTRUCT_H_
#define NUMBA_ARYSTRUCT_H_
/*
 * Fill in the *arystruct* with information from the Numpy array *obj*.
 * *arystruct*'s layout is defined in numba.targets.arrayobj (look
 * for the ArrayTemplate class).
 */
typedef struct {
    void     *meminfo;   /* see _nrt_python.c and nrt.h in numba/core/runtime */
    PyObject *parent;    /* Python object this array was derived from */
    npy_intp  nitems;    /* total number of elements */
    npy_intp  itemsize;  /* size of one element in bytes */
    void     *data;      /* pointer to the element data */
    /* Flexible array member; presumably ndim shape entries followed by
     * ndim stride entries -- confirm against the ArrayTemplate class
     * referenced above. */
    npy_intp  shape_and_strides[];
} arystruct_t;

#endif  /* NUMBA_ARYSTRUCT_H_ */
/* This file contains the base class implementation for all device arrays. The
* base class is implemented in C so that computing typecodes for device arrays
* can be implemented efficiently. */
#include "_pymodule.h"
/* Include _devicearray., but make sure we don't get the definitions intended
* for consumers of the Device Array API.
*/
#define NUMBA_IN_DEVICEARRAY_CPP_
#include "_devicearray.h"
/* DeviceArray PyObject implementation. Note that adding more members here is
* presently prohibited because mapped and managed arrays derive from both
* DeviceArray and NumPy's ndarray, which is also a C extension class - the
* layout of the object cannot be resolved if this class also has members beyond
* PyObject_HEAD. */
class DeviceArray {
    /* Intentionally nothing beyond PyObject_HEAD: see the layout note in
     * the comment preceding this class. */
    PyObject_HEAD
};
/* Trivial traversal - DeviceArray instances own nothing. */
/* GC traversal: a DeviceArray owns no references, so there is nothing to
 * visit. */
static int
DeviceArray_traverse(DeviceArray *self, visitproc visit, void *arg)
{
    (void)self;
    (void)visit;
    (void)arg;
    return 0;
}
/* Trivial clear of all references - DeviceArray instances own nothing. */
/* GC clear: a DeviceArray owns no references, so there is nothing to drop. */
static int
DeviceArray_clear(DeviceArray *self)
{
    (void)self;
    return 0;
}
/* The _devicearray.DeviceArray type */
/* Type object for _devicearray.DeviceArray.  Almost every slot is 0: the
 * type is an empty, subclassable, GC-aware base class relying on defaults
 * (tp_new is filled in at module init). */
PyTypeObject DeviceArrayType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_devicearray.DeviceArray",               /* tp_name */
    sizeof(DeviceArray),                      /* tp_basicsize */
    0,                                        /* tp_itemsize */
    0,                                        /* tp_dealloc */
    0,                                        /* tp_vectorcall_offset */
    0,                                        /* tp_getattr */
    0,                                        /* tp_setattr */
    0,                                        /* tp_as_async */
    0,                                        /* tp_repr */
    0,                                        /* tp_as_number */
    0,                                        /* tp_as_sequence */
    0,                                        /* tp_as_mapping */
    0,                                        /* tp_hash */
    0,                                        /* tp_call */
    0,                                        /* tp_str */
    0,                                        /* tp_getattro */
    0,                                        /* tp_setattro */
    0,                                        /* tp_as_buffer */
    /* BASETYPE so mapped/managed arrays can derive from it; HAVE_GC to
     * match the DeviceArray_traverse/_clear implementations above. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                              /* tp_flags */
    "DeviceArray object",                     /* tp_doc */
    (traverseproc) DeviceArray_traverse,      /* tp_traverse */
    (inquiry) DeviceArray_clear,              /* tp_clear */
    0,                                        /* tp_richcompare */
    0,                                        /* tp_weaklistoffset */
    0,                                        /* tp_iter */
    0,                                        /* tp_iternext */
    0,                                        /* tp_methods */
    0,                                        /* tp_members */
    0,                                        /* tp_getset */
    0,                                        /* tp_base */
    0,                                        /* tp_dict */
    0,                                        /* tp_descr_get */
    0,                                        /* tp_descr_set */
    0,                                        /* tp_dictoffset */
    0,                                        /* tp_init */
    0,                                        /* tp_alloc */
    0,                                        /* tp_new */
    0,                                        /* tp_free */
    0,                                        /* tp_is_gc */
    0,                                        /* tp_bases */
    0,                                        /* tp_mro */
    0,                                        /* tp_cache */
    0,                                        /* tp_subclasses */
    0,                                        /* tp_weaklist */
    0,                                        /* tp_del */
    0,                                        /* tp_version_tag */
    0,                                        /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                        /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                        /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* CUDA device array C API */
/* C API table exported via a PyCapsule (see MOD_INIT below); consumers
 * access the type object through slot 0 (see _devicearray.h). */
static void *_DeviceArray_API[1] = {
    (void*)&DeviceArrayType
};
/* Module init for _devicearray: readies DeviceArrayType, exposes it as
 * `DeviceArray`, and publishes the C API table in a capsule stored under
 * `_DEVICEARRAY_API` in the module dict. */
MOD_INIT(_devicearray) {
    PyObject *m = nullptr;
    PyObject *d = nullptr;
    PyObject *c_api = nullptr;
    int error = 0;

    MOD_DEF(m, "_devicearray", "No docs", NULL)
    if (m == NULL)
        goto error_occurred;
    c_api = PyCapsule_New((void *)_DeviceArray_API, "numba._devicearray._DEVICEARRAY_API", NULL);
    if (c_api == NULL)
        goto error_occurred;
    /* tp_new was deliberately left 0 in the static initializer above. */
    DeviceArrayType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&DeviceArrayType) < 0)
        goto error_occurred;
    Py_INCREF(&DeviceArrayType);
    error = PyModule_AddObject(m, "DeviceArray", (PyObject*)(&DeviceArrayType));
    if (error)
        goto error_occurred;
    d = PyModule_GetDict(m);
    if (d == NULL)
        goto error_occurred;
    error = PyDict_SetItemString(d, "_DEVICEARRAY_API", c_api);
    /* Decref and set c_api to NULL, Py_XDECREF in error_occurred will have no
     * effect. */
    Py_CLEAR(c_api);
    if (error)
        goto error_occurred;
    return MOD_SUCCESS_VAL(m);

error_occurred:
    /* NOTE(review): DeviceArrayType is XDECREF'd on every failure path, even
     * on failures occurring before the Py_INCREF above, and also after a
     * successful PyModule_AddObject (which stole that reference) -- the
     * refcount bookkeeping looks off by one on some paths; confirm. */
    Py_XDECREF(m);
    Py_XDECREF(c_api);
    Py_XDECREF((PyObject*)&DeviceArrayType);
    return MOD_ERROR_VAL;
}
#ifndef NUMBA_DEVICEARRAY_H_
#define NUMBA_DEVICEARRAY_H_

#ifdef __cplusplus
extern "C" {
#endif

/* These definitions should only be used by consumers of the Device Array API.
 * Consumers access the API through the opaque pointer stored in
 * _devicearray._DEVICEARRAY_API. We don't want these definitions in
 * _devicearray.cpp itself because they would conflict with the actual
 * implementations there.
 */
#ifndef NUMBA_IN_DEVICEARRAY_CPP_
extern void **DeviceArray_API;
/* Consumers reach the type object through slot 0 of the capsule-exported
 * API table. */
#define DeviceArrayType (*(PyTypeObject*)DeviceArray_API[0])
#endif  /* ndef NUMBA_IN_DEVICEARRAY_CPP */

#ifdef __cplusplus
}
#endif

#endif  /* NUMBA_DEVICEARRAY_H_ */
#include "_pymodule.h"
#include <cstring>
#include <ctime>
#include <cassert>
#include <vector>
#include "_typeof.h"
#include "frameobject.h"
#include "traceback.h"
#include "core/typeconv/typeconv.hpp"
#include "_devicearray.h"
/*
* Notes on the C_TRACE macro:
*
* The original C_TRACE macro (from ceval.c) would call
* PyTrace_C_CALL et al., for which the frame argument wouldn't
* be usable. Since we explicitly synthesize a frame using the
* original Python code object, we call PyTrace_CALL instead so
* the profiler can report the correct source location.
*
* Likewise, while ceval.c would call PyTrace_C_EXCEPTION in case
* of error, the profiler would simply expect a RETURN in case of
* a Python function, so we generate that here (making sure the
* exception state is preserved correctly).
*
*/
#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 11)
#ifndef Py_BUILD_CORE
#define Py_BUILD_CORE 1
#endif
#include "internal/pycore_frame.h"
#include "internal/pycore_pyerrors.h"
/*
* Code originally from:
* https://github.com/python/cpython/blob/deaf509e8fc6e0363bd6f26d52ad42f976ec42f2/Python/ceval.c#L6804
*/
/* Invoke the trace/profile callback `func` for event `what` on `frame`,
 * guarding against re-entrancy (Python 3.11 variant). */
static int
call_trace(Py_tracefunc func, PyObject *obj,
           PyThreadState *tstate, PyFrameObject *frame,
           int what, PyObject *arg)
{
    int result;
    /* Don't trace while already inside a tracing callback. */
    if (tstate->tracing) {
        return 0;
    }
    if (frame == NULL) {
        return -1;
    }
    /* Record the event kind for the duration of the callback. */
    int old_what = tstate->tracing_what;
    tstate->tracing_what = what;
    PyThreadState_EnterTracing(tstate);
    /* NOTE(review): `arg` is not forwarded -- the callback receives NULL,
     * unlike the upstream call_trace which passes `arg` through; confirm
     * this is intentional for the synthesized PyTrace_CALL events. */
    result = func(obj, frame, what, NULL);
    PyThreadState_LeaveTracing(tstate);
    tstate->tracing_what = old_what;
    return result;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4220-L4240
*/
/* Like call_trace, but preserves any pending exception across the callback
 * when it succeeds, and discards the saved exception state when the
 * callback itself fails (Python 3.11 variant, uses private _PyErr APIs). */
static int
call_trace_protected(Py_tracefunc func, PyObject *obj,
                     PyThreadState *tstate, PyFrameObject *frame,
                     int what, PyObject *arg)
{
    PyObject *type, *value, *traceback;
    int err;
    _PyErr_Fetch(tstate, &type, &value, &traceback);
    err = call_trace(func, obj, tstate, frame, what, arg);
    if (err == 0)
    {
        /* Callback succeeded: restore the original exception state. */
        _PyErr_Restore(tstate, type, value, traceback);
        return 0;
    }
    else {
        /* Callback failed: its exception wins; drop the saved one. */
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(traceback);
        return -1;
    }
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/deaf509e8fc6e0363bd6f26d52ad42f976ec42f2/Python/ceval.c#L7245
* NOTE: The state test https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4521
* has been removed, it's dealt with in call_cfunc.
*/
/* Profile the C call `call` against the synthesized `frame`: emit
 * PyTrace_CALL before the call and PyTrace_RETURN after it (via
 * call_trace_protected on failure so the pending exception survives).
 * `tstate` and `cfunc` are expected to be in scope at the expansion site.
 * NOTE(review): the final macro line carries a trailing '\' continuation;
 * in the upstream file a blank line follows it -- confirm one follows here
 * too (blank lines appear stripped in this dump), otherwise the next
 * preprocessor directive would be spliced into the macro. */
#define C_TRACE(x, call, frame) \
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
               tstate, frame, \
               PyTrace_CALL, cfunc)) { \
    x = NULL; \
} \
else { \
    x = call; \
    if (tstate->c_profilefunc != NULL) { \
        if (x == NULL) { \
            call_trace_protected(tstate->c_profilefunc, \
                                 tstate->c_profileobj, \
                                 tstate, frame, \
                                 PyTrace_RETURN, cfunc); \
            /* XXX should pass (type, value, tb) */ \
        } else { \
            if (call_trace(tstate->c_profilefunc, \
                           tstate->c_profileobj, \
                           tstate, frame, \
                           PyTrace_RETURN, cfunc)) { \
                Py_DECREF(x); \
                x = NULL; \
            } \
        } \
    } \
} \
#elif (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10)
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L36-L40
*/
/* Numba's stand-in for CPython 3.10's PyTraceInfo, which is private to
 * ceval.c and not exported; the layout mirrors the upstream struct. */
typedef struct {
    PyCodeObject *code;        // The code object for the bounds. May be NULL.
    PyCodeAddressRange bounds; // Only valid if code != NULL.
    CFrame cframe;
} PyTraceInfo;
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1257-L1266
* NOTE: The function is renamed.
*/
/* Initialize a PyCodeAddressRange over a raw line table (renamed copy of
 * CPython's private _PyLineTable_InitAddressRange). */
static void
_nb_PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range)
{
    range->opaque.lo_next = linetable;
    range->opaque.limit = range->opaque.lo_next + length;
    /* Sentinel values: no address range computed yet. */
    range->ar_start = -1;
    range->ar_end = 0;
    range->opaque.computed_line = firstlineno;
    range->ar_line = -1;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1269-L1275
* NOTE: The function is renamed.
*/
/* Initialize `bounds` from a code object's line table (renamed copy of
 * CPython's private _PyCode_InitAddressRange). */
static int
_nb_PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds)
{
    const char *linetable = PyBytes_AS_STRING(co->co_linetable);
    Py_ssize_t length = PyBytes_GET_SIZE(co->co_linetable);
    _nb_PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds);
    return bounds->ar_line;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5468-L5475
* NOTE: The call to _PyCode_InitAddressRange is renamed.
*/
/* (Re)initialize the cached address-range bounds in `trace_info` when the
 * frame's code object differs from the one last seen. */
static void
initialize_trace_info(PyTraceInfo *trace_info, PyFrameObject *frame)
{
    if (trace_info->code != frame->f_code) {
        trace_info->code = frame->f_code;
        _nb_PyCode_InitAddressRange(frame->f_code, &trace_info->bounds);
    }
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5477-L5501
*/
/* Invoke the trace/profile callback for `frame`, computing f_lineno from
 * f_lasti first, with tracing disabled for the duration of the callback
 * (Python 3.10 variant). */
static int
call_trace(Py_tracefunc func, PyObject *obj,
           PyThreadState *tstate, PyFrameObject *frame,
           PyTraceInfo *trace_info,
           int what, PyObject *arg)
{
    int result;
    /* Don't trace while already inside a tracing callback. */
    if (tstate->tracing)
        return 0;
    tstate->tracing++;
    tstate->cframe->use_tracing = 0;
    if (frame->f_lasti < 0) {
        /* Frame not started yet: report the function's first line. */
        frame->f_lineno = frame->f_code->co_firstlineno;
    }
    else {
        initialize_trace_info(trace_info, frame);
        frame->f_lineno = _PyCode_CheckLineNumber(frame->f_lasti*sizeof(_Py_CODEUNIT), &trace_info->bounds);
    }
    result = func(obj, frame, what, arg);
    frame->f_lineno = 0;
    /* Re-enable tracing if any callback is still installed. */
    tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL)
                                   || (tstate->c_profilefunc != NULL));
    tstate->tracing--;
    return result;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5445-L5466
*/
/* Like call_trace, but preserves any pending exception across the callback
 * when it succeeds (Python 3.10 variant). */
static int
call_trace_protected(Py_tracefunc func, PyObject *obj,
                     PyThreadState *tstate, PyFrameObject *frame,
                     PyTraceInfo *trace_info,
                     int what, PyObject *arg)
{
    PyObject *type, *value, *traceback;
    int err;
    PyErr_Fetch(&type, &value, &traceback);
    err = call_trace(func, obj, tstate, frame, trace_info, what, arg);
    if (err == 0)
    {
        /* Callback succeeded: restore the original exception state. */
        PyErr_Restore(type, value, traceback);
        return 0;
    }
    else
    {
        /* Callback failed: its exception wins; drop the saved one. */
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(traceback);
        return -1;
    }
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5810-L5839
* NOTE: The state test https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5811
* has been removed, it's dealt with in call_cfunc.
*/
/* Profile the C call `call`: emit PyTrace_CALL before the call and
 * PyTrace_RETURN after it (via call_trace_protected when the call failed,
 * preserving the pending exception).  `tstate`, `trace_info` and `cfunc`
 * are expected to be in scope at the expansion site. */
#define C_TRACE(x, call) \
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
               tstate, tstate->frame, &trace_info, PyTrace_CALL,\
               cfunc)) \
    x = NULL; \
else \
{ \
    x = call; \
    if (tstate->c_profilefunc != NULL) \
    { \
        if (x == NULL) \
        { \
            call_trace_protected(tstate->c_profilefunc, \
                                 tstate->c_profileobj, \
                                 tstate, tstate->frame, \
                                 &trace_info, \
                                 PyTrace_RETURN, cfunc); \
            /* XXX should pass (type, value, tb) */ \
        } \
        else \
        { \
            if (call_trace(tstate->c_profilefunc, \
                           tstate->c_profileobj, \
                           tstate, tstate->frame, \
                           &trace_info, \
                           PyTrace_RETURN, cfunc)) \
            { \
                Py_DECREF(x); \
                x = NULL; \
            } \
        } \
    } \
}
#else // Python <3.10
/*
* Code originally from:
* https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4242-L4257
*/
/* Invoke the trace/profile callback with tracing disabled for the duration
 * (pre-3.10 variant: use_tracing lives directly on the thread state). */
static int
call_trace(Py_tracefunc func, PyObject *obj,
           PyThreadState *tstate, PyFrameObject *frame,
           int what, PyObject *arg)
{
    int result;
    /* Don't trace while already inside a tracing callback. */
    if (tstate->tracing)
        return 0;
    tstate->tracing++;
    tstate->use_tracing = 0;
    result = func(obj, frame, what, arg);
    /* Re-enable tracing if any callback is still installed. */
    tstate->use_tracing = ((tstate->c_tracefunc != NULL)
                           || (tstate->c_profilefunc != NULL));
    tstate->tracing--;
    return result;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4220-L4240
*/
/* Like call_trace, but preserves any pending exception across the callback
 * when it succeeds (pre-3.10 variant). */
static int
call_trace_protected(Py_tracefunc func, PyObject *obj,
                     PyThreadState *tstate, PyFrameObject *frame,
                     int what, PyObject *arg)
{
    PyObject *type, *value, *traceback;
    int err;
    PyErr_Fetch(&type, &value, &traceback);
    err = call_trace(func, obj, tstate, frame, what, arg);
    if (err == 0)
    {
        /* Callback succeeded: restore the original exception state. */
        PyErr_Restore(type, value, traceback);
        return 0;
    }
    else
    {
        /* Callback failed: its exception wins; drop the saved one. */
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(traceback);
        return -1;
    }
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4520-L4549
* NOTE: The state test https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4521
* has been removed, it's dealt with in call_cfunc.
*/
/* Profile the C call `call`: emit PyTrace_CALL before the call and
 * PyTrace_RETURN after it (via call_trace_protected when the call failed,
 * preserving the pending exception).  `tstate` and `cfunc` are expected to
 * be in scope at the expansion site. */
#define C_TRACE(x, call) \
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
               tstate, tstate->frame, PyTrace_CALL, cfunc)) \
    x = NULL; \
else \
{ \
    x = call; \
    if (tstate->c_profilefunc != NULL) \
    { \
        if (x == NULL) \
        { \
            call_trace_protected(tstate->c_profilefunc, \
                                 tstate->c_profileobj, \
                                 tstate, tstate->frame, \
                                 PyTrace_RETURN, cfunc); \
            /* XXX should pass (type, value, tb) */ \
        } \
        else \
        { \
            if (call_trace(tstate->c_profilefunc, \
                           tstate->c_profileobj, \
                           tstate, tstate->frame, \
                           PyTrace_RETURN, cfunc)) \
            { \
                Py_DECREF(x); \
                x = NULL; \
            } \
        } \
    } \
}
#endif
/* Flattened argument-type table: argct entries per registered overload. */
typedef std::vector<Type> TypeTable;
/* One callable per registered overload. */
typedef std::vector<PyObject*> Functions;

/* The Dispatcher class is the base class of all dispatchers in the CPU and
   CUDA targets. Its main responsibilities are:
   - Resolving the best overload to call for a given set of arguments, and
   - Calling the resolved overload.
   This logic is implemented within this class for efficiency (lookup of the
   appropriate overload needs to be fast) and ease of implementation (calling
   directly into a compiled function using a function pointer is easier within
   the C++ code where the overload has been resolved).

   NOTE(review): this struct mixes PyObject_HEAD with non-POD std::vector
   members while instances are allocated by the Python allocator -- confirm
   where/whether the vector constructors are run on the allocation path. */
class Dispatcher {
public:
    PyObject_HEAD
    /* Whether compilation of new overloads is permitted */
    char can_compile;
    /* Whether fallback to object mode is permitted */
    char can_fallback;
    /* Whether types must match exactly when resolving overloads.
       If not, conversions (e.g. float32 -> float64) are permitted when
       searching for a match. */
    char exact_match_required;
    /* Borrowed reference */
    PyObject *fallbackdef;
    /* Whether to fold named arguments and default values
       (false for lifted loops) */
    int fold_args;
    /* Whether the last positional argument is a stararg */
    int has_stararg;
    /* Tuple of argument names */
    PyObject *argnames;
    /* Tuple of default values */
    PyObject *defargs;
    /* Number of arguments to function */
    int argct;
    /* Used for selecting overloaded function implementations */
    TypeManager *tm;
    /* An array of overloads */
    Functions functions;
    /* A flattened array of argument types to all overloads
     * (invariant: sizeof(overloads) == argct * sizeof(functions)) */
    TypeTable overloads;

    /* Add a new overload. Parameters:
       - args: An array of Type objects, one for each parameter
       - callable: The callable implementing this overload.
         (Stored as a borrowed reference; see Dispatcher_Insert.) */
    void addDefinition(Type args[], PyObject *callable) {
        overloads.reserve(argct + overloads.size());
        for (int i=0; i<argct; ++i) {
            overloads.push_back(args[i]);
        }
        functions.push_back(callable);
    }

    /* Given a list of types, find the overloads that have a matching signature.
       Returns the best match, as well as the number of matches found.
       Parameters:
       - sig: an array of Type objects, one for each parameter.
       - matches: the number of matches found (mutated by this function).
       - allow_unsafe: whether to match overloads that would require an unsafe
         cast.
       - exact_match_required: Whether all arguments types must match the
                               overload's types exactly. When false,
                               overloads that would require a type conversion
                               can also be matched.
       The returned callable is a borrowed reference (or NULL when the match
       is not unique). */
    PyObject* resolve(Type sig[], int &matches, bool allow_unsafe,
                      bool exact_match_required) const {
        const int ovct = functions.size();
        int selected;
        matches = 0;
        if (0 == ovct) {
            // No overloads registered
            return NULL;
        }
        if (argct == 0) {
            // Nullary function: trivial match on first overload
            matches = 1;
            selected = 0;
        }
        else {
            matches = tm->selectOverload(sig, &overloads[0], selected, argct,
                                         ovct, allow_unsafe,
                                         exact_match_required);
        }
        if (matches == 1) {
            return functions[selected];
        }
        return NULL;
    }

    /* Remove all overloads */
    void clear() {
        functions.clear();
        overloads.clear();
    }
};
/* GC traversal: report the owned PyObject references to the collector.
 * Both `argnames` and `defargs` are owned (INCREF'd in Dispatcher_init), so
 * both are visited; previously only `defargs` was.  The entries of
 * `functions` are borrowed (see Dispatcher_Insert) and must not be visited. */
static int
Dispatcher_traverse(Dispatcher *self, visitproc visit, void *arg)
{
    Py_VISIT(self->argnames);
    Py_VISIT(self->defargs);
    return 0;
}
/* Destructor: release the owned tuples, drop all overloads, then free the
 * object through its type's tp_free. */
static void
Dispatcher_dealloc(Dispatcher *self)
{
    /* NOTE(review): no PyObject_GC_UnTrack() call before releasing
     * references -- required for GC-tracked types; the Dispatcher type
     * object's flags are not visible in this chunk, confirm there. */
    Py_XDECREF(self->argnames);
    Py_XDECREF(self->defargs);
    self->clear();
    Py_TYPE(self)->tp_free((PyObject*)self);
}
/* __init__ for Dispatcher.  Expected Python-level arguments:
 * (tm_address, argct, fold_args, argnames, defargs, can_fallback
 *  [, has_stararg [, exact_match_required]]). */
static int
Dispatcher_init(Dispatcher *self, PyObject *args, PyObject *kwds)
{
    PyObject *tmaddrobj;
    void *tmaddr;
    int argct;
    int can_fallback;
    int has_stararg = 0;
    int exact_match_required = 0;

    if (!PyArg_ParseTuple(args, "OiiO!O!i|ii", &tmaddrobj, &argct,
                          &self->fold_args,
                          &PyTuple_Type, &self->argnames,
                          &PyTuple_Type, &self->defargs,
                          &can_fallback,
                          &has_stararg,
                          &exact_match_required
                          )) {
        return -1;
    }
    /* PyArg_ParseTuple stored borrowed references; take ownership.
     * NOTE(review): calling __init__ a second time would leak the
     * previously owned argnames/defargs -- confirm __init__ runs only once
     * per instance. */
    Py_INCREF(self->argnames);
    Py_INCREF(self->defargs);
    /* The TypeManager is handed over as an integer address from Python.
     * NOTE(review): a PyLong_AsVoidPtr failure is not checked here. */
    tmaddr = PyLong_AsVoidPtr(tmaddrobj);
    self->tm = static_cast<TypeManager*>(tmaddr);
    self->argct = argct;
    self->can_compile = 1;
    self->can_fallback = can_fallback;
    self->fallbackdef = NULL;
    self->has_stararg = has_stararg;
    self->exact_match_required = exact_match_required;
    return 0;
}
/* Python-visible method: drop every registered overload. */
static PyObject *
Dispatcher_clear(Dispatcher *self, PyObject *args)
{
    (void)args;
    self->clear();
    Py_RETURN_NONE;
}
/* Register a new overload: `sig` is a sequence of integer type codes,
 * `func` is the compiled entry point (must be a builtin function unless
 * cuda=True), `objectmode` marks it as the object-mode fallback. */
static
PyObject*
Dispatcher_Insert(Dispatcher *self, PyObject *args, PyObject *kwds)
{
    /* The cuda kwarg is a temporary addition until CUDA overloads are compiled
     * functions. Once they are compiled functions, kwargs can be removed from
     * this function. */
    static char *keywords[] = {
        (char*)"sig",
        (char*)"func",
        (char*)"objectmode",
        (char*)"cuda",
        NULL
    };

    PyObject *sigtup, *cfunc;
    int i, sigsz;
    int *sig;
    int objectmode = 0;
    int cuda = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|ip", keywords, &sigtup,
                                     &cfunc, &objectmode, &cuda)) {
        return NULL;
    }
    if (!cuda && !PyObject_TypeCheck(cfunc, &PyCFunction_Type) ) {
        PyErr_SetString(PyExc_TypeError, "must be builtin_function_or_method");
        return NULL;
    }
    /* NOTE(review): PySequence_Fast_GET_SIZE/GET_ITEM assume sigtup is a
     * list or tuple (no PySequence_Fast conversion), and PyLong_AsLong
     * errors are not checked (-1 from an exception would be stored as a
     * type code) -- confirm callers always pass tuples of ints. */
    sigsz = PySequence_Fast_GET_SIZE(sigtup);
    sig = new int[sigsz];
    for (i = 0; i < sigsz; ++i) {
        sig[i] = PyLong_AsLong(PySequence_Fast_GET_ITEM(sigtup, i));
    }
    /* The reference to cfunc is borrowed; this only works because the
       derived Python class also stores an (owned) reference to cfunc. */
    self->addDefinition(sig, cfunc);
    /* Add pure python fallback */
    if (!self->fallbackdef && objectmode){
        self->fallbackdef = cfunc;
    }
    delete[] sig;
    Py_RETURN_NONE;
}
/* Ask the dispatcher's Python side to explain a dispatch failure by calling
 * its `method_name` callback with the original args/kwargs.  The callback is
 * expected to raise; if the attribute is missing a TypeError carrying
 * `default_msg` is set instead, and if the callback returns without raising
 * a RuntimeError is reported. */
static
void explain_issue(PyObject *dispatcher, PyObject *args, PyObject *kws,
                   const char *method_name, const char *default_msg)
{
    PyObject *cb = PyObject_GetAttrString(dispatcher, method_name);
    if (cb == NULL) {
        PyErr_SetString(PyExc_TypeError, default_msg);
        return;
    }
    PyObject *res = PyObject_Call(cb, args, kws);
    Py_DECREF(cb);
    if (res != NULL) {
        /* The explainer is only useful if it raises. */
        PyErr_Format(PyExc_RuntimeError, "%s must raise an exception",
                     method_name);
        Py_DECREF(res);
    }
}
/* Report an ambiguous overload resolution via the dispatcher's
 * `_explain_ambiguous` callback. */
static
void explain_ambiguous(PyObject *dispatcher, PyObject *args, PyObject *kws)
{
    explain_issue(dispatcher, args, kws,
                  "_explain_ambiguous", "Ambiguous overloading");
}
/* Report a failed overload match via the dispatcher's
 * `_explain_matching_error` callback. */
static
void explain_matching_error(PyObject *dispatcher, PyObject *args, PyObject *kws)
{
    explain_issue(dispatcher, args, kws,
                  "_explain_matching_error", "No matching definition");
}
/* Ask the Python side whether new type conversions became available by
 * calling `_search_new_conversions`.  Returns 1 if it reported True, 0 if
 * False, and -1 (with an exception set) on error or a non-bool result. */
static
int search_new_conversions(PyObject *dispatcher, PyObject *args, PyObject *kws)
{
    PyObject *cb = PyObject_GetAttrString(dispatcher,
                                          "_search_new_conversions");
    if (cb == NULL)
        return -1;
    PyObject *res = PyObject_Call(cb, args, kws);
    Py_DECREF(cb);
    if (res == NULL)
        return -1;
    if (!PyBool_Check(res)) {
        Py_DECREF(res);
        PyErr_SetString(PyExc_TypeError,
                        "_search_new_conversions() should return a boolean");
        return -1;
    }
    int found = (res == Py_True) ? 1 : 0;
    Py_DECREF(res);
    return found;
}
/* A custom, fast, inlinable version of PyCFunction_Call() */
/* Call the compiled entry point `cfunc` with (args, kws).  When a profiler
 * is active, a frame is synthesized from the dispatcher's __code__ so the
 * profiler can attribute the call to the original Python source; otherwise
 * the C function is invoked directly.  How the "profiler active" state is
 * read differs per Python version, hence the preprocessor branches. */
static PyObject *
call_cfunc(Dispatcher *self, PyObject *cfunc, PyObject *args, PyObject *kws, PyObject *locals)
{
    PyCFunctionWithKeywords fn;
    PyThreadState *tstate;
    assert(PyCFunction_Check(cfunc));
    assert(PyCFunction_GET_FLAGS(cfunc) == (METH_VARARGS | METH_KEYWORDS));
    fn = (PyCFunctionWithKeywords) PyCFunction_GET_FUNCTION(cfunc);
    tstate = PyThreadState_GET();
#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 11)
    /*
     * On Python 3.11, _PyEval_EvalFrameDefault stops using PyTraceInfo since
     * it's now baked into ThreadState.
     * https://github.com/python/cpython/pull/26623
     */
    if (tstate->cframe->use_tracing && tstate->c_profilefunc)
#elif (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION == 10)
    /*
     * On Python 3.10+ trace_info comes from somewhere up in PyFrameEval et al,
     * Numba doesn't have access to that so creates an equivalent struct and
     * wires it up against the cframes. This is passed into the tracing
     * functions.
     *
     * Code originally from:
     * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L1611-L1622
     */
    PyTraceInfo trace_info;
    trace_info.code = NULL; // not initialized
    CFrame *prev_cframe = tstate->cframe;
    trace_info.cframe.use_tracing = prev_cframe->use_tracing;
    trace_info.cframe.previous = prev_cframe;
    if (trace_info.cframe.use_tracing && tstate->c_profilefunc)
#else
    /*
     * On Python prior to 3.10, tracing state is a member of the threadstate
     */
    if (tstate->use_tracing && tstate->c_profilefunc)
#endif
    {
        /*
         * The following code requires some explaining:
         *
         * We want the jit-compiled function to be visible to the profiler, so we
         * need to synthesize a frame for it.
         * The PyFrame_New() constructor doesn't do anything with the 'locals' value if the 'code's
         * 'CO_NEWLOCALS' flag is set (which is always the case nowadays).
         * So, to get local variables into the frame, we have to manually set the 'f_locals'
         * member, then call `PyFrame_LocalsToFast`, where a subsequent call to the `frame.f_locals`
         * property (by virtue of the `frame_getlocals` function in frameobject.c) will find them.
         */
        PyCodeObject *code = (PyCodeObject*)PyObject_GetAttrString((PyObject*)self, "__code__");
        PyObject *globals = PyDict_New();
        PyObject *builtins = PyEval_GetBuiltins();
        PyFrameObject *frame = NULL;
        PyObject *result = NULL;
        if (!code) {
            PyErr_Format(PyExc_RuntimeError, "No __code__ attribute found.");
            goto error;
        }
        /* Populate builtins, which is required by some JITted functions */
        if (PyDict_SetItemString(globals, "__builtins__", builtins)) {
            goto error;
        }
        /* unset the CO_OPTIMIZED flag, make the frame get a new locals dict
         * (CO_OPTIMIZED is bit 0, hence the 0xFFFE mask) */
        code->co_flags &= 0xFFFE;
        frame = PyFrame_New(tstate, code, globals, locals);
        if (frame == NULL) {
            goto error;
        }
        /* Populate the 'fast locals' in `frame` */
        PyFrame_LocalsToFast(frame, 0);
#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 11)
        C_TRACE(result, fn(PyCFunction_GET_SELF(cfunc), args, kws), frame);
#else
        /* Pre-3.11: make the synthesized frame current for the duration of
         * the call, then restore the caller's frame. */
        tstate->frame = frame;
        C_TRACE(result, fn(PyCFunction_GET_SELF(cfunc), args, kws));
        /* write changes back to locals? */
        PyFrame_FastToLocals(frame);
        tstate->frame = frame->f_back;
#endif
    error:
        Py_XDECREF(frame);
        Py_XDECREF(globals);
        Py_XDECREF(code);
        return result;
    }
    else
    {
        /* No profiler: call the compiled function directly. */
        return fn(PyCFunction_GET_SELF(cfunc), args, kws);
    }
}
/*
 * Compile a specialization of `self` for the given arguments, then run it.
 * `locals` is forwarded to call_cfunc so a synthesized frame can expose
 * locals to an active profiler. Returns a new reference, or NULL on error.
 */
static
PyObject*
compile_and_invoke(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals)
{
    PyObject *compiled = NULL;
    PyObject *result = NULL;
    PyObject *compile_meth = PyObject_GetAttrString((PyObject*)self,
                                                    "_compile_for_args");
    if (compile_meth == NULL)
        return NULL;
    /* NOTE: we call the compiled function ourselves instead of
       letting the Python derived class do it. This is for proper
       behaviour of globals() in jitted functions (issue #476). */
    compiled = PyObject_Call(compile_meth, args, kws);
    Py_DECREF(compile_meth);
    if (compiled == NULL)
        return NULL;
    if (PyObject_TypeCheck(compiled, &PyCFunction_Type)) {
        result = call_cfunc(self, compiled, args, kws, locals);
    } else {
        /* Not a C function: re-enter the interpreter to call it */
        result = PyObject_Call(compiled, args, kws);
    }
    Py_DECREF(compiled);
    return result;
}
/* Variant of compile_and_invoke that stops after compilation and returns
 * the compiled object itself. Needed for CUDA, whose overloads are Python
 * _Kernel instances rather than compiled functions; once CUDA overloads
 * are compiled functions this helper can go away. */
static
PyObject*
cuda_compile_only(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals)
{
    PyObject *compiled;
    PyObject *compile_meth = PyObject_GetAttrString((PyObject*)self,
                                                    "_compile_for_args");
    if (compile_meth == NULL)
        return NULL;
    compiled = PyObject_Call(compile_meth, args, kws);
    Py_DECREF(compile_meth);
    return compiled;
}
/*
 * Fold positional and keyword arguments into a purely positional tuple
 * matching the dispatcher's formal parameter list (self->argnames),
 * filling in default values from self->defargs and, when
 * self->has_stararg, packing surplus positional arguments into a tuple
 * stored in the last slot.
 *
 * On success: *pargs is replaced with a NEW reference to the folded tuple
 * (caller owns it), *pkws is set to NULL, and 0 is returned.
 * On error: a TypeError is set, *pargs/*pkws are left untouched, and -1
 * is returned.
 */
static int
find_named_args(Dispatcher *self, PyObject **pargs, PyObject **pkws)
{
    PyObject *oldargs = *pargs, *newargs;
    PyObject *kws = *pkws;
    Py_ssize_t pos_args = PyTuple_GET_SIZE(oldargs);
    Py_ssize_t named_args, total_args, i;
    Py_ssize_t func_args = PyTuple_GET_SIZE(self->argnames);
    Py_ssize_t defaults = PyTuple_GET_SIZE(self->defargs);
    /* Last parameter with a default value (the stararg occupies the last
       formal slot and cannot itself have a default, hence -2) */
    Py_ssize_t last_def = (self->has_stararg)
        ? func_args - 2
        : func_args - 1;
    /* First parameter with a default value */
    Py_ssize_t first_def = last_def - defaults + 1;
    /* Minimum number of required arguments */
    Py_ssize_t minargs = first_def;
    if (kws != NULL)
        named_args = PyDict_Size(kws);
    else
        named_args = 0;
    total_args = pos_args + named_args;
    if (!self->has_stararg && total_args > func_args) {
        PyErr_Format(PyExc_TypeError,
                     "too many arguments: expected %d, got %d",
                     (int) func_args, (int) total_args);
        return -1;
    }
    else if (total_args < minargs) {
        if (minargs == func_args)
            PyErr_Format(PyExc_TypeError,
                         "not enough arguments: expected %d, got %d",
                         (int) minargs, (int) total_args);
        else
            PyErr_Format(PyExc_TypeError,
                         "not enough arguments: expected at least %d, got %d",
                         (int) minargs, (int) total_args);
        return -1;
    }
    newargs = PyTuple_New(func_args);
    if (!newargs)
        return -1;
    /* First pack the stararg */
    if (self->has_stararg) {
        /* Surplus positional arguments (possibly none) become the stararg */
        Py_ssize_t stararg_size = Py_MAX(0, pos_args - func_args + 1);
        PyObject *stararg = PyTuple_New(stararg_size);
        if (!stararg) {
            Py_DECREF(newargs);
            return -1;
        }
        for (i = 0; i < stararg_size; i++) {
            PyObject *value = PyTuple_GET_ITEM(oldargs, func_args - 1 + i);
            Py_INCREF(value);
            PyTuple_SET_ITEM(stararg, i, value);
        }
        /* Put it in last position */
        PyTuple_SET_ITEM(newargs, func_args - 1, stararg);
    }
    /* Copy the plain positional arguments into their slots */
    for (i = 0; i < pos_args; i++) {
        PyObject *value = PyTuple_GET_ITEM(oldargs, i);
        if (self->has_stararg && i >= func_args - 1) {
            /* Skip stararg */
            break;
        }
        Py_INCREF(value);
        PyTuple_SET_ITEM(newargs, i, value);
    }
    /* Iterate over missing positional arguments, try to find them in
       named arguments or default values. */
    for (i = pos_args; i < func_args; i++) {
        PyObject *name = PyTuple_GET_ITEM(self->argnames, i);
        if (self->has_stararg && i >= func_args - 1) {
            /* Skip stararg */
            break;
        }
        if (kws != NULL) {
            /* Named argument? (borrowed reference) */
            PyObject *value = PyDict_GetItem(kws, name);
            if (value != NULL) {
                Py_INCREF(value);
                PyTuple_SET_ITEM(newargs, i, value);
                named_args--;
                continue;
            }
        }
        if (i >= first_def && i <= last_def) {
            /* Argument has a default value? */
            PyObject *value = PyTuple_GET_ITEM(self->defargs, i - first_def);
            Py_INCREF(value);
            PyTuple_SET_ITEM(newargs, i, value);
            continue;
        }
        else if (i < func_args - 1 || !self->has_stararg) {
            PyErr_Format(PyExc_TypeError,
                         "missing argument '%s'",
                         PyString_AsString(name));
            Py_DECREF(newargs);
            return -1;
        }
    }
    /* Any keyword not consumed above matched no formal parameter */
    if (named_args) {
        PyErr_Format(PyExc_TypeError,
                     "some keyword arguments unexpected");
        Py_DECREF(newargs);
        return -1;
    }
    *pargs = newargs;
    *pkws = NULL;
    return 0;
}
/*
 * Management of thread-local
 */
#ifdef _MSC_VER
#define THREAD_LOCAL(ty) __declspec(thread) ty
#else
/* Non-standard C99 extension that's understood by gcc and clang */
#define THREAD_LOCAL(ty) __thread ty
#endif

/* When set, Dispatcher_call forwards the call to self._call_tls_target
   instead of resolving a specialization directly (see Dispatcher_call). */
static THREAD_LOCAL(bool) use_tls_target_stack;

/* RAII guard: saves the current use_tls_target_stack value, installs a new
   one, and restores the saved value when the guard leaves scope. */
struct raii_use_tls_target_stack {
    bool old_setting;   // value to restore on destruction
    raii_use_tls_target_stack(bool new_setting)
        : old_setting(use_tls_target_stack)
    {
        use_tls_target_stack = new_setting;
    }
    ~raii_use_tls_target_stack() {
        use_tls_target_stack = old_setting;
    }
};
/*
 * tp_call implementation for Dispatcher.
 *
 * Computes the typecode of each actual argument, resolves a compiled
 * specialization and invokes it. May compile a new specialization, fall
 * back to an object-mode definition, or raise a TypeError on no/ambiguous
 * match. Returns a new reference, or NULL with an exception set.
 */
static PyObject*
Dispatcher_call(Dispatcher *self, PyObject *args, PyObject *kws)
{
    PyObject *tmptype, *retval = NULL;
    int *tys = NULL;
    int argct;
    int i;
    int prealloc[24];   /* avoids a heap allocation for <= 24 arguments */
    int matches;
    PyObject *cfunc;
    PyThreadState *ts = PyThreadState_Get();
    PyObject *locals = NULL;

    // Check TLS target stack
    if (use_tls_target_stack) {
        /* Clear the flag for the duration of the forwarded call so the
           Python-level target can re-enter this dispatcher. */
        raii_use_tls_target_stack turn_off(false);
        PyObject * meth_call_tls_target;
        meth_call_tls_target = PyObject_GetAttrString((PyObject*)self,
                                                      "_call_tls_target");
        if (!meth_call_tls_target) return NULL;
        // Transfer control to self._call_tls_target
        retval = PyObject_Call(meth_call_tls_target, args, kws);
        Py_DECREF(meth_call_tls_target);
        return retval;
    }

    /* If compilation is enabled, ensure that an exact match is found and if
     * not compile one */
    int exact_match_required = self->can_compile ? 1 : self->exact_match_required;

#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10)
    if (ts->tracing && ts->c_profilefunc) {
#else
    if (ts->use_tracing && ts->c_profilefunc) {
#endif
        /* A profiler is active: fetch the caller's locals (borrowed
           reference) so call_cfunc can synthesize a profiling frame. */
        locals = PyEval_GetLocals();
        if (locals == NULL) {
            /* BUGFIX: previously `goto CLEANUP`, but no reference to
               `args` is owned yet at this point, so the Py_DECREF(args)
               in the cleanup path over-released a borrowed reference. */
            return NULL;
        }
    }
    if (self->fold_args) {
        if (find_named_args(self, &args, &kws))
            return NULL;
    }
    else
        Py_INCREF(args);
    /* Now we own a reference to args */

    argct = PySequence_Fast_GET_SIZE(args);
    if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int)))
        tys = prealloc;
    else
        tys = new int[argct];

    /* Compute the typecode of every actual argument */
    for (i = 0; i < argct; ++i) {
        tmptype = PySequence_Fast_GET_ITEM(args, i);
        tys[i] = typeof_typecode((PyObject *) self, tmptype);
        if (tys[i] == -1) {
            if (self->can_fallback){
                /* We will clear the exception if fallback is allowed. */
                PyErr_Clear();
            } else {
                goto CLEANUP;
            }
        }
    }

    /* We only allow unsafe conversions if compilation of new specializations
       has been disabled.
       Note that the number of matches is returned in matches by resolve, which
       accepts it as a reference. */
    cfunc = self->resolve(tys, matches, !self->can_compile,
                          exact_match_required);

    if (matches == 0 && !self->can_compile) {
        /*
         * If we can't compile a new specialization, look for
         * matching signatures for which conversions haven't been
         * registered on the C++ TypeManager.
         */
        int res = search_new_conversions((PyObject *) self, args, kws);
        if (res < 0) {
            retval = NULL;
            goto CLEANUP;
        }
        if (res > 0) {
            /* Retry with the newly registered conversions */
            cfunc = self->resolve(tys, matches, !self->can_compile,
                                  exact_match_required);
        }
    }

    if (matches == 1) {
        /* Definition is found */
        retval = call_cfunc(self, cfunc, args, kws, locals);
    } else if (matches == 0) {
        /* No matching definition */
        if (self->can_compile) {
            retval = compile_and_invoke(self, args, kws, locals);
        } else if (self->fallbackdef) {
            /* Have object fallback */
            retval = call_cfunc(self, self->fallbackdef, args, kws, locals);
        } else {
            /* Raise TypeError */
            explain_matching_error((PyObject *) self, args, kws);
            retval = NULL;
        }
    } else if (self->can_compile) {
        /* Ambiguous, but are allowed to compile */
        retval = compile_and_invoke(self, args, kws, locals);
    } else {
        /* Ambiguous */
        explain_ambiguous((PyObject *) self, args, kws);
        retval = NULL;
    }

CLEANUP:
    if (tys != prealloc)
        delete[] tys;
    Py_DECREF(args);

    return retval;
}
/* Based on Dispatcher_call above, with the following differences:
   1. It does not invoke the definition of the function.
   2. It returns the definition, instead of a value returned by the function.
   This is because CUDA functions are, at present, _Kernel objects rather than
   compiled functions. */
static PyObject*
Dispatcher_cuda_call(Dispatcher *self, PyObject *args, PyObject *kws)
{
    PyObject *tmptype, *retval = NULL;
    int *tys = NULL;
    int argct;
    int i;
    int prealloc[24];   /* avoids a heap allocation for <= 24 arguments */
    int matches;
    PyObject *cfunc;
    PyThreadState *ts = PyThreadState_Get();
    PyObject *locals = NULL;

    /* If compilation is enabled, ensure that an exact match is found and if
     * not compile one */
    int exact_match_required = self->can_compile ? 1 : self->exact_match_required;

#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10)
    if (ts->tracing && ts->c_profilefunc) {
#else
    if (ts->use_tracing && ts->c_profilefunc) {
#endif
        /* A profiler is active: fetch the caller's locals (borrowed
           reference) for the fallback path through call_cfunc. */
        locals = PyEval_GetLocals();
        if (locals == NULL) {
            /* BUGFIX: previously `goto CLEANUP`, but no reference to
               `args` is owned yet at this point, so the Py_DECREF(args)
               in the cleanup path over-released a borrowed reference. */
            return NULL;
        }
    }
    if (self->fold_args) {
        if (find_named_args(self, &args, &kws))
            return NULL;
    }
    else
        Py_INCREF(args);
    /* Now we own a reference to args */

    argct = PySequence_Fast_GET_SIZE(args);
    if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int)))
        tys = prealloc;
    else
        tys = new int[argct];

    /* Compute the typecode of every actual argument */
    for (i = 0; i < argct; ++i) {
        tmptype = PySequence_Fast_GET_ITEM(args, i);
        tys[i] = typeof_typecode((PyObject *) self, tmptype);
        if (tys[i] == -1) {
            if (self->can_fallback){
                /* We will clear the exception if fallback is allowed. */
                PyErr_Clear();
            } else {
                goto CLEANUP;
            }
        }
    }

    /* We only allow unsafe conversions if compilation of new specializations
       has been disabled. */
    cfunc = self->resolve(tys, matches, !self->can_compile,
                          exact_match_required);

    if (matches == 0 && !self->can_compile) {
        /*
         * If we can't compile a new specialization, look for
         * matching signatures for which conversions haven't been
         * registered on the C++ TypeManager.
         */
        int res = search_new_conversions((PyObject *) self, args, kws);
        if (res < 0) {
            retval = NULL;
            goto CLEANUP;
        }
        if (res > 0) {
            /* Retry with the newly registered conversions */
            cfunc = self->resolve(tys, matches, !self->can_compile,
                                  exact_match_required);
        }
    }

    if (matches == 1) {
        /* Definition is found: return it instead of calling it */
        retval = cfunc;
        Py_INCREF(retval);
    } else if (matches == 0) {
        /* No matching definition */
        if (self->can_compile) {
            retval = cuda_compile_only(self, args, kws, locals);
        } else if (self->fallbackdef) {
            /* Have object fallback */
            retval = call_cfunc(self, self->fallbackdef, args, kws, locals);
        } else {
            /* Raise TypeError */
            explain_matching_error((PyObject *) self, args, kws);
            retval = NULL;
        }
    } else if (self->can_compile) {
        /* Ambiguous, but are allowed to compile */
        retval = cuda_compile_only(self, args, kws, locals);
    } else {
        /* Ambiguous */
        explain_ambiguous((PyObject *) self, args, kws);
        retval = NULL;
    }

CLEANUP:
    if (tys != prealloc)
        delete[] tys;
    Py_DECREF(args);

    return retval;
}
static int
import_devicearray(void)
{
PyObject *devicearray = PyImport_ImportModule("numba._devicearray");
if (devicearray == NULL) {
return -1;
}
Py_DECREF(devicearray);
DeviceArray_API = (void**)PyCapsule_Import("numba._devicearray._DEVICEARRAY_API", 0);
if (DeviceArray_API == NULL) {
return -1;
}
return 0;
}
/* Methods exposed on Dispatcher instances */
static PyMethodDef Dispatcher_methods[] = {
    { "_clear", (PyCFunction)Dispatcher_clear, METH_NOARGS, NULL },
    { "_insert", (PyCFunction)Dispatcher_Insert, METH_VARARGS | METH_KEYWORDS,
      "insert new definition"},
    { "_cuda_call", (PyCFunction)Dispatcher_cuda_call,
      METH_VARARGS | METH_KEYWORDS, "CUDA call resolution" },
    { NULL },
};

/* Attributes exposed on Dispatcher instances */
static PyMemberDef Dispatcher_members[] = {
    /* Writable flag controlling whether new specializations may be compiled */
    {(char*)"_can_compile", T_BOOL, offsetof(Dispatcher, can_compile), 0, NULL },
    {NULL}  /* Sentinel */
};
/* Type object for _dispatcher.Dispatcher. Python-level calls route through
   Dispatcher_call (tp_call); the type participates in GC via
   Dispatcher_traverse. */
static PyTypeObject DispatcherType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_dispatcher.Dispatcher",                    /* tp_name */
    sizeof(Dispatcher),                          /* tp_basicsize */
    0,                                           /* tp_itemsize */
    (destructor)Dispatcher_dealloc,              /* tp_dealloc */
    0,                                           /* tp_vectorcall_offset */
    0,                                           /* tp_getattr */
    0,                                           /* tp_setattr */
    0,                                           /* tp_as_async */
    0,                                           /* tp_repr */
    0,                                           /* tp_as_number */
    0,                                           /* tp_as_sequence */
    0,                                           /* tp_as_mapping */
    0,                                           /* tp_hash */
    (PyCFunctionWithKeywords)Dispatcher_call,    /* tp_call*/
    0,                                           /* tp_str*/
    0,                                           /* tp_getattro*/
    0,                                           /* tp_setattro*/
    0,                                           /* tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags*/
    "Dispatcher object",                         /* tp_doc */
    (traverseproc) Dispatcher_traverse,          /* tp_traverse */
    0,                                           /* tp_clear */
    0,                                           /* tp_richcompare */
    0,                                           /* tp_weaklistoffset */
    0,                                           /* tp_iter */
    0,                                           /* tp_iternext */
    Dispatcher_methods,                          /* tp_methods */
    Dispatcher_members,                          /* tp_members */
    0,                                           /* tp_getset */
    0,                                           /* tp_base */
    0,                                           /* tp_dict */
    0,                                           /* tp_descr_get */
    0,                                           /* tp_descr_set */
    0,                                           /* tp_dictoffset */
    (initproc)Dispatcher_init,                   /* tp_init */
    0,                                           /* tp_alloc */
    0,                                           /* tp_new */
    0,                                           /* tp_free */
    0,                                           /* tp_is_gc */
    0,                                           /* tp_bases */
    0,                                           /* tp_mro */
    0,                                           /* tp_cache */
    0,                                           /* tp_subclasses */
    0,                                           /* tp_weaklist */
    0,                                           /* tp_del */
    0,                                           /* tp_version_tag */
    0,                                           /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                           /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                           /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* _dispatcher.compute_fingerprint(obj): return the type fingerprint of
   an arbitrary Python object (delegates to typeof_compute_fingerprint). */
static PyObject *compute_fingerprint(PyObject *self, PyObject *args)
{
    PyObject *obj = NULL;
    if (PyArg_ParseTuple(args, "O:compute_fingerprint", &obj))
        return typeof_compute_fingerprint(obj);
    return NULL;
}
/* _dispatcher.set_use_tls_target_stack(flag): set the thread-local
   use_tls_target_stack flag and return its previous value as a bool. */
static PyObject *set_use_tls_target_stack(PyObject *self, PyObject *args)
{
    int enable;
    if (!PyArg_ParseTuple(args, "p", &enable))
        return NULL;
    bool previous = use_tls_target_stack;
    use_tls_target_stack = enable;
    /* Hand back the old setting so callers can restore it later */
    return PyBool_FromLong(previous ? 1 : 0);
}
/* Module-level functions exported by the _dispatcher extension module */
static PyMethodDef ext_methods[] = {
#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL }
    declmethod(typeof_init),
    declmethod(compute_fingerprint),
    declmethod(set_use_tls_target_stack),
    { NULL },
#undef declmethod
};
/* Module initialization for _dispatcher: requires the numba._devicearray
   C API capsule to be importable, then registers the Dispatcher type. */
MOD_INIT(_dispatcher) {
    if (import_devicearray() < 0) {
        PyErr_Print();
        PyErr_SetString(PyExc_ImportError, "numba._devicearray failed to import");
        return MOD_ERROR_VAL;
    }

    PyObject *m;
    MOD_DEF(m, "_dispatcher", "No docs", ext_methods)
    if (m == NULL)
        return MOD_ERROR_VAL;
    /* Allow Python subclasses to instantiate Dispatcher */
    DispatcherType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&DispatcherType) < 0) {
        return MOD_ERROR_VAL;
    }
    Py_INCREF(&DispatcherType);
    PyModule_AddObject(m, "Dispatcher", (PyObject*)(&DispatcherType));
    return MOD_SUCCESS_VAL(m);
}
/*
* Definition of Environment and Closure objects.
* This module is included by _dynfuncmod.c and by pycc-compiled modules.
*/
#include "_pymodule.h"
#include <string.h>
/* NOTE: EnvironmentObject and ClosureObject must be kept in sync with
* the definitions in numba/targets/base.py (EnvBody and ClosureBody).
*/
/*
* EnvironmentObject hosts data needed for execution of compiled functions.
*/
/* Runtime environment of a compiled function. Field layout must stay in
   sync with EnvBody in numba/targets/base.py (see note above). */
typedef struct {
    PyObject_HEAD
    /* Globals dict of the module the function belongs to */
    PyObject *globals;
    /* Assorted "constants" that are needed at runtime to execute
       the compiled function. This can include frozen closure variables,
       lifted loops, etc. */
    PyObject *consts;
} EnvironmentObject;
/* Read-only Python-level access to the environment's two fields */
static PyMemberDef env_members[] = {
    {"globals", T_OBJECT, offsetof(EnvironmentObject, globals), READONLY, NULL},
    {"consts", T_OBJECT, offsetof(EnvironmentObject, consts), READONLY, NULL},
    {NULL}  /* Sentinel */
};
/* GC traversal: visit the two owned references. */
static int
env_traverse(EnvironmentObject *env, visitproc visit, void *arg)
{
    Py_VISIT(env->globals);
    Py_VISIT(env->consts);
    return 0;
}
/* GC clear: drop the owned references (also reused by env_dealloc). */
static int
env_clear(EnvironmentObject *env)
{
    Py_CLEAR(env->globals);
    Py_CLEAR(env->consts);
    return 0;
}
/* Destructor: untrack from the GC before clearing so the collector cannot
   see a half-destroyed object. */
static void
env_dealloc(EnvironmentObject *env)
{
    PyObject_GC_UnTrack((PyObject *) env);
    env_clear(env);
    Py_TYPE(env)->tp_free((PyObject *) env);
}
/* Allocate an EnvironmentObject with zeroed fields, bypassing env_new's
   argument parsing. Returns NULL on allocation failure. */
static EnvironmentObject *
env_new_empty(PyTypeObject* type)
{
    PyObject *raw = PyType_GenericNew(type, NULL, NULL);
    return (EnvironmentObject *) raw;
}
/* tp_new implementation: Environment(globals) builds an environment whose
   globals is the given dict and whose consts starts as an empty list. */
static PyObject *
env_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
{
    PyObject *globals;
    EnvironmentObject *env;
    static char *kwlist[] = {"globals", 0};

    /* "O!" enforces that `globals` is exactly a dict (borrowed reference) */
    if (!PyArg_ParseTupleAndKeywords(
            args, kwds, "O!:function", kwlist,
            &PyDict_Type, &globals))
        return NULL;

    env = env_new_empty(type);
    if (env == NULL)
        return NULL;
    Py_INCREF(globals);
    env->globals = globals;
    env->consts = PyList_New(0);
    if (!env->consts) {
        /* env_dealloc releases the globals reference taken above */
        Py_DECREF(env);
        return NULL;
    }
    return (PyObject *) env;
}
/* Type object for _dynfunc.Environment. Constructible from Python via
   env_new (tp_new); participates in GC via env_traverse/env_clear. */
static PyTypeObject EnvironmentType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_dynfunc.Environment",                      /* tp_name */
    sizeof(EnvironmentObject),                   /* tp_basicsize */
    0,                                           /* tp_itemsize */
    (destructor) env_dealloc,                    /* tp_dealloc */
    0,                                           /* tp_vectorcall_offset */
    0,                                           /* tp_getattr*/
    0,                                           /* tp_setattr */
    0,                                           /* tp_as_async */
    0,                                           /* tp_repr */
    0,                                           /* tp_as_number */
    0,                                           /* tp_as_sequence */
    0,                                           /* tp_as_mapping */
    0,                                           /* tp_hash */
    0,                                           /* tp_call */
    0,                                           /* tp_str */
    0,                                           /* tp_getattro */
    0,                                           /* tp_setattro */
    0,                                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                           /* tp_doc */
    (traverseproc) env_traverse,                 /* tp_traverse */
    (inquiry) env_clear,                         /* tp_clear */
    0,                                           /* tp_richcompare */
    0,                                           /* tp_weaklistoffset */
    0,                                           /* tp_iter */
    0,                                           /* tp_iternext */
    0,                                           /* tp_methods */
    env_members,                                 /* tp_members */
    0,                                           /* tp_getset */
    0,                                           /* tp_base */
    0,                                           /* tp_dict */
    0,                                           /* tp_descr_get */
    0,                                           /* tp_descr_set */
    0,                                           /* tp_dictoffset */
    0,                                           /* tp_init */
    0,                                           /* tp_alloc */
    env_new,                                     /* tp_new */
    0,                                           /* tp_free */
    0,                                           /* tp_is_gc */
    0,                                           /* tp_bases */
    0,                                           /* tp_mro */
    0,                                           /* tp_cache */
    0,                                           /* tp_subclasses */
    0,                                           /* tp_weaklist */
    0,                                           /* tp_del */
    0,                                           /* tp_version_tag */
    0,                                           /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                           /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                           /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* A closure object is created for each call to make_function(), and stored
   as the resulting PyCFunction object's "self" pointer. It points to an
   EnvironmentObject which is constructed during compilation. This allows
   for two things:
       - lifetime management of dependent data (e.g. lifted loop dispatchers)
       - access to the execution environment by the compiled function
         (for example the globals module)
*/

/* Closure is a variable-sized object for binary compatibility with
   Generator (see below): both begin with the same header, so `env`
   sits at the same offset in either object. */
#define CLOSURE_HEAD     \
    PyObject_VAR_HEAD    \
    EnvironmentObject *env;

typedef struct {
    CLOSURE_HEAD
    /* The dynamically-filled method definition for the PyCFunction object
       using this closure. */
    PyMethodDef def;
    /* Arbitrary object to keep alive during the closure's lifetime.
       (put a tuple to put several objects alive).
       In practice, this helps keep the LLVM module and its generated
       code alive. */
    PyObject *keepalive;
    /* Support for weak references to the closure */
    PyObject *weakreflist;
} ClosureObject;
/* GC traversal: visit the environment and the keepalive object. */
static int
closure_traverse(ClosureObject *clo, visitproc visit, void *arg)
{
    Py_VISIT(clo->env);
    Py_VISIT(clo->keepalive);
    return 0;
}
/* Destructor: releases the name/doc strings duplicated by closure_new()
   (via dup_string) plus the owned references. */
static void
closure_dealloc(ClosureObject *clo)
{
    PyObject_GC_UnTrack((PyObject *) clo);
    if (clo->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) clo);
    /* ml_name / ml_doc were allocated with PyObject_Malloc in dup_string() */
    PyObject_Free((void *) clo->def.ml_name);
    PyObject_Free((void *) clo->def.ml_doc);
    Py_XDECREF(clo->env);
    Py_XDECREF(clo->keepalive);
    Py_TYPE(clo)->tp_free((PyObject *) clo);
}
/* Type object for _dynfunc._Closure. Not constructible from Python
   (no tp_new); instances are created internally by closure_new(). */
static PyTypeObject ClosureType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_dynfunc._Closure",                         /* tp_name */
    sizeof(ClosureObject),                       /* tp_basicsize */
    0,                                           /* tp_itemsize */
    (destructor) closure_dealloc,                /* tp_dealloc */
    0,                                           /* tp_vectorcall_offset */
    0,                                           /* tp_getattr */
    0,                                           /* tp_setattr */
    0,                                           /* tp_as_async */
    0,                                           /* tp_repr */
    0,                                           /* tp_as_number */
    0,                                           /* tp_as_sequence */
    0,                                           /* tp_as_mapping */
    0,                                           /* tp_hash */
    0,                                           /* tp_call */
    0,                                           /* tp_str */
    0,                                           /* tp_getattro */
    0,                                           /* tp_setattro */
    0,                                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,     /* tp_flags */
    0,                                           /* tp_doc */
    (traverseproc) closure_traverse,             /* tp_traverse */
    0,                                           /* tp_clear */
    0,                                           /* tp_richcompare */
    offsetof(ClosureObject, weakreflist),        /* tp_weaklistoffset */
    0,                                           /* tp_iter */
    0,                                           /* tp_iternext */
    0,                                           /* tp_methods */
    0,                                           /* tp_members */
    0,                                           /* tp_getset */
    0,                                           /* tp_base */
    0,                                           /* tp_dict */
    0,                                           /* tp_descr_get */
    0,                                           /* tp_descr_set */
    0,                                           /* tp_dictoffset */
    0,                                           /* tp_init */
    0,                                           /* tp_alloc */
    0,                                           /* tp_new */
    0,                                           /* tp_free */
    0,                                           /* tp_is_gc */
    0,                                           /* tp_bases */
    0,                                           /* tp_mro */
    0,                                           /* tp_cache */
    0,                                           /* tp_subclasses */
    0,                                           /* tp_weaklist */
    0,                                           /* tp_del */
    0,                                           /* tp_version_tag */
    0,                                           /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                           /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                           /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
/* FIX: parenthesization normalized to match the identical guards on the
 * other type objects in this file; the previous form nested `== 11`
 * inside the `== 10` test's parentheses (same truth table, but easy to
 * break on the next version bump). */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* Duplicate a Python string object's UTF-8 value into an owned C buffer.
   Returns NULL with an exception set on failure. The caller releases the
   buffer with PyObject_Free. */
static char *
dup_string(PyObject *strobj)
{
    char *copy;
    const char *src = PyString_AsString(strobj);
    if (src == NULL)
        return NULL;
    /* PyObject_Malloc keeps the allocation visible to Python's memory
       tracking, so leaks are reported. */
    copy = PyObject_Malloc(strlen(src) + 1);
    if (copy == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memcpy(copy, src, strlen(src) + 1);
    return copy;
}
/* Create and initialize a new Closure object.
   `name` and `doc` must be string objects, `env` must be non-NULL, and
   `keepalive` may be NULL. Returns a new reference, or NULL on error. */
static ClosureObject *
closure_new(PyObject *name, PyObject *doc, PyCFunction fnaddr,
            EnvironmentObject *env, PyObject *keepalive)
{
    ClosureObject *clo = (ClosureObject *) PyType_GenericAlloc(&ClosureType, 0);
    if (clo == NULL)
        return NULL;

    clo->def.ml_name = dup_string(name);
    if (!clo->def.ml_name) {
        /* closure_dealloc frees any partially-initialized fields */
        Py_DECREF(clo);
        return NULL;
    }
    clo->def.ml_meth = fnaddr;
    clo->def.ml_flags = METH_VARARGS | METH_KEYWORDS;
    clo->def.ml_doc = dup_string(doc);
    if (!clo->def.ml_doc) {
        Py_DECREF(clo);
        return NULL;
    }
    Py_INCREF(env);
    clo->env = env;
    Py_XINCREF(keepalive);   /* keepalive is optional */
    clo->keepalive = keepalive;
    return clo;
}
/* Create a new PyCFunction object wrapping a closure defined by
   the given arguments. The closure becomes the function's __self__, which
   ties the closure's lifetime to the function object's.
   Returns a new reference, or NULL on error. */
static PyObject *
pycfunction_new(PyObject *module, PyObject *name, PyObject *doc,
                PyCFunction fnaddr, EnvironmentObject *env, PyObject *keepalive)
{
    PyObject *funcobj;
    PyObject *modname = NULL;
    ClosureObject *closure = NULL;

    closure = closure_new(name, doc, fnaddr, env, keepalive);
    if (closure == NULL) goto FAIL;
    modname = PyObject_GetAttrString(module, "__name__");
    if (modname == NULL) goto FAIL;
    /* PyCFunction_NewEx takes its own references to closure and modname */
    funcobj = PyCFunction_NewEx(&closure->def, (PyObject *) closure, modname);
    Py_DECREF(closure);
    Py_DECREF(modname);
    return funcobj;
FAIL:
    Py_XDECREF(closure);
    Py_XDECREF(modname);
    return NULL;
}
/*
 * Python-facing wrapper for Numba-compiled generator.
 * Note the Environment's offset inside the struct is the same as in the
 * Closure object. This is required to simplify generation of Python wrappers.
 */

/* Finalizer run over the opaque generator state when the generator is
   cleared or deallocated. */
typedef void (*gen_finalizer_t)(void *);

typedef struct {
    CLOSURE_HEAD
    /* C function that advances the generator (its __next__) */
    PyCFunctionWithKeywords nextfunc;
    /* Invoked on `state` at finalization; reset to NULL afterwards */
    gen_finalizer_t finalizer;
    PyObject *weakreflist;
    union {
        double dummy;    /* Force alignment */
        char state[0];   /* Variable-sized opaque generator state */
    };
} GeneratorObject;
/* GC traversal for generators. */
static int
generator_traverse(GeneratorObject *gen, visitproc visit, void *arg)
{
    /* XXX this doesn't traverse the state, which can own references to
       PyObjects */
    Py_VISIT(gen->env);
    return 0;
}
/* GC clear: run the state finalizer (at most once), drop the environment
   reference, and leave the generator unusable (nextfunc == NULL). */
static int
generator_clear(GeneratorObject *gen)
{
    if (gen->finalizer != NULL) {
        gen->finalizer(gen->state);
        gen->finalizer = NULL;
    }
    Py_CLEAR(gen->env);
    gen->nextfunc = NULL;
    return 0;
}
/* Destructor for generators. */
static void
generator_dealloc(GeneratorObject *gen)
{
    PyObject_GC_UnTrack((PyObject *) gen);
    if (gen->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) gen);
    /* XXX The finalizer may be called after the LLVM module has been
       destroyed (typically at interpreter shutdown), so it is skipped
       while the interpreter is finalizing */
    if (!_Py_IsFinalizing())
        if (gen->finalizer != NULL)
            gen->finalizer(gen->state);
    Py_XDECREF(gen->env);
    Py_TYPE(gen)->tp_free((PyObject *) gen);
}
/* tp_iternext implementation: advance the generator by calling its
   compiled nextfunc with the generator packed as the sole argument.
   Raises RuntimeError when the generator was already finalized. */
static PyObject *
generator_iternext(GeneratorObject *gen)
{
    PyObject *packed_args, *item;
    if (gen->nextfunc == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "cannot call next() on finalized generator");
        return NULL;
    }
    packed_args = PyTuple_Pack(1, (PyObject *) gen);
    if (packed_args == NULL)
        return NULL;
    item = gen->nextfunc((PyObject *) gen, packed_args, NULL);
    Py_DECREF(packed_args);
    return item;
}
/* Type object for _dynfunc._Generator. Note tp_itemsize == 1 with
   tp_basicsize == offsetof(state): the trailing `state` bytes are
   allocated per-instance by PyType_GenericAlloc (see Numba_make_generator). */
static PyTypeObject GeneratorType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_dynfunc._Generator",                       /* tp_name*/
    offsetof(GeneratorObject, state),            /* tp_basicsize*/
    1,                                           /* tp_itemsize*/
    (destructor) generator_dealloc,              /* tp_dealloc*/
    0,                                           /* tp_vectorcall_offset*/
    0,                                           /* tp_getattr*/
    0,                                           /* tp_setattr*/
    0,                                           /* tp_as_async*/
    0,                                           /* tp_repr*/
    0,                                           /* tp_as_number*/
    0,                                           /* tp_as_sequence*/
    0,                                           /* tp_as_mapping*/
    0,                                           /* tp_hash */
    0,                                           /* tp_call*/
    0,                                           /* tp_str*/
    0,                                           /* tp_getattro*/
    0,                                           /* tp_setattro*/
    0,                                           /* tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,                   /* tp_flags*/
    0,                                           /* tp_doc */
    (traverseproc) generator_traverse,           /* tp_traverse */
    (inquiry) generator_clear,                   /* tp_clear */
    0,                                           /* tp_richcompare */
    offsetof(GeneratorObject, weakreflist),      /* tp_weaklistoffset */
    PyObject_SelfIter,                           /* tp_iter */
    (iternextfunc) generator_iternext,           /* tp_iternext */
    0,                                           /* tp_methods */
    0,                                           /* tp_members */
    0,                                           /* tp_getset */
    0,                                           /* tp_base */
    0,                                           /* tp_dict */
    0,                                           /* tp_descr_get */
    0,                                           /* tp_descr_set */
    0,                                           /* tp_dictoffset */
    0,                                           /* tp_init */
    0,                                           /* tp_alloc */
    0,                                           /* tp_new */
    0,                                           /* tp_free */
    0,                                           /* tp_is_gc */
    0,                                           /* tp_bases */
    0,                                           /* tp_mro */
    0,                                           /* tp_cache */
    0,                                           /* tp_subclasses */
    0,                                           /* tp_weaklist */
    0,                                           /* tp_del */
    0,                                           /* tp_version_tag */
    0,                                           /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                           /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                           /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* Dynamically create a new generator object. `gen_state_size` extra bytes
   are allocated after the fixed header (tp_itemsize == 1) and initialized
   from `initial_state`.
   NOTE(review): assumes initial_state points at >= gen_state_size readable
   bytes — confirm against the callers (generated code). */
static PyObject *
Numba_make_generator(Py_ssize_t gen_state_size,
                     void *initial_state,
                     PyCFunctionWithKeywords nextfunc,
                     gen_finalizer_t finalizer,
                     EnvironmentObject *env)
{
    GeneratorObject *gen;
    gen = (GeneratorObject *) PyType_GenericAlloc(&GeneratorType, gen_state_size);
    if (gen == NULL)
        return NULL;
    memcpy(gen->state, initial_state, gen_state_size);
    gen->nextfunc = nextfunc;
    Py_XINCREF(env);   /* env may be NULL */
    gen->env = env;
    gen->finalizer = finalizer;
    return (PyObject *) gen;
}
/* Initialization subroutine shared by the modules that include this file:
   readies the three extension types. Returns 0 on success, -1 on error. */
static int
init_dynfunc_module(PyObject *module)
{
    PyTypeObject *types[] = { &ClosureType, &EnvironmentType, &GeneratorType };
    size_t n = sizeof(types) / sizeof(types[0]);
    size_t i;
    for (i = 0; i < n; i++) {
        if (PyType_Ready(types[i]))
            return -1;
    }
    return 0;
}
#include "_dynfunc.c"
/* Python-facing function to dynamically create a new C function object.
   make_function(module, name, doc, fnaddr, env[, keepalive]) */
static PyObject*
make_function(PyObject *self, PyObject *args)
{
    PyObject *module, *fname, *fdoc, *fnaddrobj;
    void *fnaddr;
    EnvironmentObject *env;
    /* BUGFIX: `keepalive` is optional in the format ("|O" below) and must
       be pre-initialized; otherwise an omitted argument left it as stack
       garbage that pycfunction_new() would Py_XINCREF. */
    PyObject *keepalive = NULL;

    if (!PyArg_ParseTuple(args, "OOOOO!|O",
            &module, &fname, &fdoc, &fnaddrobj, &EnvironmentType, &env,
            &keepalive)) {
        return NULL;
    }

    fnaddr = PyLong_AsVoidPtr(fnaddrobj);
    if (fnaddr == NULL && PyErr_Occurred())
        return NULL;

    return pycfunction_new(module, fname, fdoc, fnaddr, env, keepalive);
}
/* Module-level functions exported by the _dynfunc extension module */
static PyMethodDef ext_methods[] = {
#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL }
    declmethod(make_function),
    { NULL },
#undef declmethod
};
/* Build the dict exposed as `_dynfunc.c_helpers`: maps helper names to the
   addresses (as Python ints) of the corresponding C functions.
   Returns a new reference, or NULL on error. */
static PyObject *
build_c_helpers_dict(void)
{
    PyObject *dct = PyDict_New();
    if (dct == NULL)
        goto error;

/* Store `value`'s address under `name`, jumping to `error` on failure */
#define _declpointer(name, value) do {                 \
    PyObject *o = PyLong_FromVoidPtr(value);           \
    if (o == NULL) goto error;                         \
    if (PyDict_SetItemString(dct, name, o)) {          \
        Py_DECREF(o);                                  \
        goto error;                                    \
    }                                                  \
    Py_DECREF(o);                                      \
} while (0)

#define declmethod(func) _declpointer(#func, &Numba_##func)
#define declpointer(ptr) _declpointer(#ptr, &ptr)

    declmethod(make_generator);

#undef declmethod
    return dct;
error:
    Py_XDECREF(dct);
    return NULL;
}
/* Module initialization for _dynfunc: readies and exposes the three types,
   publishes struct-offset info (_impl_info) needed by the compiler, and
   the c_helpers function-address dict. */
MOD_INIT(_dynfunc) {
    PyObject *m, *impl_info;

    MOD_DEF(m, "_dynfunc", "No docs", ext_methods)
    if (m == NULL)
        return MOD_ERROR_VAL;
    if (init_dynfunc_module(m))
        return MOD_ERROR_VAL;

    /* "sn" pairs: string key, Py_ssize_t value */
    impl_info = Py_BuildValue(
        "{snsnsn}",
        "offsetof_closure_body", offsetof(ClosureObject, env),
        "offsetof_env_body", offsetof(EnvironmentObject, globals),
        "offsetof_generator_state", offsetof(GeneratorObject, state)
    );
    if (impl_info == NULL)
        return MOD_ERROR_VAL;
    PyModule_AddObject(m, "_impl_info", impl_info);

    Py_INCREF(&ClosureType);
    PyModule_AddObject(m, "_Closure", (PyObject *) (&ClosureType));
    Py_INCREF(&EnvironmentType);
    PyModule_AddObject(m, "Environment", (PyObject *) (&EnvironmentType));
    Py_INCREF(&GeneratorType);
    PyModule_AddObject(m, "_Generator", (PyObject *) (&GeneratorType));

    PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
    return MOD_SUCCESS_VAL(m);
}
/*
* This file and _hashtable.h are from CPython 3.5. The symbols have been
* renamed from _Py_hashxxx to _Numba_hashxxx to avoid name clashes with
* the CPython definitions (including at runtime through dynamic linking).
* Those CPython APIs are private and can change in incompatible ways at
* any time.
*
* Command line used for renaming:
* $ sed -i -r 's/\b_Py_(has[h]table)/_Numba_\1/ig' numba/_hashtable.h numba/_hashtable.c
*/
/* The implementation of the hash table (_Numba_hashtable_t) is based on the cfuhash
project:
http://sourceforge.net/projects/libcfu/
Copyright of cfuhash:
----------------------------------
Creation date: 2005-06-24 21:22:40
Authors: Don
Change log:
Copyright (c) 2005 Don Owens
All rights reserved.
This code is released under the BSD license:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
----------------------------------
*/
#include "_pymodule.h"
#include "_hashtable.h"
/* Tuning parameters: the bucket array grows when the load factor exceeds
   HASHTABLE_HIGH and shrinks when it drops below HASHTABLE_LOW; the rehash
   factor picks a bucket count between the two extremes. */
#define HASHTABLE_MIN_SIZE 16
#define HASHTABLE_HIGH 0.50
#define HASHTABLE_LOW 0.10
/* Parenthesized so the macro expands safely inside larger expressions;
   the original "2.0 / (...)" form relied on operator precedence at each
   use site. */
#define HASHTABLE_REHASH_FACTOR (2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH))

/* Typed accessors over the singly-linked bucket lists. */
#define BUCKETS_HEAD(SLIST) \
        ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
#define TABLE_HEAD(HT, BUCKET) \
        ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
#define ENTRY_NEXT(ENTRY) \
        ((_Numba_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
/* Allocation size of one entry: header plus the inline data area. */
#define HASHTABLE_ITEM_SIZE(HT) \
        (sizeof(_Numba_hashtable_entry_t) + (HT)->data_size)
/* Forward declaration */
static void hashtable_rehash(_Numba_hashtable_t *ht);
/* Initialize a singly-linked list to empty. */
static void
_Py_slist_init(_Py_slist_t *list)
{
    list->head = NULL;
}

/* Push `item` at the front of `list` (O(1)). */
static void
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
{
    item->next = list->head;
    list->head = item;
}

/* Unlink `item` from `list`. `previous` must be the item immediately
   before it, or NULL when `item` is the head. `item` is not freed. */
static void
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
                 _Py_slist_item_t *item)
{
    if (previous != NULL)
        previous->next = item->next;
    else
        list->head = item->next;
}
extern "C" Py_uhash_t
_Numba_hashtable_hash_int(const void *key)
{
return (Py_uhash_t)key;
}
extern "C" Py_uhash_t
_Numba_hashtable_hash_ptr(const void *key)
{
return (Py_uhash_t)_Py_HashPointer((void *)key);
}
extern "C" int
_Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry)
{
return entry->key == key;
}
/* Round a requested bucket count up to the nearest power of two, never
   below HASHTABLE_MIN_SIZE. A power-of-two size lets bucket selection use
   "hash & (num_buckets - 1)" instead of a modulo. */
static size_t
round_size(size_t s)
{
    size_t rounded;
    if (s < HASHTABLE_MIN_SIZE)
        return HASHTABLE_MIN_SIZE;
    for (rounded = 1; rounded < s; rounded <<= 1)
        ;
    return rounded;
}
extern "C" _Numba_hashtable_t *
_Numba_hashtable_new_full(size_t data_size, size_t init_size,
_Numba_hashtable_hash_func hash_func,
_Numba_hashtable_compare_func compare_func,
_Numba_hashtable_copy_data_func copy_data_func,
_Numba_hashtable_free_data_func free_data_func,
_Numba_hashtable_get_data_size_func get_data_size_func,
_Numba_hashtable_allocator_t *allocator)
{
_Numba_hashtable_t *ht;
size_t buckets_size;
_Numba_hashtable_allocator_t alloc;
if (allocator == NULL) {
alloc.malloc = PyMem_RawMalloc;
alloc.free = PyMem_RawFree;
}
else
alloc = *allocator;
ht = (_Numba_hashtable_t *)alloc.malloc(sizeof(_Numba_hashtable_t));
if (ht == NULL)
return ht;
ht->num_buckets = round_size(init_size);
ht->entries = 0;
ht->data_size = data_size;
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
ht->buckets = (_Py_slist_t *) alloc.malloc(buckets_size);
if (ht->buckets == NULL) {
alloc.free(ht);
return NULL;
}
memset(ht->buckets, 0, buckets_size);
ht->hash_func = hash_func;
ht->compare_func = compare_func;
ht->copy_data_func = copy_data_func;
ht->free_data_func = free_data_func;
ht->get_data_size_func = get_data_size_func;
ht->alloc = alloc;
return ht;
}
extern "C" _Numba_hashtable_t *
_Numba_hashtable_new(size_t data_size,
_Numba_hashtable_hash_func hash_func,
_Numba_hashtable_compare_func compare_func)
{
return _Numba_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
hash_func, compare_func,
NULL, NULL, NULL, NULL);
}
extern "C" size_t
_Numba_hashtable_size(_Numba_hashtable_t *ht)
{
size_t size;
size_t hv;
size = sizeof(_Numba_hashtable_t);
/* buckets */
size += ht->num_buckets * sizeof(_Numba_hashtable_entry_t *);
/* entries */
size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
/* data linked from entries */
if (ht->get_data_size_func) {
for (hv = 0; hv < ht->num_buckets; hv++) {
_Numba_hashtable_entry_t *entry;
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
void *data;
data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
size += ht->get_data_size_func(data);
}
}
}
return size;
}
#ifdef Py_DEBUG
/* Debug-build only: print load factor, average/maximum bucket chain
   length and total memory usage to stdout. */
extern "C" void
_Numba_hashtable_print_stats(_Numba_hashtable_t *ht)
{
    size_t size;
    size_t chain_len, max_chain_len, total_chain_len, nchains;
    _Numba_hashtable_entry_t *entry;
    size_t hv;
    double load;

    size = _Numba_hashtable_size(ht);

    load = (double)ht->entries / ht->num_buckets;

    /* Walk every bucket, measuring only non-empty chains. */
    max_chain_len = 0;
    total_chain_len = 0;
    nchains = 0;
    for (hv = 0; hv < ht->num_buckets; hv++) {
        entry = TABLE_HEAD(ht, hv);
        if (entry != NULL) {
            chain_len = 0;
            for (; entry; entry = ENTRY_NEXT(entry)) {
                chain_len++;
            }
            if (chain_len > max_chain_len)
                max_chain_len = chain_len;
            total_chain_len += chain_len;
            nchains++;
        }
    }
    printf("hash table %p: entries=%"
           PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
           ht, ht->entries, ht->num_buckets, load * 100.0);
    if (nchains)
        printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
    printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n",
           max_chain_len, size / 1024);
}
#endif
/* Get an entry. Return NULL if the key does not exist. */
extern "C" _Numba_hashtable_entry_t *
_Numba_hashtable_get_entry(_Numba_hashtable_t *ht, const void *key)
{
    Py_uhash_t key_hash;
    size_t index;
    _Numba_hashtable_entry_t *entry;

    key_hash = ht->hash_func(key);
    /* num_buckets is a power of 2, so masking selects the bucket. */
    index = key_hash & (ht->num_buckets - 1);

    /* Compare the cached hash first; only call compare_func on a match. */
    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
            break;
    }

    return entry;
}
/* Remove the entry for `key`, optionally copying its inline data into
   `data` (pass data == NULL to discard it, as the delete path does).
   Returns 1 if the entry was found and removed, 0 otherwise. May shrink
   the table when the load factor drops below HASHTABLE_LOW. */
static int
_hashtable_pop_entry(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    Py_uhash_t key_hash;
    size_t index;
    _Numba_hashtable_entry_t *entry, *previous;

    key_hash = ht->hash_func(key);
    index = key_hash & (ht->num_buckets - 1);

    /* Track the predecessor so the entry can be unlinked from the
       singly-linked bucket chain. */
    previous = NULL;
    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
            break;
        previous = entry;
    }

    if (entry == NULL)
        return 0;

    _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
                     (_Py_slist_item_t *)entry);
    ht->entries--;

    if (data != NULL)
        _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
    ht->alloc.free(entry);

    if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
        hashtable_rehash(ht);
    return 1;
}
/* Add a new entry to the hash. The key must not be present in the hash table.
   Return 0 on success, -1 on memory error. The `data_size` bytes at `data`
   are copied into the entry's inline data area (must equal ht->data_size).
   May grow the table when the load factor exceeds HASHTABLE_HIGH. */
extern "C" int
_Numba_hashtable_set(_Numba_hashtable_t *ht, const void *key,
                     void *data, size_t data_size)
{
    Py_uhash_t key_hash;
    size_t index;
    _Numba_hashtable_entry_t *entry;

    assert(data != NULL || data_size == 0);
#ifndef NDEBUG
    /* Don't write the assertion on a single line because it is interesting
       to know the duplicated entry if the assertion failed. The entry can
       be read using a debugger. */
    entry = _Numba_hashtable_get_entry(ht, key);
    assert(entry == NULL);
#endif

    key_hash = ht->hash_func(key);
    index = key_hash & (ht->num_buckets - 1);

    entry = (_Numba_hashtable_entry_t *) ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
    if (entry == NULL) {
        /* memory allocation failed */
        return -1;
    }

    entry->key = (void *)key;
    entry->key_hash = key_hash;

    assert(data_size == ht->data_size);
    memcpy(_Numba_HASHTABLE_ENTRY_DATA(entry), data, data_size);

    _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
    ht->entries++;

    if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
        hashtable_rehash(ht);
    return 0;
}
/* Get data from an entry. Copy entry data into data and return 1 if the entry
   exists, return 0 if the entry does not exist. */
extern "C" int
_Numba_hashtable_get(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    _Numba_hashtable_entry_t *entry;

    assert(data != NULL);

    entry = _Numba_hashtable_get_entry(ht, key);
    if (entry == NULL)
        return 0;
    _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
    return 1;
}

/* Remove the entry for `key`, copying its data into `data`.
   Returns 1 if the entry existed, 0 otherwise. Only valid on tables
   without a free_data_func (otherwise ownership would be ambiguous). */
extern "C" int
_Numba_hashtable_pop(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    assert(data != NULL);
    assert(ht->free_data_func == NULL);
    return _hashtable_pop_entry(ht, key, data, data_size);
}

/* Delete an entry. The entry must exist. */
extern "C" void
_Numba_hashtable_delete(_Numba_hashtable_t *ht, const void *key)
{
#ifndef NDEBUG
    int found = _hashtable_pop_entry(ht, key, NULL, 0);
    assert(found);
#else
    (void)_hashtable_pop_entry(ht, key, NULL, 0);
#endif
}
/* Prototype for a pointer to a function to be called foreach
   key/value pair in the hash by hashtable_foreach(). Iteration
   stops if a non-zero value is returned. */
/* Returns the first non-zero value returned by `func`, or 0 if `func`
   returned 0 for every entry. `func` must not add or remove entries. */
extern "C" int
_Numba_hashtable_foreach(_Numba_hashtable_t *ht,
                         int (*func) (_Numba_hashtable_entry_t *entry, void *arg),
                         void *arg)
{
    _Numba_hashtable_entry_t *entry;
    size_t hv;

    for (hv = 0; hv < ht->num_buckets; hv++) {
        for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
            int res = func(entry, arg);
            if (res)
                return res;
        }
    }
    return 0;
}
/* Resize the bucket array to match the current number of entries
   (target load factor between HASHTABLE_LOW and HASHTABLE_HIGH) and
   redistribute all entries. Entries are relinked in place — no entry is
   reallocated. On allocation failure the old buckets are kept, so the
   table stays valid (just over/under-loaded). */
static void
hashtable_rehash(_Numba_hashtable_t *ht)
{
    size_t buckets_size, new_size, bucket;
    _Py_slist_t *old_buckets = NULL;
    size_t old_num_buckets;

    new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
    if (new_size == ht->num_buckets)
        return;

    old_num_buckets = ht->num_buckets;

    buckets_size = new_size * sizeof(ht->buckets[0]);
    old_buckets = ht->buckets;
    ht->buckets = (_Py_slist_t *) ht->alloc.malloc(buckets_size);
    if (ht->buckets == NULL) {
        /* cancel rehash on memory allocation failure */
        ht->buckets = old_buckets ;
        /* memory allocation failed */
        return;
    }
    memset(ht->buckets, 0, buckets_size);

    ht->num_buckets = new_size;

    /* Move every entry from the old buckets into its new bucket, computed
       from the cached key_hash and the new mask. */
    for (bucket = 0; bucket < old_num_buckets; bucket++) {
        _Numba_hashtable_entry_t *entry, *next;
        for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
            size_t entry_index;

            assert(ht->hash_func(entry->key) == entry->key_hash);
            /* Save the link before prepending clobbers it. */
            next = ENTRY_NEXT(entry);
            entry_index = entry->key_hash & (new_size - 1);

            _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
        }
    }

    ht->alloc.free(old_buckets);
}
extern "C" void
_Numba_hashtable_clear(_Numba_hashtable_t *ht)
{
_Numba_hashtable_entry_t *entry, *next;
size_t i;
for (i=0; i < ht->num_buckets; i++) {
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
next = ENTRY_NEXT(entry);
if (ht->free_data_func)
ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
ht->alloc.free(entry);
}
_Py_slist_init(&ht->buckets[i]);
}
ht->entries = 0;
hashtable_rehash(ht);
}
extern "C" void
_Numba_hashtable_destroy(_Numba_hashtable_t *ht)
{
size_t i;
for (i = 0; i < ht->num_buckets; i++) {
_Py_slist_item_t *entry = ht->buckets[i].head;
while (entry) {
_Py_slist_item_t *entry_next = entry->next;
if (ht->free_data_func)
ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
ht->alloc.free(entry);
entry = entry_next;
}
}
ht->alloc.free(ht->buckets);
ht->alloc.free(ht);
}
/* Return a copy of the hash table */
/* Entry data is duplicated via copy_data_func when set (a NULL result is
   treated as failure), otherwise copied bytewise. Returns NULL on any
   failure, destroying the partial copy. */
extern "C" _Numba_hashtable_t *
_Numba_hashtable_copy(_Numba_hashtable_t *src)
{
    _Numba_hashtable_t *dst;
    _Numba_hashtable_entry_t *entry;
    size_t bucket;
    int err;
    void *data, *new_data;

    dst = _Numba_hashtable_new_full(src->data_size, src->num_buckets,
                                    src->hash_func, src->compare_func,
                                    src->copy_data_func, src->free_data_func,
                                    src->get_data_size_func, &src->alloc);
    if (dst == NULL)
        return NULL;

    for (bucket=0; bucket < src->num_buckets; bucket++) {
        entry = TABLE_HEAD(src, bucket);
        for (; entry; entry = ENTRY_NEXT(entry)) {
            if (src->copy_data_func) {
                /* Duplicate the pointed-to data; store the new pointer. */
                data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
                new_data = src->copy_data_func(data);
                if (new_data != NULL)
                    err = _Numba_hashtable_set(dst, entry->key,
                                               &new_data, src->data_size);
                else
                    err = 1;
            }
            else {
                /* No copy callback: bytewise copy of the inline data. */
                data = _Numba_HASHTABLE_ENTRY_DATA(entry);
                err = _Numba_hashtable_set(dst, entry->key, data, src->data_size);
            }
            if (err) {
                _Numba_hashtable_destroy(dst);
                return NULL;
            }
        }
    }
    return dst;
}
/*
* See _hashtable.c for more information about this file.
*/
#ifndef Py_HASHTABLE_H
#define Py_HASHTABLE_H
/* The whole API is private */
#ifndef Py_LIMITED_API
typedef struct _Py_slist_item_s {
struct _Py_slist_item_s *next;
} _Py_slist_item_t;
typedef struct {
_Py_slist_item_t *head;
} _Py_slist_t;
#define _Py_SLIST_ITEM_NEXT(ITEM) (((_Py_slist_item_t *)ITEM)->next)
#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head)
typedef struct {
/* used by _Numba_hashtable_t.buckets to link entries */
_Py_slist_item_t _Py_slist_item;
const void *key;
Py_uhash_t key_hash;
/* data follows */
} _Numba_hashtable_entry_t;
#define _Numba_HASHTABLE_ENTRY_DATA(ENTRY) \
((char *)(ENTRY) + sizeof(_Numba_hashtable_entry_t))
#define _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \
(*(void **)_Numba_HASHTABLE_ENTRY_DATA(ENTRY))
#define _Numba_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \
do { \
assert((DATA_SIZE) == (TABLE)->data_size); \
memcpy(DATA, _Numba_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \
} while (0)
typedef Py_uhash_t (*_Numba_hashtable_hash_func) (const void *key);
typedef int (*_Numba_hashtable_compare_func) (const void *key, const _Numba_hashtable_entry_t *he);
typedef void* (*_Numba_hashtable_copy_data_func)(void *data);
typedef void (*_Numba_hashtable_free_data_func)(void *data);
typedef size_t (*_Numba_hashtable_get_data_size_func)(void *data);
typedef struct {
/* allocate a memory block */
void* (*malloc) (size_t size);
/* release a memory block */
void (*free) (void *ptr);
} _Numba_hashtable_allocator_t;
/* The hash table object: an array of buckets, each holding a singly
   linked list of entries. Each entry embeds `data_size` bytes of data
   inline after its header. */
typedef struct {
    size_t num_buckets;                 /* length of `buckets` (power of 2) */
    size_t entries; /* Total number of entries in the table. */
    _Py_slist_t *buckets;               /* bucket array of entry lists */
    size_t data_size;                   /* bytes of inline data per entry */
    _Numba_hashtable_hash_func hash_func;        /* key -> hash */
    _Numba_hashtable_compare_func compare_func;  /* key equality test */
    _Numba_hashtable_copy_data_func copy_data_func;          /* optional */
    _Numba_hashtable_free_data_func free_data_func;          /* optional */
    _Numba_hashtable_get_data_size_func get_data_size_func;  /* optional */
    _Numba_hashtable_allocator_t alloc;          /* malloc/free pair */
} _Numba_hashtable_t;
/* hash and compare functions for integers and pointers */
extern "C" PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_ptr(const void *key);
extern "C" PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_int(const void *key);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry);
extern "C" PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new(
size_t data_size,
_Numba_hashtable_hash_func hash_func,
_Numba_hashtable_compare_func compare_func);
extern "C" PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new_full(
size_t data_size,
size_t init_size,
_Numba_hashtable_hash_func hash_func,
_Numba_hashtable_compare_func compare_func,
_Numba_hashtable_copy_data_func copy_data_func,
_Numba_hashtable_free_data_func free_data_func,
_Numba_hashtable_get_data_size_func get_data_size_func,
_Numba_hashtable_allocator_t *allocator);
extern "C" PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_copy(_Numba_hashtable_t *src);
extern "C" PyAPI_FUNC(void) _Numba_hashtable_clear(_Numba_hashtable_t *ht);
extern "C" PyAPI_FUNC(void) _Numba_hashtable_destroy(_Numba_hashtable_t *ht);
typedef int (*_Numba_hashtable_foreach_func) (_Numba_hashtable_entry_t *entry, void *arg);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_foreach(
_Numba_hashtable_t *ht,
_Numba_hashtable_foreach_func func, void *arg);
extern "C" PyAPI_FUNC(size_t) _Numba_hashtable_size(_Numba_hashtable_t *ht);
extern "C" PyAPI_FUNC(_Numba_hashtable_entry_t*) _Numba_hashtable_get_entry(
_Numba_hashtable_t *ht,
const void *key);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_set(
_Numba_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_get(
_Numba_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_pop(
_Numba_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
extern "C" PyAPI_FUNC(void) _Numba_hashtable_delete(
_Numba_hashtable_t *ht,
const void *key);
#define _Numba_HASHTABLE_SET(TABLE, KEY, DATA) \
_Numba_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))
#define _Numba_HASHTABLE_GET(TABLE, KEY, DATA) \
_Numba_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))
#endif /* Py_LIMITED_API */
#endif
/*
* Helper functions used by Numba at runtime.
* This C file is meant to be included after defining the
* NUMBA_EXPORT_FUNC() and NUMBA_EXPORT_DATA() macros.
*/
#include "_pymodule.h"
#include <stddef.h>
#include <stdio.h>
#include <math.h>
#include <complex.h>
#ifdef _MSC_VER
#define int64_t signed __int64
#define uint64_t unsigned __int64
#define uint32_t unsigned __int32
#define _complex_float_t _Fcomplex
#define _complex_float_ctor(r, i) _FCbuild(r, i)
#else
#include <stdint.h>
#define _complex_float_t complex float
#define _complex_float_ctor(r, i) (r + I * i)
#endif
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/ndarrayobject.h>
#include <numpy/arrayscalars.h>
#include "_arraystruct.h"
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 11)
/*
* For struct _frame
*/
#include "internal/pycore_frame.h"
#endif
/*
* Other helpers.
*/
/* Fix fmod() and fmodf() for windows x64 VC 9.0 (VS 2008)
   https://support.microsoft.com/en-us/kb/982107
   The runtime registers (via numba_set_fnclex below) a helper that clears
   the x87 FPU exception state before each fmod call. */
static void (*fnclex)(void) = NULL;

NUMBA_EXPORT_FUNC(double)
numba_fixed_fmod(double x, double y){
    /* Guard: only call the workaround if it has been registered. The
       original dereferenced fnclex unconditionally and would crash if the
       wrapper was invoked before numba_set_fnclex(). */
    if (fnclex != NULL)
        fnclex(); /* no inline asm in x64 =( */
    return fmod(x, y);
}

NUMBA_EXPORT_FUNC(float)
numba_fixed_fmodf(float x, float y) {
    if (fnclex != NULL)
        fnclex(); /* no inline asm in x64 =( */
    return fmodf(x, y);
}

/* Register the fnclex helper used by the two wrappers above. */
NUMBA_EXPORT_FUNC(void)
numba_set_fnclex(void *fn){
    /* Explicit cast: an implicit void* -> function pointer conversion is
       ill-formed in C++, and this file uses C++ linkage. */
    fnclex = (void (*)(void))fn;
}
/* provide 64-bit division function to 32-bit platforms */
/* NOTE: division/remainder by zero (and INT64_MIN / -1) are undefined
   behavior in C; callers are expected to have guarded against them. */
NUMBA_EXPORT_FUNC(int64_t)
numba_sdiv(int64_t a, int64_t b) {
    return a / b;
}

NUMBA_EXPORT_FUNC(uint64_t)
numba_udiv(uint64_t a, uint64_t b) {
    return a / b;
}

/* provide 64-bit remainder function to 32-bit platforms */
NUMBA_EXPORT_FUNC(int64_t)
numba_srem(int64_t a, int64_t b) {
    return a % b;
}

NUMBA_EXPORT_FUNC(uint64_t)
numba_urem(uint64_t a, uint64_t b) {
    return a % b;
}
/* provide frexp and ldexp; these wrappers deal with special cases
 * (zero, nan, infinity) directly, to sidestep platform differences.
 */
NUMBA_EXPORT_FUNC(double)
numba_frexp(double x, int *exp)
{
    /* Zero and non-finite inputs pass through unchanged with *exp = 0. */
    if (!Py_IS_FINITE(x) || !x)
        *exp = 0;
    else
        x = frexp(x, exp);
    return x;
}
/* Float variant of numba_frexp(): zero and non-finite inputs pass
   through unchanged with *exp = 0, otherwise defer to frexpf().
   Condition written with !Py_IS_FINITE() for consistency with the double
   version above; it is equivalent to Py_IS_NAN(x) || Py_IS_INFINITY(x). */
NUMBA_EXPORT_FUNC(float)
numba_frexpf(float x, int *exp)
{
    if (!Py_IS_FINITE(x) || !x)
        *exp = 0;
    else
        x = frexpf(x, exp);
    return x;
}
/* ldexp wrappers: zero, non-finite x, or a zero exponent return x
   unchanged, avoiding platform differences in the library call. */
NUMBA_EXPORT_FUNC(double)
numba_ldexp(double x, int exp)
{
    if (Py_IS_FINITE(x) && x && exp)
        x = ldexp(x, exp);
    return x;
}

NUMBA_EXPORT_FUNC(float)
numba_ldexpf(float x, int exp)
{
    if (Py_IS_FINITE(x) && x && exp)
        x = ldexpf(x, exp);
    return x;
}
/* Thin exported wrappers around the C99 base-2 exponential/logarithm
   functions, so compiled code can resolve them by a stable name. */
NUMBA_EXPORT_FUNC(double)
numba_exp2(double x)
{
    return exp2(x);
}

NUMBA_EXPORT_FUNC(float)
numba_exp2f(float x)
{
    return exp2f(x);
}

NUMBA_EXPORT_FUNC(double)
numba_log2(double x)
{
    return log2(x);
}

NUMBA_EXPORT_FUNC(float)
numba_log2f(float x)
{
    return log2f(x);
}
/* provide complex power */
/* Compute *a ** *b into *out using CPython's complex power. */
NUMBA_EXPORT_FUNC(void)
numba_cpow(Py_complex *a, Py_complex *b, Py_complex *out) {
    errno = 0;
    *out = _Py_c_pow(*a, *b);
    if (errno == EDOM) {
        /* _Py_c_pow() doesn't bother returning the right value
           in this case, as Python raises ZeroDivisionError */
        out->real = out->imag = Py_NAN;
    }
}

/* Single-precision variant: widen to double, call numba_cpow(), then
   narrow the result back to complex float. */
NUMBA_EXPORT_FUNC(void)
numba_cpowf(_complex_float_t *a, _complex_float_t *b, _complex_float_t *out) {
    Py_complex _a, _b, _out;
    _a.real = crealf(*a);
    _a.imag = cimagf(*a);
    _b.real = crealf(*b);
    _b.imag = cimagf(*b);
    numba_cpow(&_a, &_b, &_out);
    *out = _complex_float_ctor((float) _out.real, (float) _out.imag);
}
/* C99 math functions: redirect to system implementations */
NUMBA_EXPORT_FUNC(double)
numba_gamma(double x)
{
    return tgamma(x);
}

NUMBA_EXPORT_FUNC(float)
numba_gammaf(float x)
{
    return tgammaf(x);
}

NUMBA_EXPORT_FUNC(double)
numba_lgamma(double x)
{
    return lgamma(x);
}

NUMBA_EXPORT_FUNC(float)
numba_lgammaf(float x)
{
    return lgammaf(x);
}

NUMBA_EXPORT_FUNC(double)
numba_erf(double x)
{
    return erf(x);
}

NUMBA_EXPORT_FUNC(float)
numba_erff(float x)
{
    return erff(x);
}

NUMBA_EXPORT_FUNC(double)
numba_erfc(double x)
{
    return erfc(x);
}

NUMBA_EXPORT_FUNC(float)
numba_erfcf(float x)
{
    return erfcf(x);
}

NUMBA_EXPORT_FUNC(float)
numba_nextafterf(float a, float b)
{
    return nextafterf(a, b);
}

NUMBA_EXPORT_FUNC(double)
numba_nextafter(double a, double b)
{
    return nextafter(a, b);
}
/* Unpack any Python complex-like object into a Py_complex structure */
/* Returns 1 on success, 0 on failure (with a Python error set by the
   failing C-API call). Accepts: Python complex / numpy complex128,
   numpy complex64 (cast to complex128), or anything convertible by
   PyNumber_Float (imaginary part set to 0). */
NUMBA_EXPORT_FUNC(int)
numba_complex_adaptor(PyObject* obj, Py_complex *out) {
    PyObject* fobj;
    PyArray_Descr *dtype;
    double val[2];

    // Convert from python complex or numpy complex128
    if (PyComplex_Check(obj)) {
        out->real = PyComplex_RealAsDouble(obj);
        out->imag = PyComplex_ImagAsDouble(obj);
    }
    // Convert from numpy complex64
    else if (PyArray_IsScalar(obj, ComplexFloating)) {
        dtype = PyArray_DescrFromScalar(obj);
        if (dtype == NULL) {
            return 0;
        }
        /* Cast into val as a C double pair (real, imag). */
        if (PyArray_CastScalarDirect(obj, dtype, &val[0], NPY_CDOUBLE) < 0) {
            Py_DECREF(dtype);
            return 0;
        }
        out->real = val[0];
        out->imag = val[1];
        Py_DECREF(dtype);
    } else {
        /* Fall back to a real number conversion.
           NOTE(review): PyFloat_AsDouble can fail returning -1 with an
           error set; this path still reports success — confirm callers
           check PyErr_Occurred(). */
        fobj = PyNumber_Float(obj);
        if (!fobj) return 0;
        out->real = PyFloat_AsDouble(fobj);
        out->imag = 0.;
        Py_DECREF(fobj);
    }
    return 1;
}
/* Minimum PyBufferObject structure to hack inside it */
/* NOTE(review): this struct appears to be legacy (Python 2 buffer object
   layout) and is not referenced by the visible code — confirm before
   removing. */
typedef struct {
    PyObject_HEAD
    PyObject *b_base;
    void *b_ptr;
    Py_ssize_t b_size;
    Py_ssize_t b_offset;
} PyBufferObject_Hack;

/*
Get data address of record data buffer
*/
/* On success fills *pbuf (caller must release it) and returns its data
   pointer; returns NULL with a Python error set on failure. */
NUMBA_EXPORT_FUNC(void *)
numba_extract_record_data(PyObject *recordobj, Py_buffer *pbuf) {
    PyObject *attrdata;
    void *ptr;

    attrdata = PyObject_GetAttrString(recordobj, "data");
    if (!attrdata) return NULL;

    if (-1 == PyObject_GetBuffer(attrdata, pbuf, 0)){
        Py_DECREF(attrdata);
        return NULL;
    } else {
        /* The buffer view keeps the data alive after attrdata is released. */
        ptr = pbuf->buf;
    }
    Py_DECREF(attrdata);
    return ptr;
}
/*
 * Return a record instance with dtype as the record type, and backed
 * by a copy of the memory area pointed to by (pdata, size).
 */
/* Returns a new reference, or NULL with a Python error set. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_recreate_record(void *pdata, int size, PyObject *dtype) {
    PyObject *numpy = NULL;
    PyObject *numpy_record = NULL;
    PyObject *aryobj = NULL;
    PyObject *dtypearg = NULL;
    PyObject *record = NULL;
    PyArray_Descr *descr = NULL;

    if (dtype == NULL) {
        PyErr_Format(PyExc_RuntimeError,
                     "In 'numba_recreate_record', 'dtype' is NULL");
        return NULL;
    }

    numpy = PyImport_ImportModuleNoBlock("numpy");
    if (!numpy) goto CLEANUP;

    numpy_record = PyObject_GetAttrString(numpy, "record");
    if (!numpy_record) goto CLEANUP;

    /* Build the (np.record, dtype) pair and convert it to a descriptor. */
    dtypearg = PyTuple_Pack(2, numpy_record, dtype);
    if (!dtypearg || !PyArray_DescrConverter(dtypearg, &descr))
        goto CLEANUP;

    /* This steals a reference to descr, so we don't have to DECREF it */
    aryobj = PyArray_FromString(pdata, size, descr, 1, NULL);
    if (!aryobj) goto CLEANUP;

    record = PySequence_GetItem(aryobj, 0);

CLEANUP:
    Py_XDECREF(numpy);
    Py_XDECREF(numpy_record);
    Py_XDECREF(aryobj);
    Py_XDECREF(dtypearg);

    return record;
}
/* Fill `arystruct` from a numpy ndarray: data pointer, item count,
   itemsize, parent (borrowed reference), then ndim shape values followed
   by ndim stride values in shape_and_strides (caller must have sized it
   for 2*ndim). Returns 0 on success, -1 if obj is not an ndarray. */
NUMBA_EXPORT_FUNC(int)
numba_adapt_ndarray(PyObject *obj, arystruct_t* arystruct) {
    PyArrayObject *ndary;
    int i, ndim;
    npy_intp *p;

    if (!PyArray_Check(obj)) {
        return -1;
    }

    ndary = (PyArrayObject*)obj;
    ndim = PyArray_NDIM(ndary);

    arystruct->data = PyArray_DATA(ndary);
    arystruct->nitems = PyArray_SIZE(ndary);
    arystruct->itemsize = PyArray_ITEMSIZE(ndary);
    arystruct->parent = obj;
    p = arystruct->shape_and_strides;
    for (i = 0; i < ndim; i++, p++) {
        *p = PyArray_DIM(ndary, i);
    }
    for (i = 0; i < ndim; i++, p++) {
        *p = PyArray_STRIDE(ndary, i);
    }

    arystruct->meminfo = NULL;
    return 0;
}
/* Acquire a buffer view of `obj` with shape and strides information.
   Returns 0 on success, -1 with a Python error set on failure. */
NUMBA_EXPORT_FUNC(int)
numba_get_buffer(PyObject *obj, Py_buffer *buf)
{
    /* Ask for shape and strides, but no suboffsets */
    return PyObject_GetBuffer(obj, buf, PyBUF_RECORDS_RO);
}

/* Fill `arystruct` from an acquired buffer view: ndim shape values
   followed by ndim stride values in shape_and_strides, nitems computed
   as the product of the shape. The buffer's owner becomes the parent. */
NUMBA_EXPORT_FUNC(void)
numba_adapt_buffer(Py_buffer *buf, arystruct_t *arystruct)
{
    int i;
    npy_intp *p;

    arystruct->data = buf->buf;
    arystruct->itemsize = buf->itemsize;
    arystruct->parent = buf->obj;
    arystruct->nitems = 1;
    p = arystruct->shape_and_strides;
    for (i = 0; i < buf->ndim; i++, p++) {
        *p = buf->shape[i];
        arystruct->nitems *= buf->shape[i];
    }
    for (i = 0; i < buf->ndim; i++, p++) {
        *p = buf->strides[i];
    }
    arystruct->meminfo = NULL;
}

/* Release a buffer view acquired by numba_get_buffer(). */
NUMBA_EXPORT_FUNC(void)
numba_release_buffer(Py_buffer *buf)
{
    PyBuffer_Release(buf);
}
/* Create a new, well-behaved (aligned, writeable) ndarray wrapping
   existing `data` (not copied, not owned by the array).
   NOTE: `itemsize` is currently unused — the element size is implied by
   `type_num`; the parameter is kept for ABI stability. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_ndarray_new(int nd,
                  npy_intp *dims,   /* shape */
                  npy_intp *strides,
                  void* data,
                  int type_num,
                  int itemsize)
{
    PyObject *ndary;
    int flags = NPY_ARRAY_BEHAVED;
    ndary = PyArray_New((PyTypeObject*)&PyArray_Type, nd, dims, type_num,
                        strides, data, 0, flags, NULL);
    return ndary;
}
/*
 * Handle reshaping of zero-sized array.
 * See numba_attempt_nocopy_reshape() below.
 */
/* Parameters mirror numba_attempt_nocopy_reshape(); most are unused here
   since a zero-sized array has no meaningful strides. */
static int
nocopy_empty_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides,
                     npy_intp newnd, const npy_intp *newdims,
                     npy_intp *newstrides, npy_intp itemsize,
                     int is_f_order)
{
    int i;
    /* Just make the strides vaguely reasonable
     * (they can have any value in theory).
     */
    for (i = 0; i < newnd; i++)
        newstrides[i] = itemsize;
    return 1;  /* reshape successful */
}
/*
 * Straight from Numpy's _attempt_nocopy_reshape()
 * (np/core/src/multiarray/shape.c).
 * Attempt to reshape an array without copying data
 *
 * This function should correctly handle all reshapes, including
 * axes of length 1. Zero strides should work but are untested.
 *
 * If a copy is needed, returns 0
 * If no copy is needed, returns 1 and fills `npy_intp *newstrides`
 *     with appropriate strides
 */
NUMBA_EXPORT_FUNC(int)
numba_attempt_nocopy_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides,
                             npy_intp newnd, const npy_intp *newdims,
                             npy_intp *newstrides, npy_intp itemsize,
                             int is_f_order)
{
    int oldnd;
    npy_intp olddims[NPY_MAXDIMS];
    npy_intp oldstrides[NPY_MAXDIMS];
    npy_intp np, op, last_stride;
    int oi, oj, ok, ni, nj, nk;

    oldnd = 0;
    /*
     * Remove axes with dimension 1 from the old array. They have no effect
     * but would need special cases since their strides do not matter.
     */
    for (oi = 0; oi < nd; oi++) {
        if (dims[oi]!= 1) {
            olddims[oldnd] = dims[oi];
            oldstrides[oldnd] = strides[oi];
            oldnd++;
        }
    }

    /* Reshape is only possible when both shapes have the same total
       number of elements. */
    np = 1;
    for (ni = 0; ni < newnd; ni++) {
        np *= newdims[ni];
    }
    op = 1;
    for (oi = 0; oi < oldnd; oi++) {
        op *= olddims[oi];
    }
    if (np != op) {
        /* different total sizes; no hope */
        return 0;
    }

    if (np == 0) {
        /* the Numpy code does not handle 0-sized arrays */
        return nocopy_empty_reshape(nd, dims, strides,
                                    newnd, newdims, newstrides,
                                    itemsize, is_f_order);
    }

    /* oi to oj and ni to nj give the axis ranges currently worked with */
    oi = 0;
    oj = 1;
    ni = 0;
    nj = 1;
    while (ni < newnd && oi < oldnd) {
        /* Grow the [oi, oj) and [ni, nj) windows until they cover the
           same number of elements. */
        np = newdims[ni];
        op = olddims[oi];

        while (np != op) {
            if (np < op) {
                /* Misses trailing 1s, these are handled later */
                np *= newdims[nj++];
            } else {
                op *= olddims[oj++];
            }
        }

        /* Check whether the original axes can be combined */
        for (ok = oi; ok < oj - 1; ok++) {
            if (is_f_order) {
                if (oldstrides[ok+1] != olddims[ok]*oldstrides[ok]) {
                    /* not contiguous enough */
                    return 0;
                }
            }
            else {
                /* C order */
                if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]) {
                    /* not contiguous enough */
                    return 0;
                }
            }
        }

        /* Calculate new strides for all axes currently worked with */
        if (is_f_order) {
            newstrides[ni] = oldstrides[oi];
            for (nk = ni + 1; nk < nj; nk++) {
                newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1];
            }
        }
        else {
            /* C order */
            newstrides[nj - 1] = oldstrides[oj - 1];
            for (nk = nj - 1; nk > ni; nk--) {
                newstrides[nk - 1] = newstrides[nk]*newdims[nk];
            }
        }
        ni = nj++;
        oi = oj++;
    }

    /*
     * Set strides corresponding to trailing 1s of the new shape.
     */
    if (ni >= 1) {
        last_stride = newstrides[ni - 1];
    }
    else {
        last_stride = itemsize;
    }
    if (is_f_order) {
        last_stride *= newdims[ni - 1];
    }
    for (nk = ni; nk < newnd; nk++) {
        newstrides[nk] = last_stride;
    }

    return 1;
}
/*
 * Cython utilities.
 */

/* Fetch the address of the given function, as exposed by
   a cython module */
/* Returns NULL with a Python error set on failure. */
static void *
import_cython_function(const char *module_name, const char *function_name)
{
    PyObject *module, *capi, *cobj;
    void *res = NULL;
    const char *capsule_name;

    module = PyImport_ImportModule(module_name);
    if (module == NULL)
        return NULL;
    capi = PyObject_GetAttrString(module, "__pyx_capi__");
    Py_DECREF(module);
    if (capi == NULL)
        return NULL;
    cobj = PyMapping_GetItemString(capi, (char *)function_name);
    Py_DECREF(capi);
    if (cobj == NULL) {
        /* Replace the mapping's KeyError with a more descriptive error. */
        PyErr_Clear();
        PyErr_Format(PyExc_ValueError,
                     "No function '%s' found in __pyx_capi__ of '%s'",
                     function_name, module_name);
        return NULL;
    }

    /* 2.7+ => Cython exports a PyCapsule */
    capsule_name = PyCapsule_GetName(cobj);
    if (capsule_name != NULL) {
        res = PyCapsule_GetPointer(cobj, capsule_name);
    }
    Py_DECREF(cobj);
    return res;
}
/* Python-callable wrapper: (module_name, function_name) -> int address
   of the function exported in the module's __pyx_capi__ dict.
   Returns NULL with a Python error set on failure. */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_import_cython_function(PyObject *self, PyObject *args)
{
    const char *module_name;
    const char *function_name;
    void *p = NULL;
    PyObject *res;

    if (!PyArg_ParseTuple(args, "ss", &module_name, &function_name)) {
        return NULL;
    }

    p = import_cython_function(module_name, function_name);
    if (p == NULL) {
        return NULL;
    }

    res = PyLong_FromVoidPtr(p);
    if (res == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "Could not convert function address to int");
        return NULL;
    }
    return res;
}
/* We use separate functions for datetime64 and timedelta64, to ensure
 * proper type checking.
 */

/* Extract the raw int64 payload from a numpy.datetime64 scalar.
   Returns -1 with a TypeError set on a wrong-typed argument; note -1 is
   also a valid payload, so callers must check for a pending exception. */
NUMBA_EXPORT_FUNC(npy_int64)
numba_extract_np_datetime(PyObject *td)
{
    if (!PyArray_IsScalar(td, Datetime)) {
        PyErr_SetString(PyExc_TypeError,
                        "expected a numpy.datetime64 object");
        return -1;
    }
    /* Read through the Datetime accessor; the original used
       PyArrayScalar_VAL(td, Timedelta), which only worked because both
       scalar structs happen to lay out `obval` identically. */
    return PyArrayScalar_VAL(td, Datetime);
}
/* Extract the raw int64 payload from a numpy.timedelta64 scalar.
   Returns -1 with a TypeError set on a wrong-typed argument; note -1 is
   also a valid payload, so callers must check for a pending exception. */
NUMBA_EXPORT_FUNC(npy_int64)
numba_extract_np_timedelta(PyObject *td)
{
    if (!PyArray_IsScalar(td, Timedelta)) {
        PyErr_SetString(PyExc_TypeError,
                        "expected a numpy.timedelta64 object");
        return -1;
    }
    return PyArrayScalar_VAL(td, Timedelta);
}
/* Build a numpy.datetime64 scalar from a raw value and a unit code
   (NPY_DATETIMEUNIT). Returns a new reference, or NULL on failure. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_create_np_datetime(npy_int64 value, int unit_code)
{
    PyDatetimeScalarObject *obj = (PyDatetimeScalarObject *)
        PyArrayScalar_New(Datetime);
    if (obj != NULL) {
        obj->obval = value;
        obj->obmeta.base = unit_code;
        obj->obmeta.num = 1;
    }
    return (PyObject *) obj;
}

/* Build a numpy.timedelta64 scalar from a raw value and a unit code
   (NPY_DATETIMEUNIT). Returns a new reference, or NULL on failure. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_create_np_timedelta(npy_int64 value, int unit_code)
{
    PyTimedeltaScalarObject *obj = (PyTimedeltaScalarObject *)
        PyArrayScalar_New(Timedelta);
    if (obj != NULL) {
        obj->obval = value;
        obj->obmeta.base = unit_code;
        obj->obmeta.num = 1;
    }
    return (PyObject *) obj;
}
/* float -> unsigned int conversion helpers. */
NUMBA_EXPORT_FUNC(uint64_t)
numba_fptoui(double x) {
    /* First cast to signed int of the full width to make sure sign extension
       happens (this can make a difference on some platforms...). */
    return (uint64_t) (int64_t) x;
}

NUMBA_EXPORT_FUNC(uint64_t)
numba_fptouif(float x) {
    return (uint64_t) (int64_t) x;
}

/* GIL helpers callable from compiled code: acquire/release via an opaque
   state slot provided by the caller. */
NUMBA_EXPORT_FUNC(void)
numba_gil_ensure(PyGILState_STATE *state) {
    *state = PyGILState_Ensure();
}

NUMBA_EXPORT_FUNC(void)
numba_gil_release(PyGILState_STATE *state) {
    PyGILState_Release(*state);
}

/* Return the type of `obj` as a borrowed reference. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_py_type(PyObject *obj) {
    return (PyObject *) Py_TYPE(obj);
}
/*
 * Functions for tagging an arbitrary Python object with an arbitrary pointer.
 * These functions make strong lifetime assumptions, see below.
 */

/* Process-wide mapping of id(obj) -> pointer, created lazily and never
   released (lives for the interpreter's lifetime). */
static PyObject *private_data_dict = NULL;

static PyObject *
_get_private_data_dict(void)
{
    if (private_data_dict == NULL)
        private_data_dict = PyDict_New();
    return private_data_dict;
}
NUMBA_EXPORT_FUNC(void)
numba_set_pyobject_private_data(PyObject *obj, void *ptr)
{
PyObject *dct = _get_private_data_dict();
/* This assumes the reference to setobj is kept alive until the
call to numba_reset_set_private_data()! */
PyObject *key = PyLong_FromVoidPtr((void *) obj);
PyObject *value = PyLong_FromVoidPtr(ptr);
if (!dct || !value || !key)
goto error;
if (PyDict_SetItem(dct, key, value))
goto error;
Py_DECREF(key);
Py_DECREF(value);
return;
error:
Py_FatalError("unable to set private data");
}
/* Retrieve the pointer previously attached to *obj* by
 * numba_set_pyobject_private_data(), or NULL if none was set.
 * Aborts the process on internal failure. */
NUMBA_EXPORT_FUNC(void *)
numba_get_pyobject_private_data(PyObject *obj)
{
    PyObject *dct = _get_private_data_dict();
    PyObject *value, *key = PyLong_FromVoidPtr((void *) obj);
    void *ptr;
    if (!dct || !key)
        goto error;

    /* PyDict_GetItem returns a borrowed reference and suppresses errors. */
    value = PyDict_GetItem(dct, key);
    Py_DECREF(key);
    if (!value)
        return NULL;    /* no entry for this object */
    else {
        ptr = PyLong_AsVoidPtr(value);
        /* NULL is also a legal stored pointer; only treat it as an error
           when an exception was actually raised by the conversion. */
        if (ptr == NULL && PyErr_Occurred())
            goto error;
        return ptr;
    }
error:
    Py_FatalError("unable to get private data");
    return NULL;    /* unreachable, silences compiler warnings */
}
/* Remove the private-data entry for *obj*, if any.  A missing entry is
 * tolerated (the KeyError is cleared); only allocation failure aborts. */
NUMBA_EXPORT_FUNC(void)
numba_reset_pyobject_private_data(PyObject *obj)
{
    PyObject *dct = _get_private_data_dict();
    PyObject *key = PyLong_FromVoidPtr((void *) obj);
    if (!key)
        goto error;
    /* Deleting a non-existent key raises KeyError; swallow it. */
    if (PyDict_DelItem(dct, key))
        PyErr_Clear();
    Py_DECREF(key);
    return;
error:
    Py_FatalError("unable to reset private data");
}
/* Unpack a Python slice object into C start/stop/step values, applying
 * Python's defaults for members that are None.  Returns 0 on success,
 * -1 with an exception set on error (wrong type or overflowing member). */
NUMBA_EXPORT_FUNC(int)
numba_unpack_slice(PyObject *obj,
                   Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
{
    PySliceObject *slice = (PySliceObject *) obj;
    if (!PySlice_Check(obj)) {
        PyErr_Format(PyExc_TypeError,
                     "Expected a slice object, got '%s'",
                     Py_TYPE(slice)->tp_name);
        return -1;
    }
/* Fetch one slice member into the same-named output pointer, using
 * DEFAULT when the member is None.  Returns -1 from the enclosing
 * function on conversion error. */
#define FETCH_MEMBER(NAME, DEFAULT) \
    if (slice->NAME != Py_None) { \
        Py_ssize_t v = PyNumber_AsSsize_t(slice->NAME, \
                                          PyExc_OverflowError); \
        if (v == -1 && PyErr_Occurred()) \
            return -1; \
        *NAME = v; \
    } \
    else { \
        *NAME = DEFAULT; \
    }
    /* NOTE: step must be fetched first — the defaults for stop and start
     * depend on the sign of *step, matching Python slice semantics. */
    FETCH_MEMBER(step, 1)
    FETCH_MEMBER(stop, (*step > 0) ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN)
    FETCH_MEMBER(start, (*step > 0) ? 0 : PY_SSIZE_T_MAX)
    return 0;

#undef FETCH_MEMBER
}
/* Abort the process from compiled code.  The GIL is acquired first because
 * Py_FatalError inspects interpreter state while printing diagnostics. */
NUMBA_EXPORT_FUNC(int)
numba_fatal_error(void)
{
    PyGILState_Ensure();
    Py_FatalError("in Numba-compiled function");
    return 0; /* unreachable */
}
/* Insert a frame into the traceback for (funcname, filename, lineno). */
/* This function is CPython's _PyTraceback_Add, renamed, see:
* https://github.com/python/cpython/blob/d545869d084e70d4838310e79b52a25a72a1ca56/Python/traceback.c#L246
* and modified for Python 2.x based on
* https://github.com/python/cpython/blob/2e1a34025cde19bddf12a2eac8fedb6afcca8339/Modules/_ctypes/callbacks.c#L151-L174
*/
/* Insert a synthetic frame (funcname, filename, lineno) into the traceback
 * of the currently active exception.  Copied from CPython's
 * _PyTraceback_Add; on failure the new error is chained onto the saved one.
 */
static void traceback_add(const char *funcname, const char *filename, int lineno)
{
    PyObject *globals = NULL;
    PyCodeObject *code = NULL;
    PyFrameObject *frame = NULL;
    PyObject *exc, *val, *tb;

    /* Save and clear the current exception. Python functions must not be
       called with an exception set. Calling Python functions happens when
       the codec of the filesystem encoding is implemented in pure Python. */
    PyErr_Fetch(&exc, &val, &tb);

    globals = PyDict_New();
    if (!globals)
        goto error;
    code = PyCode_NewEmpty(filename, funcname, lineno);
    if (!code) {
        /* BUGFIX: globals was leaked on this path; CPython's
           _PyTraceback_Add releases it here. */
        Py_DECREF(globals);
        goto error;
    }
    frame = PyFrame_New(PyThreadState_Get(), code, globals, NULL);
    Py_DECREF(globals);
    Py_DECREF(code);
    if (!frame)
        goto error;

#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 11) /* 3.11 */
    /* unsafe cast to our copy of _frame to access the f_lineno field */
    typedef struct _frame py_frame;
    py_frame* hacked_frame = (py_frame*)frame;
    hacked_frame->f_lineno = lineno;
#elif (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION < 11) /* <3.11 */
    frame->f_lineno = lineno;
#else
    #error "Check if struct _frame has been changed in the new version"
#endif

    /* Restore the saved exception and record the synthetic frame in its
       traceback. */
    PyErr_Restore(exc, val, tb);
    PyTraceBack_Here(frame);
    Py_DECREF(frame);
    return;

error:
    _PyErr_ChainExceptions(exc, val, tb);
}
/*
* Add traceback information to *loc* to the active exception.
* loc can be NULL, which causes this function to become a no-op.
*/
static
void traceback_add_loc(PyObject *loc) {
    /* Add a traceback frame described by *loc*, a
     * (function_name, filename, lineno) tuple.  No-op when loc is NULL,
     * None, or not a tuple. */
    const char *function_name_str = NULL, *filename_str = NULL;
    PyObject *function_name = NULL, *filename = NULL, *lineno = NULL;
    Py_ssize_t pos;
    /* instance is instantiated/internal exception is raised, if loc is present
     * add a frame for it into the traceback */
    if(loc && loc != Py_None && PyTuple_Check(loc))
    {
        /* PyTuple_GET_ITEM returns borrowed references; no decref needed.
         * NOTE(review): PyString_AsString is presumably a Py3 compat macro
         * from _pymodule.h — confirm; no error checking is done on the
         * extracted strings. */
        pos = 0;
        function_name = PyTuple_GET_ITEM(loc, pos);
        function_name_str = PyString_AsString(function_name);
        pos = 1;
        filename = PyTuple_GET_ITEM(loc, pos);
        filename_str = PyString_AsString(filename);
        pos = 2;
        lineno = PyTuple_GET_ITEM(loc, pos);
        traceback_add(function_name_str, filename_str, \
                      (int)PyLong_AsLong(lineno));
    }
}
/**
 * Re-raise the current active exception.
 * Called internally by process_raise() when *exc* is None.
 */
static
int reraise_exc_is_none(void) {
    /* Reraise the in-flight exception (the one being handled).
     * Returns 1 when an exception was restored, 0 (with RuntimeError set)
     * when there is no active exception. */
    PyObject *tb, *type, *value;
#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 11)
    /* NOTE(review): PyErr_GetExcInfo returns NEW references, so the
     * unconditional Py_XINCREF below appears to over-increment on 3.11+
     * (and the early-return path would leak them) — confirm against the
     * CPython C-API docs. */
    PyErr_GetExcInfo(&type, &value, &tb);
#elif (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 8)
    /* Pre-3.11: read borrowed references straight off the thread state's
     * exception stack. */
    PyThreadState *tstate = PyThreadState_GET();
    _PyErr_StackItem *tstate_exc = tstate->exc_info;
    type = tstate_exc->exc_type;
    value = tstate_exc->exc_value;
    tb = tstate_exc->exc_traceback;
#endif
    if (type == Py_None) {
        PyErr_SetString(PyExc_RuntimeError,
                        "No active exception to reraise");
        return 0;
    }

    /* incref needed because PyErr_Restore DOES NOT */
    Py_XINCREF(type);
    Py_XINCREF(value);
    Py_XINCREF(tb);

    PyErr_Restore(type, value, tb);
    return 1;
}
/*
* Set exception given the Exception type and the constructor argument.
* Equivalent to ``raise exc(value)``.
* PyExceptionClass_Check(exc) must be True.
* value can be NULL.
*/
static
int process_exception_class(PyObject *exc, PyObject *value) {
    /* Validate that instantiating *exc* with *value* yields a BaseException
     * subclass instance, then raise via PyErr_SetObject (letting CPython
     * normalize lazily).  Returns 1 on success, 0 with an exception set
     * otherwise. */
    PyObject *type;
    /* It is a class, type used here just as a tmp var.
     * NOTE(review): PyObject_CallObject requires value to be NULL or a
     * tuple — presumably guaranteed by the callers; confirm. */
    type = PyObject_CallObject(exc, value);
    if (type == NULL){
        return 0;
    }
    if (!PyExceptionInstance_Check(type)) {
        PyErr_SetString(PyExc_TypeError,
                        "exceptions must derive from BaseException");
        Py_DECREF(type);
        return 0;
    }
    /* all ok, set type to the exc; the trial instance is discarded and the
     * exception is stored unnormalized as (class, args). */
    Py_DECREF(type);
    type = exc;
    PyErr_SetObject(type, value);
    return 1;
}
/*
* Internal routine to process exceptions.
* exc cannot be NULL. It can be a None, Exception type, or Exception instance.
* value can be NULL for absent, or any PyObject valid for the exception.
*/
static
int process_raise(PyObject *exc, PyObject *value) {
    /* Dispatch on the kind of *exc*:
     *   None                -> re-raise the active exception (returns 1/0),
     *   exception class     -> raise exc(value) (returns 1/0),
     *   exception instance  -> raise it directly (returns 0, exception set),
     *   anything else       -> TypeError (returns 0).
     * In every branch a Python exception ends up set. */
    /* exc is None */
    if (exc == Py_None) {
        return reraise_exc_is_none();
    }
    /* exc should be an exception class */
    else if (PyExceptionClass_Check(exc)) {
        return process_exception_class(exc, value);
    }
    /* exc is an instance of an Exception */
    else if (PyExceptionInstance_Check(exc)) {
        PyObject *type = PyExceptionInstance_Class(exc);
        PyErr_SetObject(type, exc);
        return 0;
    }
    else {
        /* Not something you can raise.  You get an exception
           anyway, just not what you specified :-) */
        PyErr_SetString(PyExc_TypeError,
                        "exceptions must derive from BaseException");
        return 0;
    }
}
/* Logic for raising an arbitrary object. Adapted from CPython's ceval.c.
This *consumes* a reference count to its argument. */
NUMBA_EXPORT_FUNC(int)
numba_do_raise(PyObject *exc_packed)
{
    /* Raise *exc_packed*, which is either a (class/inst/tuple, arguments,
     * location) tuple coming from nopython mode, or a bare exception
     * (reraise / objmode).  Always leaves a Python exception set; the
     * return value is process_raise()'s status. */
    int status;
    PyObject *exc = NULL, *value = NULL, *loc = NULL;

    /* We support the following forms of raise:
       raise
       raise <instance>
       raise <type> */

    /* could be a tuple from npm (some exc like thing, args, location) */
    if (PyTuple_CheckExact(exc_packed)) {
        /* Unpack a (class/inst/tuple, arguments, location) tuple. */
        if (!PyArg_ParseTuple(exc_packed, "OOO", &exc, &value, &loc)) {
            /* loc is still NULL here, so this is a no-op kept for safety */
            traceback_add_loc(loc);
            /* BUGFIX: this function consumes a reference to exc_packed
               (see comment above); the early-failure path previously
               leaked it. */
            Py_DECREF(exc_packed);
            return 0;
        }
    } else {
        /* could be a reraise or an exception from objmode */
        exc = exc_packed;
        /* branch exit with value = NULL and loc = NULL */
    }
    /* value is either NULL or borrowed from exc_packed */
    status = process_raise(exc, value);
    traceback_add_loc(loc);
    Py_DECREF(exc_packed);
    return status;
}
#ifdef PYCC_COMPILING
/* AOT avoid the use of `numba.core.serialize` */
/* Deserialize *n* bytes at *data* with plain pickle.loads.
 * NOTE: the *hashed* argument is unused in this AOT variant; it exists
 * only to keep the signature identical to the JIT build below. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_unpickle(const char *data, int n, const char *hashed)
{
    PyObject *buf, *obj;
    /* Cached forever on purpose: module-lifetime static. */
    static PyObject *loads;

    /* Caching the pickle.loads function shaves a couple µs here. */
    if (loads == NULL) {
        PyObject *picklemod;
        picklemod = PyImport_ImportModule("pickle");
        if (picklemod == NULL)
            return NULL;
        loads = PyObject_GetAttrString(picklemod, "loads");
        Py_DECREF(picklemod);
        if (loads == NULL)
            return NULL;
    }

    buf = PyBytes_FromStringAndSize(data, n);
    if (buf == NULL)
        return NULL;
    obj = PyObject_CallFunctionObjArgs(loads, buf, NULL);
    Py_DECREF(buf);
    return obj;
}
#else
/* Deserialize via numba.core.serialize._numba_unpickle, which caches
 * results keyed on the data address and verifies the payload hash.
 * Returns a new reference, or NULL with an exception set. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_unpickle(const char *data, int n, const char *hashed)
{
    PyObject *buf=NULL, *obj=NULL, *addr=NULL, *hashedbuf=NULL;
    static PyObject *loads=NULL;

    /* Caching the _numba_unpickle function shaves a couple µs here. */
    if (loads == NULL) {
        PyObject *picklemod;
        picklemod = PyImport_ImportModule("numba.core.serialize");
        if (picklemod == NULL)
            return NULL;
        loads = PyObject_GetAttrString(picklemod, "_numba_unpickle");
        Py_DECREF(picklemod);
        if (loads == NULL)
            return NULL;
    }

    buf = PyBytes_FromStringAndSize(data, n);
    if (buf == NULL)
        return NULL;
    /* SHA1 produces 160 bit or 20 bytes */
    hashedbuf = PyBytes_FromStringAndSize(hashed, 20);
    if (hashedbuf == NULL)
        goto error;
    addr = PyLong_FromVoidPtr((void*)data);
    if (addr == NULL)
        goto error;
    obj = PyObject_CallFunctionObjArgs(loads, addr, buf, hashedbuf, NULL);
error:
    /* obj stays NULL when a goto skipped the call; the error path and the
       success path share the cleanup below. */
    Py_XDECREF(addr);
    Py_XDECREF(hashedbuf);
    Py_DECREF(buf);
    return obj;
}
#endif
NUMBA_EXPORT_FUNC(PyObject *)
numba_runtime_build_excinfo_struct(PyObject* struct_gv, PyObject* exc_args)
{
    /* Delegate to numba.core.serialize.runtime_build_excinfo_struct,
       caching the callable in a module-lifetime static (shaves a couple
       µs per call). */
    static PyObject *helper = NULL;

    if (helper == NULL) {
        PyObject *serialize_mod = PyImport_ImportModule("numba.core.serialize");
        if (serialize_mod == NULL)
            return NULL;
        helper = PyObject_GetAttrString(serialize_mod,
                                        "runtime_build_excinfo_struct");
        Py_DECREF(serialize_mod);
        if (helper == NULL)
            return NULL;
    }
    /* The helper returns None on failure (i.e. can't serialize one of the
       args).  Is there a better way to handle this? raise an exception
       here? */
    return PyObject_CallFunctionObjArgs(helper, struct_gv, exc_args, NULL);
}
/*
* Unicode helpers
*/
/* Developer note:
*
* The hash value of unicode objects is obtained via:
* ((PyASCIIObject *)(obj))->hash;
* The use comes from this definition:
* https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L119-L120
* and it's used extensively throughout the `cpython/Object/unicodeobject.c`
* source, not least in `unicode_hash` itself:
* https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L11662-L11679
*
* The Unicode string struct layouts are described here:
* https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Include/cpython/unicodeobject.h#L82-L161
* essentially, all the unicode string layouts start with a `PyASCIIObject` at
* offset 0 (as of commit 6d43f6f081023b680d9db4542d19b9e382149f0a, somewhere
* in the 3.8 development cycle).
*
* For safety against future CPython internal changes, the code checks that the
* _base members of the unicode structs are what is expected in 3.7, and that
* their offset is 0. It then walks the struct to the hash location to make sure
* the offset is indeed the same as PyASCIIObject->hash.
* Note: The large condition in the if should evaluate to a compile time
* constant.
*/
#define MEMBER_SIZE(structure, member) sizeof(((structure *)0)->member)
/* Extract the raw character data pointer of a unicode object along with
 * its length, kind, ASCII-ness and cached hash.  Returns NULL if the
 * string cannot be readied or if the struct-layout sanity checks fail. */
NUMBA_EXPORT_FUNC(void *)
numba_extract_unicode(PyObject *obj, Py_ssize_t *length, int *kind,
                      unsigned int *ascii, Py_ssize_t *hash) {
    /* PyUnicode_READY returns 0 on success, hence the negation. */
    if (!PyUnicode_READY(obj)) {
        *length = PyUnicode_GET_LENGTH(obj);
        *kind = PyUnicode_KIND(obj);
        /* could also use PyUnicode_IS_ASCII but it is not publicly advertised in https://docs.python.org/3/c-api/unicode.html */
        *ascii = (unsigned int)(PyUnicode_MAX_CHAR_VALUE(obj) == (0x7f));
        /* this is here as a crude check for safe casting of all unicode string
         * structs to a PyASCIIObject; the whole condition is a compile-time
         * constant (see the developer note above). */
        if (MEMBER_SIZE(PyCompactUnicodeObject, _base) == sizeof(PyASCIIObject) &&
            MEMBER_SIZE(PyUnicodeObject, _base) == sizeof(PyCompactUnicodeObject) &&
            offsetof(PyCompactUnicodeObject, _base) == 0 &&
            offsetof(PyUnicodeObject, _base) == 0 &&
            offsetof(PyCompactUnicodeObject, _base.hash) == offsetof(PyASCIIObject, hash) &&
            offsetof(PyUnicodeObject, _base._base.hash) == offsetof(PyASCIIObject, hash)
           ) {
            /* Grab the hash from the type object cache, do not compute it. */
            *hash = ((PyASCIIObject *)(obj))->hash;
        }
        else {
            /* cast is not safe, fail */
            return NULL;
        }
        return PyUnicode_DATA(obj);
    } else {
        return NULL;
    }
}
/* this is late included as it #defines e.g. SHIFT that should not impact
* the above */
#include "_unicodetype_db.h"
/* This function is a modified copy of the private function gettyperecord from
* CPython's Objects/unicodectype.c
*
* See:https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L45-L59
*/
/* Fill in the character-class record for code point *code* from Numba's
 * copy of CPython's unicode type database (_unicodetype_db.h). */
NUMBA_EXPORT_FUNC(void)
numba_gettyperecord(Py_UCS4 code, int *upper, int *lower, int *title,
                    unsigned char *decimal, unsigned char *digit,
                    unsigned short *flags)
{
    /* Record 0 is the default entry used for out-of-range code points. */
    int idx = 0;
    const numba_PyUnicode_TypeRecord *rec;

    if (code < 0x110000)
    {
        /* Two-level trie lookup, identical to CPython's gettyperecord. */
        idx = index1[(code >> SHIFT)];
        idx = index2[(idx << SHIFT) + (code & ((1 << SHIFT) - 1))];
    }

    rec = &numba_PyUnicode_TypeRecords[idx];
    *upper = rec->upper;
    *lower = rec->lower;
    *title = rec->title;
    *decimal = rec->decimal;
    *digit = rec->digit;
    *flags = rec->flags;
}
/* This function provides a consistent access point for the
* _PyUnicode_ExtendedCase array defined in CPython's Objects/unicodectype.c
* and now also as numba_PyUnicode_ExtendedCase in Numba's _unicodetype_db.h
*/
/* Return entry *code* of the extended-case-mapping table.
 * NOTE: no bounds checking — callers must pass a valid index. */
NUMBA_EXPORT_FUNC(Py_UCS4)
numba_get_PyUnicode_ExtendedCase(int code)
{
    return numba_PyUnicode_ExtendedCase[code];
}
/* from _unicodetype_db.h */
#undef SHIFT
/*
* defined break point for gdb
*/
/* Deliberately empty function: gdb users set a breakpoint on this symbol
 * and Numba-compiled code calls it at well-known points. */
NUMBA_EXPORT_FUNC(void)
numba_gdb_breakpoint(void) {
    /* does nothing */
}
/*
* Define bridge for all math functions
*/
#define MATH_UNARY(F, R, A) \
NUMBA_EXPORT_FUNC(R) numba_##F(A a) { return F(a); }
#define MATH_BINARY(F, R, A, B) \
NUMBA_EXPORT_FUNC(R) numba_##F(A a, B b) { return F(a, b); }
#include "mathnames.h"
#undef MATH_UNARY
#undef MATH_BINARY
/*
* BLAS and LAPACK wrappers
*/
#include "_lapack.c"
/*
* PRNG support
*/
#include "_random.c"
/*
Expose all functions as pointers in a dedicated C extension.
*/
#include "cext/cext.h"
/* Import _pymodule.h first, for a recent _POSIX_C_SOURCE */
#include "_pymodule.h"
#include <math.h>
#ifdef _MSC_VER
#define false 0
#define true 1
#define bool int
#else
#include <stdbool.h>
#endif
/*
Include C-extension here
*/
#include "cext/cext.h"
/* Numba C helpers */
#include "_helperlib.c"
/* Build the `_helperlib.c_helpers` dict mapping helper names to the raw
 * addresses of the exported numba_* functions, so that JIT-compiled code
 * can bind them.  Returns a new reference, or NULL with an exception set. */
static PyObject *
build_c_helpers_dict(void)
{
    PyObject *dct = PyDict_New();
    if (dct == NULL)
        goto error;

/* Insert one name -> address entry; on any failure, jump to error. */
#define _declpointer(name, value) do {                 \
    PyObject *o = PyLong_FromVoidPtr(value);           \
    if (o == NULL) goto error;                         \
    if (PyDict_SetItemString(dct, name, o)) {          \
        Py_DECREF(o);                                  \
        goto error;                                    \
    }                                                  \
    Py_DECREF(o);                                      \
} while (0)

#define declmethod(func) _declpointer(#func, &numba_##func)
#define declpointer(ptr) _declpointer(#ptr, &numba_##ptr)

    /* math / numeric helpers */
    declmethod(fixed_fmod);
    declmethod(fixed_fmodf);
    declmethod(set_fnclex);
    declmethod(sdiv);
    declmethod(srem);
    declmethod(udiv);
    declmethod(urem);
    declmethod(frexp);
    declmethod(frexpf);
    declmethod(ldexp);
    declmethod(ldexpf);
    declmethod(exp2);
    declmethod(exp2f);
    declmethod(log2);
    declmethod(log2f);
    declmethod(cpow);
    declmethod(cpowf);
    declmethod(erf);
    declmethod(erff);
    declmethod(erfc);
    declmethod(erfcf);
    declmethod(gamma);
    declmethod(gammaf);
    declmethod(lgamma);
    declmethod(lgammaf);
    declmethod(nextafter);
    declmethod(nextafterf);
    /* object / buffer adaptors and misc runtime helpers */
    declmethod(complex_adaptor);
    declmethod(adapt_ndarray);
    declmethod(ndarray_new);
    declmethod(extract_record_data);
    declmethod(get_buffer);
    declmethod(adapt_buffer);
    declmethod(release_buffer);
    declmethod(extract_np_datetime);
    declmethod(create_np_datetime);
    declmethod(extract_np_timedelta);
    declmethod(create_np_timedelta);
    declmethod(recreate_record);
    declmethod(fptoui);
    declmethod(fptouif);
    declmethod(gil_ensure);
    declmethod(gil_release);
    declmethod(fatal_error);
    declmethod(py_type);
    declmethod(unpack_slice);
    declmethod(do_raise);
    declmethod(unpickle);
    declmethod(runtime_build_excinfo_struct);
    declmethod(attempt_nocopy_reshape);
    declmethod(get_pyobject_private_data);
    declmethod(set_pyobject_private_data);
    declmethod(reset_pyobject_private_data);

    /* BLAS / LAPACK */
    declmethod(xxgemm);
    declmethod(xxgemv);
    declmethod(xxdot);
    declmethod(xxgetrf);
    declmethod(ez_xxgetri);
    declmethod(xxpotrf);
    declmethod(ez_rgeev);
    declmethod(ez_cgeev);
    declmethod(ez_xxxevd);
    declmethod(ez_gesdd);
    declmethod(ez_geqrf);
    declmethod(ez_xxgqr);
    declmethod(ez_gelsd);
    declmethod(xgesv);
    declmethod(xxnrm2);

    /* PRNG support */
    declmethod(get_py_random_state);
    declmethod(get_np_random_state);
    declmethod(get_internal_random_state);
    declmethod(rnd_shuffle);
    declmethod(rnd_init);
    declmethod(poisson_ptrs);

    /* Unicode string support */
    declmethod(extract_unicode);
    declmethod(gettyperecord);
    declmethod(get_PyUnicode_ExtendedCase);

    /* for gdb breakpoint */
    declmethod(gdb_breakpoint);

    /* for dictionary support */
    declmethod(test_dict);
    declmethod(dict_new_sized);
    declmethod(dict_set_method_table);
    declmethod(dict_free);
    declmethod(dict_length);
    declmethod(dict_lookup);
    declmethod(dict_insert);
    declmethod(dict_insert_ez);
    declmethod(dict_delitem);
    declmethod(dict_popitem);
    declmethod(dict_iter_sizeof);
    declmethod(dict_iter);
    declmethod(dict_iter_next);
    declmethod(dict_dump);

    /* for list support */
    declmethod(test_list);
    declmethod(list_new);
    declmethod(list_set_method_table);
    declmethod(list_free);
    declmethod(list_base_ptr);
    declmethod(list_size_address);
    declmethod(list_length);
    declmethod(list_allocated);
    declmethod(list_is_mutable);
    declmethod(list_set_is_mutable);
    declmethod(list_setitem);
    declmethod(list_getitem);
    declmethod(list_append);
    declmethod(list_delitem);
    declmethod(list_delete_slice);
    declmethod(list_iter_sizeof);
    declmethod(list_iter);
    declmethod(list_iter_next);

/* Re-expand the math bridge list to register every numba_<math> wrapper. */
#define MATH_UNARY(F, R, A) declmethod(F);
#define MATH_BINARY(F, R, A, B) declmethod(F);
    #include "mathnames.h"
#undef MATH_UNARY
#undef MATH_BINARY

#undef declmethod
    return dct;
error:
    Py_XDECREF(dct);
    return NULL;
}
/*
* Helper to deal with flushing stdout
*/
/* Exported so tests/ctypes users can force C-level stdout to flush. */
PyAPI_FUNC(void) _numba_flush_stdout(void) ;

void
_numba_flush_stdout(void) {
    fflush(stdout);
}
/* Python-callable methods of the _helperlib extension module.
 * The all-NULL entry is the required sentinel terminator. */
static PyMethodDef ext_methods[] = {
    { "rnd_get_state", (PyCFunction) _numba_rnd_get_state, METH_O, NULL },
    { "rnd_get_py_state_ptr", (PyCFunction) _numba_rnd_get_py_state_ptr, METH_NOARGS, NULL },
    { "rnd_get_np_state_ptr", (PyCFunction) _numba_rnd_get_np_state_ptr, METH_NOARGS, NULL },
    { "rnd_seed", (PyCFunction) _numba_rnd_seed, METH_VARARGS, NULL },
    { "rnd_set_state", (PyCFunction) _numba_rnd_set_state, METH_VARARGS, NULL },
    { "rnd_shuffle", (PyCFunction) _numba_rnd_shuffle, METH_O, NULL },
    { "_import_cython_function", (PyCFunction) _numba_import_cython_function, METH_VARARGS, NULL },
    { NULL },
};
/*
* These functions are exported by the module's DLL, to exercise ctypes / cffi
* without relying on libc availability (see https://bugs.python.org/issue23606)
*/
PyAPI_FUNC(double) _numba_test_sin(double x);
PyAPI_FUNC(double) _numba_test_cos(double x);
PyAPI_FUNC(double) _numba_test_exp(double x);
PyAPI_FUNC(void) _numba_test_vsquare(int n, double *x, double *out);
PyAPI_FUNC(double) _numba_test_funcptr(double (*func)(double));
PyAPI_FUNC(bool) _numba_test_boolean(void);
/* ctypes/cffi test target: plain sine wrapper (see prototypes above). */
double _numba_test_sin(double x)
{
    return sin(x);
}
/* ctypes/cffi test target: plain cosine wrapper. */
double _numba_test_cos(double x)
{
    return cos(x);
}
/* ctypes/cffi test target: plain exponential wrapper. */
double _numba_test_exp(double x)
{
    return exp(x);
}
/* ctypes/cffi test target: element-wise square of x[0..n) into out. */
void _numba_test_vsquare(int n, double *x, double *out)
{
    int i;
    for (i = 0; i < n; i++)
        out[i] = pow(x[i], 2.0);
}
/* ctypes/cffi test target: element-wise cube of x[0..n) into out.
 * NOTE(review): unlike its siblings, this one has no PyAPI_FUNC prototype
 * in the declaration block above — confirm whether it should be exported. */
void _numba_test_vcube(int n, double *x, double *out)
{
    int i;
    for (i = 0; i < n; i++)
        out[i] = pow(x[i], 3.0);
}
/* ctypes/cffi test target: invoke a caller-supplied function pointer
 * with the fixed argument 1.5 and return its result. */
double _numba_test_funcptr(double (*func)(double))
{
    return func(1.5);
}
/* ctypes/cffi test target: always returns true.
 * FIX: use (void) so the definition matches the PyAPI_FUNC prototype
 * above; an empty parameter list in C declares an unprototyped function. */
bool _numba_test_boolean(void)
{
    return true;
}
/* Module initialization for numba._helperlib: registers the helper-address
 * dict plus a few size/constant attributes used by the Python side. */
MOD_INIT(_helperlib) {
    PyObject *m;
    MOD_DEF(m, "_helperlib", "No docs", ext_methods)
    if (m == NULL)
        return MOD_ERROR_VAL;

    /* NumPy C-API init; presumably the MOD_INIT/import_array macros handle
     * the error return — confirm against _pymodule.h. */
    import_array();

    /* PyModule_AddObject steals the reference on success.
     * NOTE(review): a NULL from build_c_helpers_dict() is passed through
     * unchecked here — confirm this is intentional. */
    PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
    PyModule_AddIntConstant(m, "long_min", LONG_MIN);
    PyModule_AddIntConstant(m, "long_max", LONG_MAX);
    PyModule_AddIntConstant(m, "py_buffer_size", sizeof(Py_buffer));
    PyModule_AddIntConstant(m, "py_gil_state_size", sizeof(PyGILState_STATE));
    PyModule_AddIntConstant(m, "py_unicode_1byte_kind", PyUnicode_1BYTE_KIND);
    PyModule_AddIntConstant(m, "py_unicode_2byte_kind", PyUnicode_2BYTE_KIND);
    PyModule_AddIntConstant(m, "py_unicode_4byte_kind", PyUnicode_4BYTE_KIND);
    PyModule_AddIntConstant(m, "py_unicode_wchar_kind", PyUnicode_WCHAR_KIND);

    /* One-time global PRNG state initialization (defined in _random.c). */
    numba_rnd_ensure_global_init();

    return MOD_SUCCESS_VAL(m);
}
/*
* This file contains wrappers of BLAS and LAPACK functions
*/
/*
* BLAS calling helpers. The helpers can be called without the GIL held.
* The caller is responsible for checking arguments (especially dimensions).
*/
/* Fast getters caching the value of a function's address after
the first call to import_cblas_function(). */
#define EMIT_GET_CBLAS_FUNC(name) \
static void *cblas_ ## name = NULL; \
static void *get_cblas_ ## name(void) { \
if (cblas_ ## name == NULL) { \
PyGILState_STATE st = PyGILState_Ensure(); \
const char *mod = "scipy.linalg.cython_blas"; \
cblas_ ## name = import_cython_function(mod, # name); \
PyGILState_Release(st); \
} \
return cblas_ ## name; \
}
EMIT_GET_CBLAS_FUNC(dgemm)
EMIT_GET_CBLAS_FUNC(sgemm)
EMIT_GET_CBLAS_FUNC(cgemm)
EMIT_GET_CBLAS_FUNC(zgemm)
EMIT_GET_CBLAS_FUNC(dgemv)
EMIT_GET_CBLAS_FUNC(sgemv)
EMIT_GET_CBLAS_FUNC(cgemv)
EMIT_GET_CBLAS_FUNC(zgemv)
EMIT_GET_CBLAS_FUNC(ddot)
EMIT_GET_CBLAS_FUNC(sdot)
EMIT_GET_CBLAS_FUNC(cdotu)
EMIT_GET_CBLAS_FUNC(zdotu)
EMIT_GET_CBLAS_FUNC(cdotc)
EMIT_GET_CBLAS_FUNC(zdotc)
EMIT_GET_CBLAS_FUNC(snrm2)
EMIT_GET_CBLAS_FUNC(dnrm2)
EMIT_GET_CBLAS_FUNC(scnrm2)
EMIT_GET_CBLAS_FUNC(dznrm2)
#undef EMIT_GET_CBLAS_FUNC
/*
* NOTE: On return value convention.
* For LAPACK wrapper development the following conventions are followed:
* Publicly exposed wrapper functions must return:-
* STATUS_ERROR : For an unrecoverable error e.g. caught by xerbla, this is so
* a Py_FatalError can be raised.
* STATUS_SUCCESS: For successful execution
* +n : Where n is an integer for a routine specific error
* (typically derived from an `info` argument).
*
* The caller is responsible for checking and handling the error status.
*/
/* return STATUS_SUCCESS if everything went ok */
#define STATUS_SUCCESS (0)
/* return STATUS_ERROR if an unrecoverable error is encountered */
#define STATUS_ERROR (-1)
/*
* A union of all the types accepted by BLAS/LAPACK for use in cases where
* stack based allocation is needed (typically for work space query args length
* 1).
*/
typedef union all_dtypes_
{
float s;
double d;
npy_complex64 c;
npy_complex128 z;
} all_dtypes;
/*
* A checked PyMem_RawMalloc, ensures that the var is either NULL
* and an exception is raised, or that the allocation was successful.
* Returns zero on success for status checking.
*/
/*
 * A checked PyMem_RawMalloc, ensures that the var is either NULL
 * and an exception is raised, or that the allocation was successful.
 * Returns zero on success for status checking.
 */
static int checked_PyMem_RawMalloc(void** var, size_t bytes)
{
    *var = NULL;
    *var = PyMem_RawMalloc(bytes);
    if (!(*var))
    {
        {
            PyGILState_STATE st = PyGILState_Ensure();
            /* BUGFIX: the message previously used a backslash line
             * continuation inside the string literal, which embedded the
             * next line's leading whitespace in the user-visible text;
             * adjacent string literals concatenate cleanly instead. */
            PyErr_SetString(PyExc_MemoryError,
                            "Insufficient memory for buffer allocation"
                            " required by LAPACK.");
            PyGILState_Release(st);
        }
        return 1;
    }
    return 0;
}
/*
* Checks that the char kind is valid (one of [s,d,c,z]) for use in blas/lapack.
* Returns zero on success for status checking.
*/
/* Validate that *kind* is one of the four BLAS/LAPACK dtype codes
 * [s, d, c, z].  Returns 0 when valid; otherwise sets ValueError (taking
 * the GIL just for that) and returns 1. */
static int check_kind(char kind)
{
    if (kind == 's' || kind == 'd' || kind == 'c' || kind == 'z')
        return 0;

    {
        PyGILState_STATE st = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid data type (kind) found");
        PyGILState_Release(st);
    }
    return 1;
}
/*
* Guard macro for ensuring a valid data "kind" is being used.
* Place at the top of all routines with switches on "kind" that accept
* one of [s,d,c,z].
*/
#define ENSURE_VALID_KIND(__KIND) \
if (check_kind( __KIND )) \
{ \
return STATUS_ERROR; \
} \
/*
* Checks that the char kind is valid for the real domain (one of [s,d])
* for use in blas/lapack.
* Returns zero on success for status checking.
*/
/* Validate that *kind* is a real-domain dtype code, i.e. one of [s, d].
 * Returns 0 when valid; otherwise sets ValueError under the GIL and
 * returns 1. */
static int check_real_kind(char kind)
{
    if (kind == 's' || kind == 'd')
        return 0;

    {
        PyGILState_STATE st = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid data type (kind) found");
        PyGILState_Release(st);
    }
    return 1;
}
/*
* Guard macro for ensuring a valid data "kind" is being used for the
* real domain routines.
* Place at the top of all routines with switches on "kind" that accept
* one of [s,d].
*/
#define ENSURE_VALID_REAL_KIND(__KIND) \
if (check_real_kind( __KIND )) \
{ \
return STATUS_ERROR; \
} \
/*
* Checks that the char kind is valid for the complex domain (one of [c,z])
* for use in blas/lapack.
* Returns zero on success for status checking.
*/
/* Validate that *kind* is a complex-domain dtype code, i.e. one of [c, z].
 * Returns 0 when valid; otherwise sets ValueError under the GIL and
 * returns 1. */
static int check_complex_kind(char kind)
{
    if (kind == 'c' || kind == 'z')
        return 0;

    {
        PyGILState_STATE st = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid data type (kind) found");
        PyGILState_Release(st);
    }
    return 1;
}
/*
 * Guard macro for ensuring a valid data "kind" is being used for the
 * complex domain routines.
 * Place at the top of all routines with switches on "kind" that accept
 * one of [c,z].
 */
#define ENSURE_VALID_COMPLEX_KIND(__KIND) \
if (check_complex_kind( __KIND )) \
{ \
return STATUS_ERROR; \
} \
/*
* Checks that a function is found (i.e. not null)
* Returns zero on success for status checking.
*/
/* Verify that a resolved BLAS/LAPACK function pointer is non-NULL.
 * Returns STATUS_SUCCESS when usable; otherwise sets RuntimeError under
 * the GIL and returns STATUS_ERROR. */
static int check_func(void *func)
{
    if (func != NULL)
        return STATUS_SUCCESS;

    {
        PyGILState_STATE st = PyGILState_Ensure();
        PyErr_SetString(PyExc_RuntimeError,
                        "Specified LAPACK function could not be found.");
        PyGILState_Release(st);
    }
    return STATUS_ERROR;
}
/*
* Guard macro for ensuring a valid function is found.
*/
#define ENSURE_VALID_FUNC(__FUNC) \
if (check_func(__FUNC)) \
{ \
return STATUS_ERROR; \
} \
/*
* Define what a Fortran "int" is, some LAPACKs have 64 bit integer support
* numba presently opts for a 32 bit C int.
* This definition allows scope for later configuration time magic to adjust
* the size of int at all the call sites.
*/
#define F_INT int
typedef float (*sdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT *incy);
typedef double (*ddot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT
*incy);
typedef npy_complex64 (*cdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy,
F_INT *incy);
typedef npy_complex128 (*zdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy,
F_INT *incy);
typedef void (*xxgemv_t)(char *trans, F_INT *m, F_INT *n,
void *alpha, void *a, F_INT *lda,
void *x, F_INT *incx, void *beta,
void *y, F_INT *incy);
typedef void (*xxgemm_t)(char *transa, char *transb,
F_INT *m, F_INT *n, F_INT *k,
void *alpha, void *a, F_INT *lda,
void *b, F_INT *ldb, void *beta,
void *c, F_INT *ldc);
typedef float (*sxnrm2_t) (F_INT *n, void *x, F_INT *incx);
typedef double (*dxnrm2_t) (F_INT *n, void *x, F_INT *incx);
/* Vector * vector: result = dx * dy */
/* Dot product of two n-vectors; `conjugate` selects the conjugated variant
 * for the complex kinds.  The result is written through *result* whose
 * pointee type is determined by `kind`. */
NUMBA_EXPORT_FUNC(int)
numba_xxdot(char kind, char conjugate, Py_ssize_t n, void *dx, void *dy,
            void *result)
{
    void *fn = NULL;
    F_INT dim;
    F_INT unit_stride = 1;

    ENSURE_VALID_KIND(kind)

    /* Phase 1: resolve the kind-specific BLAS routine. */
    switch (kind)
    {
        case 's':
            fn = get_cblas_sdot();
            break;
        case 'd':
            fn = get_cblas_ddot();
            break;
        case 'c':
            fn = conjugate ? get_cblas_cdotc() : get_cblas_cdotu();
            break;
        case 'z':
            fn = conjugate ? get_cblas_zdotc() : get_cblas_zdotu();
            break;
    }
    ENSURE_VALID_FUNC(fn)

    dim = (F_INT) n;

    /* Phase 2: call it through the matching function-pointer type and
       store into the matching result type. */
    switch (kind)
    {
        case 's':
            *(float *) result =
                (*(sdot_t) fn)(&dim, dx, &unit_stride, dy, &unit_stride);
            break;
        case 'd':
            *(double *) result =
                (*(ddot_t) fn)(&dim, dx, &unit_stride, dy, &unit_stride);
            break;
        case 'c':
            *(npy_complex64 *) result =
                (*(cdot_t) fn)(&dim, dx, &unit_stride, dy, &unit_stride);
            break;
        case 'z':
            *(npy_complex128 *) result =
                (*(zdot_t) fn)(&dim, dx, &unit_stride, dy, &unit_stride);
            break;
    }
    return 0;
}
/* Matrix * vector: y = alpha * a * x + beta * y */
/* GEMV bridge: y = alpha * op(a) * x + beta * y, with op chosen by
 * `trans` and the dtype chosen by `kind`. */
NUMBA_EXPORT_FUNC(int)
numba_xxgemv(char kind, char trans, Py_ssize_t m, Py_ssize_t n,
             void *alpha, void *a, Py_ssize_t lda,
             void *x, void *beta, void *y)
{
    void *fn = NULL;
    F_INT rows, cols, leading_dim;
    F_INT unit_stride = 1;

    ENSURE_VALID_KIND(kind)

    switch (kind)
    {
        case 's':
            fn = get_cblas_sgemv();
            break;
        case 'd':
            fn = get_cblas_dgemv();
            break;
        case 'c':
            fn = get_cblas_cgemv();
            break;
        case 'z':
            fn = get_cblas_zgemv();
            break;
    }
    ENSURE_VALID_FUNC(fn)

    /* Narrow the Py_ssize_t dimensions to the Fortran integer width. */
    rows = (F_INT) m;
    cols = (F_INT) n;
    leading_dim = (F_INT) lda;

    (*(xxgemv_t) fn)(&trans, &rows, &cols, alpha, a, &leading_dim,
                     x, &unit_stride, beta, y, &unit_stride);
    return 0;
}
/* Matrix * matrix: c = alpha * a * b + beta * c */
/* GEMM bridge: c = alpha * op(a) * op(b) + beta * c, with op chosen by
 * transa/transb and the dtype chosen by `kind`. */
NUMBA_EXPORT_FUNC(int)
numba_xxgemm(char kind, char transa, char transb,
             Py_ssize_t m, Py_ssize_t n, Py_ssize_t k,
             void *alpha, void *a, Py_ssize_t lda,
             void *b, Py_ssize_t ldb, void *beta,
             void *c, Py_ssize_t ldc)
{
    void *fn = NULL;
    F_INT dim_m, dim_n, dim_k;
    F_INT ld_a, ld_b, ld_c;

    ENSURE_VALID_KIND(kind)

    switch (kind)
    {
        case 's':
            fn = get_cblas_sgemm();
            break;
        case 'd':
            fn = get_cblas_dgemm();
            break;
        case 'c':
            fn = get_cblas_cgemm();
            break;
        case 'z':
            fn = get_cblas_zgemm();
            break;
    }
    ENSURE_VALID_FUNC(fn)

    /* Narrow the Py_ssize_t dimensions to the Fortran integer width. */
    dim_m = (F_INT) m;
    dim_n = (F_INT) n;
    dim_k = (F_INT) k;
    ld_a = (F_INT) lda;
    ld_b = (F_INT) ldb;
    ld_c = (F_INT) ldc;

    (*(xxgemm_t) fn)(&transa, &transb, &dim_m, &dim_n, &dim_k,
                     alpha, a, &ld_a, b, &ld_b, beta, c, &ld_c);
    return 0;
}
/* L2-norms */
/* L2-norm bridge: writes the Euclidean norm of the n-vector x (with
 * stride incx) into *result — a float for kinds s/c, a double for d/z.
 * Always returns 0 (STATUS_SUCCESS) once the routine is resolved. */
NUMBA_EXPORT_FUNC(F_INT)
numba_xxnrm2(char kind, Py_ssize_t n, void * x, Py_ssize_t incx, void * result)
{
    void *fn = NULL;
    F_INT dim, stride;

    ENSURE_VALID_KIND(kind)

    /* Resolve the dtype-specific routine; the complex variants return a
       real-typed norm, hence scnrm2/dznrm2. */
    switch (kind)
    {
        case 's':
            fn = get_cblas_snrm2();
            break;
        case 'd':
            fn = get_cblas_dnrm2();
            break;
        case 'c':
            fn = get_cblas_scnrm2();
            break;
        case 'z':
            fn = get_cblas_dznrm2();
            break;
    }
    ENSURE_VALID_FUNC(fn)

    dim = (F_INT) n;
    stride = (F_INT) incx;

    switch (kind)
    {
        case 's':
        case 'c':
            /* single-precision result for both s and c inputs */
            *(float *) result = (*(sxnrm2_t) fn)(&dim, x, &stride);
            break;
        case 'd':
        case 'z':
            /* double-precision result for both d and z inputs */
            *(double *) result = (*(dxnrm2_t) fn)(&dim, x, &stride);
            break;
    }
    return 0;
}
/*
* LAPACK calling helpers. The helpers can be called without the GIL held.
* The caller is responsible for checking arguments (especially dimensions).
*/
/* Fast getters caching the value of a function's address after
the first call to import_clapack_function(). */
#define EMIT_GET_CLAPACK_FUNC(name) \
static void *clapack_ ## name = NULL; \
static void *get_clapack_ ## name(void) { \
if (clapack_ ## name == NULL) { \
PyGILState_STATE st = PyGILState_Ensure(); \
const char *mod = "scipy.linalg.cython_lapack"; \
clapack_ ## name = import_cython_function(mod, # name); \
PyGILState_Release(st); \
} \
return clapack_ ## name; \
}
/* Instantiate one lazy getter per LAPACK routine; each resolves the
 * corresponding symbol from scipy.linalg.cython_lapack on first use. */
/* Computes an LU factorization of a general M-by-N matrix A
 * using partial pivoting with row interchanges.
 */
EMIT_GET_CLAPACK_FUNC(sgetrf)
EMIT_GET_CLAPACK_FUNC(dgetrf)
EMIT_GET_CLAPACK_FUNC(cgetrf)
EMIT_GET_CLAPACK_FUNC(zgetrf)
/* Computes the inverse of a matrix using the LU factorization
 * computed by xGETRF.
 */
EMIT_GET_CLAPACK_FUNC(sgetri)
EMIT_GET_CLAPACK_FUNC(dgetri)
EMIT_GET_CLAPACK_FUNC(cgetri)
EMIT_GET_CLAPACK_FUNC(zgetri)
/* Compute Cholesky factorizations */
EMIT_GET_CLAPACK_FUNC(spotrf)
EMIT_GET_CLAPACK_FUNC(dpotrf)
EMIT_GET_CLAPACK_FUNC(cpotrf)
EMIT_GET_CLAPACK_FUNC(zpotrf)
/* Computes for an N-by-N real nonsymmetric matrix A, the
 * eigenvalues and, optionally, the left and/or right eigenvectors.
 */
EMIT_GET_CLAPACK_FUNC(sgeev)
EMIT_GET_CLAPACK_FUNC(dgeev)
EMIT_GET_CLAPACK_FUNC(cgeev)
EMIT_GET_CLAPACK_FUNC(zgeev)
/* Computes for an N-by-N symmetric (ssyevd/dsyevd) or Hermitian
 * (cheevd/zheevd) matrix A, the eigenvalues and, optionally, the
 * eigenvectors (divide-and-conquer driver).
 */
EMIT_GET_CLAPACK_FUNC(ssyevd)
EMIT_GET_CLAPACK_FUNC(dsyevd)
EMIT_GET_CLAPACK_FUNC(cheevd)
EMIT_GET_CLAPACK_FUNC(zheevd)
/* Computes SVD via the divide-and-conquer driver xgesdd */
EMIT_GET_CLAPACK_FUNC(sgesdd)
EMIT_GET_CLAPACK_FUNC(dgesdd)
EMIT_GET_CLAPACK_FUNC(cgesdd)
EMIT_GET_CLAPACK_FUNC(zgesdd)
/* Computes QR decompositions */
EMIT_GET_CLAPACK_FUNC(sgeqrf)
EMIT_GET_CLAPACK_FUNC(dgeqrf)
EMIT_GET_CLAPACK_FUNC(cgeqrf)
EMIT_GET_CLAPACK_FUNC(zgeqrf)
/* Computes columns of Q from elementary reflectors produced by xgeqrf() (QR).
 */
EMIT_GET_CLAPACK_FUNC(sorgqr)
EMIT_GET_CLAPACK_FUNC(dorgqr)
EMIT_GET_CLAPACK_FUNC(cungqr)
EMIT_GET_CLAPACK_FUNC(zungqr)
/* Computes the minimum norm solution to linear least squares problems */
EMIT_GET_CLAPACK_FUNC(sgelsd)
EMIT_GET_CLAPACK_FUNC(dgelsd)
EMIT_GET_CLAPACK_FUNC(cgelsd)
EMIT_GET_CLAPACK_FUNC(zgelsd)
/* Computes the solution to a system of linear equations */
EMIT_GET_CLAPACK_FUNC(sgesv)
EMIT_GET_CLAPACK_FUNC(dgesv)
EMIT_GET_CLAPACK_FUNC(cgesv)
EMIT_GET_CLAPACK_FUNC(zgesv)
#undef EMIT_GET_CLAPACK_FUNC
/* Function-pointer types matching the Fortran calling convention of the
 * LAPACK routines resolved above (all arguments passed by pointer).
 * Prefixes: r = real variant, c = complex variant, x = kind-agnostic. */
/* LU factorization (xgetrf) */
typedef void (*xxgetrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, F_INT *ipiv,
                          F_INT *info);
/* matrix inverse from LU factors (xgetri) */
typedef void (*xxgetri_t)(F_INT *n, void *a, F_INT *lda, F_INT *ipiv, void
                          *work, F_INT *lwork, F_INT *info);
/* Cholesky factorization (xpotrf) */
typedef void (*xxpotrf_t)(char *uplo, F_INT *n, void *a, F_INT *lda, F_INT
                          *info);
/* general eigenproblem, real input (sgeev/dgeev): eigenvalues split wr/wi */
typedef void (*rgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT *lda,
                        void *wr, void *wi, void *vl, F_INT *ldvl, void *vr,
                        F_INT *ldvr, void *work, F_INT *lwork, F_INT *info);
/* general eigenproblem, complex input (cgeev/zgeev): extra rwork buffer */
typedef void (*cgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT
                        *lda, void *w, void *vl, F_INT *ldvl, void *vr,
                        F_INT *ldvr, void *work, F_INT *lwork, void *rwork,
                        F_INT *info);
/* divide-and-conquer SVD, real input (sgesdd/dgesdd) */
typedef void (*rgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda,
                         void *s, void *u, F_INT *ldu, void *vt, F_INT *ldvt,
                         void *work, F_INT *lwork, F_INT *iwork, F_INT *info);
/* divide-and-conquer SVD, complex input (cgesdd/zgesdd) */
typedef void (*cgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda,
                         void *s, void * u, F_INT *ldu, void * vt, F_INT *ldvt,
                         void *work, F_INT *lwork, void *rwork, F_INT *iwork,
                         F_INT *info);
/* symmetric eigenproblem, divide-and-conquer (ssyevd/dsyevd) */
typedef void (*xsyevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda,
                         void *w, void *work, F_INT *lwork, F_INT *iwork,
                         F_INT *liwork, F_INT *info);
/* Hermitian eigenproblem, divide-and-conquer (cheevd/zheevd) */
typedef void (*xheevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda,
                         void *w, void *work, F_INT *lwork, void *rwork,
                         F_INT *lrwork, F_INT *iwork, F_INT *liwork,
                         F_INT *info);
/* QR factorization (xgeqrf) */
typedef void (*xgeqrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, void *tau,
                         void *work, F_INT *lwork, F_INT *info);
/* build Q from elementary reflectors (xorgqr/xungqr) */
typedef void (*xxxgqr_t)(F_INT *m, F_INT *n, F_INT *k, void *a, F_INT *lda,
                         void *tau, void *work, F_INT *lwork, F_INT *info);
/* least-squares via SVD, real input (sgelsd/dgelsd) */
typedef void (*rgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda,
                         void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank,
                         void *work, F_INT *lwork, F_INT *iwork, F_INT *info);
/* least-squares via SVD, complex input (cgelsd/zgelsd) */
typedef void (*cgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda,
                         void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank,
                         void *work, F_INT *lwork, void *rwork, F_INT *iwork,
                         F_INT *info);
/* linear system solve (xgesv) */
typedef void (*xgesv_t)(F_INT *n, F_INT *nrhs, void *a, F_INT *lda, F_INT *ipiv,
                        void *b, F_INT *ldb, F_INT *info);
/*
 * kind_size()
 * Map a type-kind character to the byte width of that element type.
 *
 * Input:
 * kind - the kind, one of:
 *        (s, d, c, z) = (float, double, complex, double complex).
 *
 * Returns:
 * The size in bytes of the corresponding type, or 0 if the kind is not
 * recognised.
 */
static size_t kind_size(char kind)
{
    if (kind == 's')
        return sizeof(float);
    if (kind == 'd')
        return sizeof(double);
    if (kind == 'c')
        return sizeof(npy_complex64);
    if (kind == 'z')
        return sizeof(npy_complex128);
    return 0;
}
/*
 * underlying_float_kind()
 * Map a kind character to the kind of the float type underlying it.
 *
 * Input:
 * kind - the kind, one of:
 *        (s, d, c, z) = (float, double, complex, double complex).
 *
 * Returns:
 * 's' for single-precision based kinds (s, c), 'd' for double-precision
 * based kinds (d, z). For any other input a Python ValueError is set
 * (under the GIL) and -1 is returned.
 */
static char underlying_float_kind(char kind)
{
    if (kind == 's' || kind == 'c')
        return 's';
    if (kind == 'd' || kind == 'z')
        return 'd';
    /* unknown kind: report via the Python error indicator */
    {
        PyGILState_STATE gilstate = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid kind in underlying_float_kind()");
        PyGILState_Release(gilstate);
    }
    return -1;
}
/*
 * cast_from_X()
 * Cast a value of kind (s, d, c, z) = (float, double, complex,
 * double complex) to a Fortran integer.
 *
 * Parameters:
 * kind  the kind of val
 * val   a pointer to the value to cast
 *
 * Returns:
 * A Fortran int from a cast of val (in the complex case, the real part is
 * taken). On an unknown kind a Python ValueError is set (under the GIL)
 * and -1 is returned.
 *
 * Struct access via non c99 (python only) cmplx types, used for compatibility.
 */
static F_INT
cast_from_X(char kind, void *val)
{
    if (kind == 's')
        return (F_INT)(*(float *)val);
    if (kind == 'd')
        return (F_INT)(*(double *)val);
    if (kind == 'c')
        return (F_INT)(((npy_complex64 *)val)->real);
    if (kind == 'z')
        return (F_INT)(((npy_complex128 *)val)->real);
    /* unknown kind: report via the Python error indicator */
    {
        PyGILState_STATE gilstate = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid kind in cast");
        PyGILState_Release(gilstate);
    }
    return -1;
}
/* If a LAPACK call reported an invalid argument (info < 0, where -info is
 * the 1-based index of the offending argument), set a Python RuntimeError
 * (under the GIL) and return STATUS_ERROR from the enclosing function. */
#define CATCH_LAPACK_INVALID_ARG(__routine, info)                         \
    do {                                                                  \
        if (info < 0) {                                                   \
            PyGILState_STATE st = PyGILState_Ensure();                    \
            PyErr_Format(PyExc_RuntimeError,                              \
                         "LAPACK Error: Routine " #__routine ". On input %d\n",\
                         -(int) info);                                    \
            PyGILState_Release(st);                                       \
            return STATUS_ERROR;                                          \
        }                                                                 \
    } while(0)
/* Compute LU decomposition of A
 * NOTE: ipiv is an array of Fortran integers allocated by the caller,
 * which is therefore expected to use the right dtype.
 */
NUMBA_EXPORT_FUNC(int)
numba_xxgetrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda,
              F_INT *ipiv)
{
    void *fn = NULL;
    F_INT rows, cols, ld, info;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_sgetrf();
    else if (kind == 'd')
        fn = get_clapack_dgetrf();
    else if (kind == 'c')
        fn = get_clapack_cgetrf();
    else if (kind == 'z')
        fn = get_clapack_zgetrf();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    ld = (F_INT) lda;
    (*(xxgetrf_t) fn)(&rows, &cols, a, &ld, ipiv, &info);
    CATCH_LAPACK_INVALID_ARG("xxgetrf", info);
    return (int)info;
}
/* Compute the inverse of a matrix given its LU decomposition
 * Args are as per LAPACK.
 */
static int
numba_raw_xxgetri(char kind, F_INT n, void *a, F_INT lda,
                  F_INT *ipiv, void *work, F_INT *lwork, F_INT *info)
{
    void *fn = NULL;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_sgetri();
    else if (kind == 'd')
        fn = get_clapack_dgetri();
    else if (kind == 'c')
        fn = get_clapack_cgetri();
    else if (kind == 'z')
        fn = get_clapack_zgetri();
    ENSURE_VALID_FUNC(fn)
    (*(xxgetri_t) fn)(&n, a, &lda, ipiv, work, lwork, info);
    return 0;
}
/* Compute the inverse of a matrix from the factorization provided by
 * xxgetrf. (see numba_xxgetrf() about ipiv)
 * Args are as per LAPACK.
 *
 * Follows the standard two-phase LAPACK workspace protocol: a first call
 * with lwork == -1 makes the routine write the optimal workspace size into
 * work[0]; the workspace is then heap-allocated and the routine is called
 * again to perform the actual computation.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_xxgetri(char kind, Py_ssize_t n, void *a, Py_ssize_t lda,
                 F_INT *ipiv)
{
    F_INT _n, _lda;
    F_INT lwork = -1;          /* -1 => workspace-size query */
    F_INT info = 0;
    size_t base_size = -1;     /* bytes per matrix element */
    void * work = NULL;
    all_dtypes stack_slot;     /* scratch wide enough for any element kind */
    ENSURE_VALID_KIND(kind)
    _n = (F_INT)n;
    _lda = (F_INT)lda;
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Phase 1: workspace query; optimal lwork lands in work[0]. */
    numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info);
    CATCH_LAPACK_INVALID_ARG("xxgetri", info);
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    /* Phase 2: the actual inversion with the properly sized workspace. */
    numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("xxgetri", info);
    return (int)info;
}
/* Compute the Cholesky factorization of a matrix. */
NUMBA_EXPORT_FUNC(int)
numba_xxpotrf(char kind, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda)
{
    void *fn = NULL;
    F_INT order, ld, info;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_spotrf();
    else if (kind == 'd')
        fn = get_clapack_dpotrf();
    else if (kind == 'c')
        fn = get_clapack_cpotrf();
    else if (kind == 'z')
        fn = get_clapack_zpotrf();
    ENSURE_VALID_FUNC(fn)
    order = (F_INT) n;
    ld = (F_INT) lda;
    (*(xxpotrf_t) fn)(&uplo, &order, a, &ld, &info);
    CATCH_LAPACK_INVALID_ARG("xxpotrf", info);
    return (int)info;
}
/* Raw dispatch to the real-arithmetic general eigensolvers sgeev/dgeev.
 * Args are as per LAPACK.
 */
static int
numba_raw_rgeev(char kind, char jobvl, char jobvr,
                Py_ssize_t n, void *a, Py_ssize_t lda, void *wr, void *wi,
                void *vl, Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr,
                void *work, Py_ssize_t lwork, F_INT *info)
{
    void *fn = NULL;
    F_INT nn, ld, ldl, ldr, wl;
    ENSURE_VALID_REAL_KIND(kind)
    /* dispatch on (real) element kind */
    if (kind == 's')
        fn = get_clapack_sgeev();
    else if (kind == 'd')
        fn = get_clapack_dgeev();
    ENSURE_VALID_FUNC(fn)
    nn = (F_INT) n;
    ld = (F_INT) lda;
    ldl = (F_INT) ldvl;
    ldr = (F_INT) ldvr;
    wl = (F_INT) lwork;
    (*(rgeev_t) fn)(&jobvl, &jobvr, &nn, a, &ld, wr, wi, vl, &ldl, vr,
                    &ldr, work, &wl, info);
    return 0;
}
/* Real space eigen systems info from dgeev/sgeev
 * as numba_raw_rgeev but the allocation and error handling is done for the user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) asks
 * the routine to write the optimal workspace size into work[0]; the
 * workspace is then heap-allocated and the real computation runs.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_rgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a,
               Py_ssize_t lda, void *wr, void *wi, void *vl, Py_ssize_t ldvl,
               void *vr, Py_ssize_t ldvr)
{
    F_INT info = 0;
    F_INT lwork = -1;              /* -1 => workspace-size query */
    F_INT _n, _lda, _ldvl, _ldvr;
    size_t base_size = -1;         /* bytes per matrix element */
    void * work = NULL;
    all_dtypes stack_slot;         /* scratch wide enough for any kind */
    ENSURE_VALID_REAL_KIND(kind)
    _n = (F_INT) n;
    _lda = (F_INT) lda;
    _ldvl = (F_INT) ldvl;
    _ldvr = (F_INT) ldvr;
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Phase 1: workspace query. */
    numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl,
                    vr, _ldvr, work, lwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info);
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    /* Phase 2: actual eigendecomposition. */
    numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl,
                    vr, _ldvr, work, lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info);
    return (int)info;
}
/* Raw dispatch to the complex-arithmetic general eigensolvers cgeev/zgeev.
 * Args are as per LAPACK.
 */
static int
numba_raw_cgeev(char kind, char jobvl, char jobvr,
                Py_ssize_t n, void *a, Py_ssize_t lda, void *w, void *vl,
                Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr, void *work,
                Py_ssize_t lwork, void *rwork, F_INT *info)
{
    void *fn = NULL;
    F_INT nn, ld, ldl, ldr, wl;
    ENSURE_VALID_COMPLEX_KIND(kind)
    nn = (F_INT) n;
    ld = (F_INT) lda;
    ldl = (F_INT) ldvl;
    ldr = (F_INT) ldvr;
    wl = (F_INT) lwork;
    /* dispatch on (complex) element kind */
    if (kind == 'c')
        fn = get_clapack_cgeev();
    else if (kind == 'z')
        fn = get_clapack_zgeev();
    ENSURE_VALID_FUNC(fn)
    (*(cgeev_t) fn)(&jobvl, &jobvr, &nn, a, &ld, w, vl, &ldl, vr,
                    &ldr, work, &wl, rwork, info);
    return 0;
}
/* Complex space eigen systems info from cgeev/zgeev
 * as numba_raw_cgeev but the allocation and error handling is done for the user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0]; work and rwork are then
 * heap-allocated and the real computation runs.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_cgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a,
               Py_ssize_t lda, void *w, void *vl, Py_ssize_t ldvl, void *vr,
               Py_ssize_t ldvr)
{
    F_INT info = 0;
    F_INT lwork = -1;              /* -1 => workspace-size query */
    F_INT _n, _lda, _ldvl, _ldvr;
    size_t base_size = -1;         /* bytes per (complex) matrix element */
    all_dtypes stack_slot, wk;     /* stack scratch for work/rwork queries */
    void * work = NULL;
    void * rwork = (void *)&wk;
    ENSURE_VALID_COMPLEX_KIND(kind)
    _n = (F_INT) n;
    _lda = (F_INT) lda;
    _ldvl = (F_INT) ldvl;
    _ldvr = (F_INT) ldvr;
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Phase 1: workspace query (Py_ssize_t args are narrowed inside the
     * raw wrapper, so passing n/lda/... directly here is fine). */
    numba_raw_cgeev(kind, jobvl, jobvr, n, a, lda, w, vl, ldvl,
                    vr, ldvr, work, lwork, rwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info);
    lwork = cast_from_X(kind, work);
    /* xgeev needs 2*n real scratch entries; base_size is the complex
     * element size here, which is at least as large as required. */
    if (checked_PyMem_RawMalloc((void**)&rwork, 2*n*base_size))
    {
        return STATUS_ERROR;
    }
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        PyMem_RawFree(rwork);
        return STATUS_ERROR;
    }
    /* Phase 2: actual eigendecomposition. */
    numba_raw_cgeev(kind, jobvl, jobvr, _n, a, _lda, w, vl, _ldvl,
                    vr, _ldvr, work, lwork, rwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info);
    return (int)info;
}
/* Raw dispatch to the real symmetric divide-and-conquer eigensolvers
 * ssyevd/dsyevd. Args are as per LAPACK.
 */
static int
numba_raw_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a,
                 Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork,
                 F_INT *iwork, Py_ssize_t liwork, F_INT *info)
{
    void *fn = NULL;
    F_INT nn, ld, wl, iwl;
    ENSURE_VALID_REAL_KIND(kind)
    /* dispatch on (real) element kind */
    if (kind == 's')
        fn = get_clapack_ssyevd();
    else if (kind == 'd')
        fn = get_clapack_dsyevd();
    ENSURE_VALID_FUNC(fn)
    nn = (F_INT) n;
    ld = (F_INT) lda;
    wl = (F_INT) lwork;
    iwl = (F_INT) liwork;
    (*(xsyevd_t) fn)(&jobz, &uplo, &nn, a, &ld, w, work, &wl, iwork, &iwl,
                     info);
    return 0;
}
/* Real space eigen systems info from dsyevd/ssyevd
 * as numba_raw_rsyevd but the allocation and error handling is done for the user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == liwork == -1)
 * writes the optimal sizes into work[0]/iwork[0]; the buffers are then
 * heap-allocated and the real computation runs.
 */
static int
numba_ez_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w)
{
    F_INT info = 0;
    F_INT lwork = -1, liwork=-1;   /* -1 => workspace-size query */
    F_INT _n, _lda;
    size_t base_size = -1;         /* bytes per matrix element */
    void *work = NULL;
    F_INT *iwork = NULL;
    all_dtypes stack_slot;
    /* BUGFIX: the workspace query writes an F_INT into iwork[0]; a plain
     * `int` slot is too narrow on ILP64 LAPACK builds (stack overwrite). */
    F_INT stack_int = -1;
    ENSURE_VALID_KIND(kind)
    _n = (F_INT) n;
    _lda = (F_INT) lda;
    base_size = kind_size(kind);
    work = &stack_slot;
    iwork = &stack_int;
    /* Phase 1: workspace query. */
    numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info);
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    liwork = *iwork;
    /* BUGFIX: iwork holds F_INT entries, so allocate sizeof(F_INT) per
     * element; using base_size (the matrix element size) undersizes the
     * buffer for kind 's' on ILP64 builds (heap overflow). */
    if (checked_PyMem_RawMalloc((void**)&iwork, sizeof(F_INT) * liwork))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    /* Phase 2: actual eigendecomposition. */
    numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info);
    return (int)info;
}
/* Raw dispatch to the Hermitian divide-and-conquer eigensolvers
 * cheevd/zheevd. Args are as per LAPACK.
 */
static int
numba_raw_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a,
                 Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork,
                 void *rwork, Py_ssize_t lrwork, F_INT *iwork,
                 Py_ssize_t liwork, F_INT *info)
{
    void *fn = NULL;
    F_INT nn, ld, wl, rwl, iwl;
    ENSURE_VALID_COMPLEX_KIND(kind)
    /* dispatch on (complex) element kind */
    if (kind == 'c')
        fn = get_clapack_cheevd();
    else if (kind == 'z')
        fn = get_clapack_zheevd();
    ENSURE_VALID_FUNC(fn)
    nn = (F_INT) n;
    ld = (F_INT) lda;
    wl = (F_INT) lwork;
    rwl = (F_INT) lrwork;
    iwl = (F_INT) liwork;
    (*(xheevd_t) fn)(&jobz, &uplo, &nn, a, &ld, w, work, &wl, rwork, &rwl,
                     iwork, &iwl, info);
    return 0;
}
/* complex space eigen systems info from cheevd/zheevd
 * as numba_raw_cheevd but the allocation and error handling is done for the user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (all workspace
 * lengths == -1) writes the optimal sizes into work[0]/rwork[0]/iwork[0];
 * the buffers are then heap-allocated and the real computation runs.
 */
static int
numba_ez_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w)
{
    F_INT info = 0;
    F_INT lwork = -1, lrwork = -1, liwork=-1;  /* -1 => workspace query */
    F_INT _n, _lda;
    size_t base_size = -1, underlying_float_size = -1;
    void *work = NULL, *rwork = NULL;
    F_INT *iwork = NULL;
    all_dtypes stack_slot1, stack_slot2;
    char uf_kind;                  /* real kind underlying the complex kind */
    /* BUGFIX: the workspace query writes an F_INT into iwork[0]; a plain
     * `int` slot is too narrow on ILP64 LAPACK builds (stack overwrite). */
    F_INT stack_int = -1;
    ENSURE_VALID_COMPLEX_KIND(kind)
    _n = (F_INT) n;
    _lda = (F_INT) lda;
    base_size = kind_size(kind);
    uf_kind = underlying_float_kind(kind);
    underlying_float_size = kind_size(uf_kind);
    work = &stack_slot1;
    rwork = &stack_slot2;
    iwork = &stack_int;
    /* Phase 1: workspace query. */
    numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info);
    lwork = cast_from_X(uf_kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    lrwork = cast_from_X(uf_kind, rwork);
    if (checked_PyMem_RawMalloc(&rwork, underlying_float_size * lrwork))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    liwork = *iwork;
    /* BUGFIX: iwork holds F_INT entries, so allocate sizeof(F_INT) per
     * element rather than base_size (the complex element size). */
    if (checked_PyMem_RawMalloc((void**)&iwork, sizeof(F_INT) * liwork))
    {
        PyMem_RawFree(work);
        PyMem_RawFree(rwork);
        return STATUS_ERROR;
    }
    /* Phase 2: actual eigendecomposition. */
    numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info);
    return (int)info;
}
/* Hermitian eigenvalue systems info from *syevd and *heevd.
 * This routine hides the type and general complexity involved with making the
 * calls. The work space computation and error handling etc is hidden.
 * Args are as per LAPACK.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_xxxevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w)
{
    ENSURE_VALID_KIND(kind)
    /* real kinds go to the symmetric driver, complex to the Hermitian one */
    if (kind == 's' || kind == 'd')
        return numba_ez_rsyevd(kind, jobz, uplo, n, a, lda, w);
    if (kind == 'c' || kind == 'z')
        return numba_ez_cheevd(kind, jobz, uplo, n, a, lda, w);
    return STATUS_ERROR; /* unreachable */
}
/* Raw dispatch to the real divide-and-conquer SVD drivers sgesdd/dgesdd.
 * Args are as per LAPACK.
 */
static int
numba_raw_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
                 Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
                 Py_ssize_t ldvt, void *work, Py_ssize_t lwork,
                 F_INT *iwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, ld, ld_u, ld_vt, wl;
    ENSURE_VALID_REAL_KIND(kind)
    rows = (F_INT) m;
    cols = (F_INT) n;
    ld = (F_INT) lda;
    ld_u = (F_INT) ldu;
    ld_vt = (F_INT) ldvt;
    wl = (F_INT) lwork;
    /* dispatch on (real) element kind */
    if (kind == 's')
        fn = get_clapack_sgesdd();
    else if (kind == 'd')
        fn = get_clapack_dgesdd();
    ENSURE_VALID_FUNC(fn)
    (*(rgesdd_t) fn)(&jobz, &rows, &cols, a, &ld, s, u, &ld_u, vt, &ld_vt,
                     work, &wl, iwork, info);
    return 0;
}
/* Real space svd info from dgesdd/sgesdd.
 * As numba_raw_rgesdd but the allocation and error handling is done for the
 * user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0]; work and iwork are then
 * heap-allocated and the real computation runs.
 */
static int
numba_ez_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
                Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
                Py_ssize_t ldvt)
{
    F_INT info = 0;
    Py_ssize_t minmn = -1;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    all_dtypes stack_slot, wk;     /* stack scratch for the query phase */
    size_t base_size = -1;         /* bytes per matrix element */
    F_INT *iwork = (F_INT *)&wk;
    void *work = NULL;
    ENSURE_VALID_REAL_KIND(kind)
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Compute optimal work size (lwork) */
    numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work,
                     lwork, iwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    /* CONSISTENCY FIX: use the named STATUS_ERROR like every sibling
     * routine instead of a bare -1 (same value, clearer intent). */
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    minmn = m > n ? n : m;
    /* xgesdd requires 8*min(m,n) integer workspace entries */
    if (checked_PyMem_RawMalloc((void**) &iwork, 8 * minmn * sizeof(F_INT)))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork,
                     iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info);
    return (int)info;
}
/* Raw dispatch to the complex divide-and-conquer SVD drivers cgesdd/zgesdd.
 * Args are as per LAPACK.
 */
static int
numba_raw_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
                 Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
                 Py_ssize_t ldvt, void *work, Py_ssize_t lwork, void *rwork,
                 F_INT *iwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, ld, ld_u, ld_vt, wl;
    ENSURE_VALID_COMPLEX_KIND(kind)
    rows = (F_INT) m;
    cols = (F_INT) n;
    ld = (F_INT) lda;
    ld_u = (F_INT) ldu;
    ld_vt = (F_INT) ldvt;
    wl = (F_INT) lwork;
    /* dispatch on (complex) element kind */
    if (kind == 'c')
        fn = get_clapack_cgesdd();
    else if (kind == 'z')
        fn = get_clapack_zgesdd();
    ENSURE_VALID_FUNC(fn)
    (*(cgesdd_t) fn)(&jobz, &rows, &cols, a, &ld, s, u, &ld_u, vt, &ld_vt,
                     work, &wl, rwork, iwork, info);
    return 0;
}
/* complex space svd info from cgesdd/zgesdd.
 * As numba_raw_cgesdd but the allocation and error handling is done for the
 * user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal complex workspace size into work[0]; rwork/iwork sizes are
 * computed from the xgesdd documentation formulas.
 */
static int
numba_ez_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
                Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
                Py_ssize_t ldvt)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    Py_ssize_t lrwork = -1;
    Py_ssize_t minmn = -1;
    Py_ssize_t tmp1, tmp2;
    Py_ssize_t maxmn = -1;
    size_t real_base_size = -1;    /* bytes per underlying real element */
    size_t complex_base_size = -1; /* bytes per complex matrix element */
    all_dtypes stack_slot, wk1, wk2;  /* stack scratch for the query phase */
    void *work = NULL;
    void *rwork = (void *)&wk1;
    F_INT *iwork = (F_INT *)&wk2;
    ENSURE_VALID_COMPLEX_KIND(kind)
    switch (kind)
    {
        case 'c':
            real_base_size = sizeof(float);
            complex_base_size = sizeof(npy_complex64);
            break;
        case 'z':
            real_base_size = sizeof(double);
            complex_base_size = sizeof(npy_complex128);
            break;
        default:
        {
            PyGILState_STATE st = PyGILState_Ensure();
            /* BUGFIX: the message previously named numba_ez_rgesdd (the
             * real-kind routine); report the correct routine. */
            PyErr_SetString(PyExc_ValueError,\
                            "Invalid kind in numba_ez_cgesdd");
            PyGILState_Release(st);
        }
        return STATUS_ERROR;
    }
    work = &stack_slot;
    /* Compute optimal work size (lwork) */
    numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork,
                     rwork, iwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, complex_base_size * lwork))
        return STATUS_ERROR;
    minmn = m > n ? n : m;
    /* rwork size per the xgesdd docs: depends on whether singular vectors
     * are requested (jobz). */
    if (jobz == 'n')
    {
        lrwork = 7 * minmn;
    }
    else
    {
        maxmn = m > n ? m : n;
        tmp1 = 5 * minmn + 7;
        tmp2 = 2 * maxmn + 2 * minmn + 1;
        lrwork = minmn * (tmp1 > tmp2 ? tmp1: tmp2);
    }
    if (checked_PyMem_RawMalloc(&rwork,
                                real_base_size * (lrwork > 1 ? lrwork : 1)))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    /* xgesdd requires 8*min(m,n) integer workspace entries */
    if (checked_PyMem_RawMalloc((void **) &iwork,
                                8 * minmn * sizeof(F_INT)))
    {
        PyMem_RawFree(work);
        PyMem_RawFree(rwork);
        return STATUS_ERROR;
    }
    numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork,
                     rwork, iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info);
    return (int)info;
}
/* SVD systems info from *gesdd.
 * This routine hides the type and general complexity involved with making the
 * calls to *gesdd. The work space computation and error handling etc is hidden.
 * Args are as per LAPACK.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_gesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
               Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
               Py_ssize_t ldvt)
{
    ENSURE_VALID_KIND(kind)
    /* route real kinds to the real driver, complex kinds to the complex one */
    if (kind == 's' || kind == 'd')
        return numba_ez_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt,
                               ldvt);
    if (kind == 'c' || kind == 'z')
        return numba_ez_cgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt,
                               ldvt);
    return STATUS_ERROR; /* unreachable */
}
/*
 * Raw dispatch to the QR factorization routines xgeqrf.
 * Return -1 on internal error, 0 on success, > 0 on failure.
 */
static int
numba_raw_xgeqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t
                 lda, void *tau, void *work, Py_ssize_t lwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, ld, wl;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_sgeqrf();
    else if (kind == 'd')
        fn = get_clapack_dgeqrf();
    else if (kind == 'c')
        fn = get_clapack_cgeqrf();
    else if (kind == 'z')
        fn = get_clapack_zgeqrf();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    ld = (F_INT) lda;
    wl = (F_INT) lwork;
    (*(xgeqrf_t) fn)(&rows, &cols, a, &ld, tau, work, &wl, info);
    return 0;
}
/*
 * Compute the QR factorization of a matrix.
 * This routine hides the type and general complexity involved with making the
 * xgeqrf calls. The work space computation and error handling etc is hidden.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0].
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_geqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t
               lda, void *tau)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    size_t base_size = -1;         /* bytes per matrix element */
    all_dtypes stack_slot;
    void *work = NULL;
    /* CONSISTENCY FIX: validate the kind up front like every other ez_
     * routine; previously an invalid kind reached kind_size() and yielded
     * a zero-sized workspace instead of a clean error. */
    ENSURE_VALID_KIND(kind)
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Compute optimal work size (lwork) */
    numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;
    numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info);
    return 0; /* info cannot be >0 */
}
/*
 * Raw dispatch to x{or,un}gqr: build the orthogonal/unitary Q matrix (in QR)
 * from elementary reflectors.
 */
static int
numba_raw_xxxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a,
                 Py_ssize_t lda, void *tau, void * work, Py_ssize_t lwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, nrefl, ld, wl;
    ENSURE_VALID_KIND(kind)
    /* real kinds use xorgqr, complex kinds use xungqr */
    if (kind == 's')
        fn = get_clapack_sorgqr();
    else if (kind == 'd')
        fn = get_clapack_dorgqr();
    else if (kind == 'c')
        fn = get_clapack_cungqr();
    else if (kind == 'z')
        fn = get_clapack_zungqr();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    nrefl = (F_INT) k;
    ld = (F_INT) lda;
    wl = (F_INT) lwork;
    (*(xxxgqr_t) fn)(&rows, &cols, &nrefl, a, &ld, tau, work, &wl, info);
    return 0;
}
/*
 * Compute the orthogonal Q matrix (in QR) from elementary reflectors.
 * This routine hides the type and general complexity involved with making the
 * x{or,un}qrf calls. The work space computation and error handling etc is
 * hidden. Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0].
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_xxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a,
               Py_ssize_t lda, void *tau)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    size_t base_size = -1;         /* bytes per matrix element */
    all_dtypes stack_slot;
    void *work = NULL;
    /* CONSISTENCY FIX: validate the kind up front like every other ez_
     * routine rather than relying solely on the raw wrapper's check. */
    ENSURE_VALID_KIND(kind)
    work = &stack_slot;
    /* Compute optimal work size (lwork) */
    numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info);
    base_size = kind_size(kind);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;
    numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info);
    return 0; /* info cannot be >0 */
}
/*
 * Raw dispatch to sgelsd/dgelsd: minimum-norm solution to a real linear
 * least squares problem.
 */
static int
numba_raw_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                 void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                 void * rcond, Py_ssize_t * rank, void * work,
                 Py_ssize_t lwork, F_INT *iwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, nrhs_, ld_a, ld_b, rank_out, wl;
    ENSURE_VALID_REAL_KIND(kind)
    /* dispatch on (real) element kind */
    if (kind == 's')
        fn = get_clapack_sgelsd();
    else if (kind == 'd')
        fn = get_clapack_dgelsd();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    nrhs_ = (F_INT) nrhs;
    ld_a = (F_INT) lda;
    ld_b = (F_INT) ldb;
    wl = (F_INT) lwork;
    (*(rgelsd_t) fn)(&rows, &cols, &nrhs_, a, &ld_a, b, &ld_b, S, rcond,
                     &rank_out, work, &wl, iwork, info);
    /* propagate the effective rank back in the caller's width */
    *rank = (Py_ssize_t) rank_out;
    return 0;
}
/*
 * Compute the minimum-norm solution to a real linear least squares problem.
 * This routine hides the type and general complexity involved with making the
 * {s,d}gelsd calls. The work space computation and error handling etc is
 * hidden. Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0] and the required iwork length
 * into iwork[0]; the buffers are then heap-allocated and the real
 * computation runs.
 */
static int
numba_ez_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                double rcond, Py_ssize_t * rank)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    size_t base_size = -1;         /* bytes per matrix element */
    all_dtypes stack_slot;
    void *work = NULL, *rcond_cast = NULL;
    F_INT *iwork = NULL;
    F_INT iwork_tmp;               /* receives required iwork length */
    float tmpf;                    /* float-width copy of rcond for kind 's' */
    ENSURE_VALID_REAL_KIND(kind)
    base_size = kind_size(kind);
    work = &stack_slot;
    rcond_cast = work; /* stop checks on null ptr complaining */
    /* Compute optimal work size (lwork) */
    numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, &iwork_tmp, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;
    /* Allocate iwork array */
    if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    /* cast rcond to the right type */
    switch (kind)
    {
        case 's':
            tmpf = (float)rcond;
            rcond_cast = (void * )&tmpf;
            break;
        case 'd':
            rcond_cast = (void * )&rcond;
            break;
    }
    numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info);
    return (int)info;
}
/*
 * Raw dispatch to cgelsd/zgelsd: minimum-norm solution to a complex linear
 * least squares problem.
 */
static int
numba_raw_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                 void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                 void *rcond, Py_ssize_t * rank, void * work,
                 Py_ssize_t lwork, void * rwork, F_INT *iwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, nrhs_, ld_a, ld_b, rank_out, wl;
    ENSURE_VALID_COMPLEX_KIND(kind)
    /* dispatch on (complex) element kind */
    if (kind == 'c')
        fn = get_clapack_cgelsd();
    else if (kind == 'z')
        fn = get_clapack_zgelsd();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    nrhs_ = (F_INT) nrhs;
    ld_a = (F_INT) lda;
    ld_b = (F_INT) ldb;
    wl = (F_INT) lwork;
    (*(cgelsd_t) fn)(&rows, &cols, &nrhs_, a, &ld_a, b, &ld_b, S, rcond,
                     &rank_out, work, &wl, rwork, iwork, info);
    /* propagate the effective rank back in the caller's width */
    *rank = (Py_ssize_t) rank_out;
    return 0;
}
/*
 * Compute the minimum-norm solution to a complex linear least squares problem.
 * This routine hides the type and general complexity involved with making the
 * {c,z}gelsd calls. The work space computation and error handling etc is
 * hidden. Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal complex workspace size into work[0], the real workspace size
 * into rwork[0] and the iwork length into iwork[0]; the buffers are then
 * heap-allocated and the real computation runs.
 */
static int
numba_ez_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                double rcond, Py_ssize_t * rank)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    size_t base_size = -1;         /* bytes per complex matrix element */
    all_dtypes stack_slot1, stack_slot2;
    size_t real_base_size = 0;     /* bytes per underlying real element */
    void *work = NULL, *rwork = NULL, *rcond_cast = NULL;
    Py_ssize_t lrwork;
    F_INT *iwork = NULL;
    F_INT iwork_tmp;               /* receives required iwork length */
    char real_kind = '-';          /* real kind underlying the complex kind */
    float tmpf;                    /* float-width copy of rcond for kind 'c' */
    ENSURE_VALID_COMPLEX_KIND(kind)
    base_size = kind_size(kind);
    work = &stack_slot1;
    rwork = &stack_slot2;
    rcond_cast = work; /* stop checks on null ptr complaining */
    /* Compute optimal work size */
    numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, rwork, &iwork_tmp, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;
    /* Allocate iwork array */
    if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    /* cast rcond to the matching real precision */
    switch (kind)
    {
        case 'c':
            real_kind = 's';
            tmpf = (float)rcond;
            rcond_cast = (void * )&tmpf;
            break;
        case 'z':
            real_kind = 'd';
            rcond_cast = (void * )&rcond;
            break;
    }
    real_base_size = kind_size(real_kind);
    /* the required rwork length was returned (as a real) in rwork[0] */
    lrwork = cast_from_X(real_kind, rwork);
    if (checked_PyMem_RawMalloc((void **)&rwork, real_base_size * lrwork))
    {
        PyMem_RawFree(work);
        PyMem_RawFree(iwork);
        return STATUS_ERROR;
    }
    numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, rwork, iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info);
    return (int)info;
}
/*
 * Compute the minimum-norm solution to a linear least squares problems.
 * This routine hides the type and general complexity involved with making the
 * calls to *gelsd. The work space computation and error handling etc is hidden.
 * Args are as per LAPACK.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_gelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
               void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
               double rcond, Py_ssize_t * rank)
{
    ENSURE_VALID_KIND(kind)
    /* route real kinds to the real driver, complex kinds to the complex one */
    if (kind == 's' || kind == 'd')
        return numba_ez_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond,
                               rank);
    if (kind == 'c' || kind == 'z')
        return numba_ez_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond,
                               rank);
    return STATUS_ERROR; /* unreachable */
}
/*
 * Solve a system of linear equations A * X = B via xgesv (LU with partial
 * pivoting).
 */
NUMBA_EXPORT_FUNC(int)
numba_xgesv(char kind, Py_ssize_t n, Py_ssize_t nrhs, void *a, Py_ssize_t lda,
            F_INT *ipiv, void *b, Py_ssize_t ldb)
{
    void *fn = NULL;
    F_INT order, nrhs_, ld_a, ld_b, info;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_sgesv();
    else if (kind == 'd')
        fn = get_clapack_dgesv();
    else if (kind == 'c')
        fn = get_clapack_cgesv();
    else if (kind == 'z')
        fn = get_clapack_zgesv();
    ENSURE_VALID_FUNC(fn)
    order = (F_INT) n;
    nrhs_ = (F_INT) nrhs;
    ld_a = (F_INT) lda;
    ld_b = (F_INT) ldb;
    (*(xgesv_t) fn)(&order, &nrhs_, a, &ld_a, ipiv, b, &ld_b, &info);
    CATCH_LAPACK_INVALID_ARG("xgesv", info);
    return (int)info;
}
/* undef defines and macros: keep these helper macros private to this
 * translation unit so concatenated sources cannot pick them up.
 * (EMIT_GET_CLAPACK_FUNC was already undefined after its last use; the
 * second #undef here is harmless.) */
#undef STATUS_SUCCESS
#undef STATUS_ERROR
#undef ENSURE_VALID_KIND
#undef ENSURE_VALID_REAL_KIND
#undef ENSURE_VALID_COMPLEX_KIND
#undef ENSURE_VALID_FUNC
#undef F_INT
#undef EMIT_GET_CLAPACK_FUNC
#undef CATCH_LAPACK_INVALID_ARG
#ifndef NUMBA_COMMON_H_
#define NUMBA_COMMON_H_
/* __has_attribute() is a clang / gcc-5 macro */
#ifndef __has_attribute
# define __has_attribute(x) 0
#endif
/* This attribute marks symbols that can be shared across C objects
 * but are not exposed outside of a shared library or executable.
 * Note this is default behaviour for global symbols under Windows.
 */
#if defined(_MSC_VER)
/* MSVC: hidden is the default; exported symbols need dllexport */
#define VISIBILITY_HIDDEN
#define VISIBILITY_GLOBAL __declspec(dllexport)
#elif (__has_attribute(visibility) || (defined(__GNUC__) && __GNUC__ >= 4))
/* GCC/Clang ELF visibility attributes */
#define VISIBILITY_HIDDEN __attribute__ ((visibility("hidden")))
#define VISIBILITY_GLOBAL __attribute__ ((visibility("default")))
#else
/* Unknown toolchain: fall back to default (global) visibility everywhere */
#define VISIBILITY_HIDDEN
#define VISIBILITY_GLOBAL
#endif
/*
 * Numba's version of the PyArray_DescrCheck macro from NumPy, use it as a
 * direct replacement of NumPy's PyArray_DescrCheck to ensure binary
 * compatibility.
 *
 * Details of why this is needed:
 * NumPy 1.18 changed the definition of the PyArray_DescrCheck macro here:
 * https://github.com/numpy/numpy/commit/6108b5d1e138d07e3c9f2a4e3b1933749ad0e698
 * the result of this being that building against NumPy <1.18 would prevent
 * Numba running against NumPy >= 1.20 as noted here:
 * https://github.com/numba/numba/issues/6041#issuecomment-665132199
 *
 * This macro definition is copied from:
 * https://github.com/numpy/numpy/commit/6108b5d1e138d07e3c9f2a4e3b1933749ad0e698#diff-ad2213da23136c5fc5883d9eb2d88666R26
 *
 * NOTE: This is the NumPy 1.18 and above version of the macro.
 */
#define NUMBA_PyArray_DescrCheck(op) PyObject_TypeCheck(op, &PyArrayDescr_Type)
#endif  /* NUMBA_COMMON_H_ */
#ifndef NUMBA_PY_MODULE_H_
#define NUMBA_PY_MODULE_H_
/* Make "s#" et al. use Py_ssize_t lengths; must precede Python.h */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "structmember.h"
#include "frameobject.h"
/* Module-creation helpers (Python 3 multi-phase-free style) */
#define MOD_ERROR_VAL NULL
#define MOD_SUCCESS_VAL(val) val
#define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void)
#define MOD_DEF(ob, name, doc, methods) { \
    static struct PyModuleDef moduledef = { \
        PyModuleDef_HEAD_INIT, name, doc, -1, methods, NULL, NULL, NULL, NULL }; \
    ob = PyModule_Create(&moduledef); }
#define MOD_INIT_EXEC(name) PyInit_##name();
/* Python 2 era spellings aliased to their Python 3 equivalents, so legacy
 * extension code compiles unchanged. */
#define PyString_AsString PyUnicode_AsUTF8
#define PyString_Check PyUnicode_Check
#define PyString_FromFormat PyUnicode_FromFormat
#define PyString_FromString PyUnicode_FromString
#define PyString_InternFromString PyUnicode_InternFromString
#define PyInt_Type PyLong_Type
#define PyInt_Check PyLong_Check
#define PyInt_CheckExact PyLong_CheckExact
/* Expose the address of a C symbol as an int attribute on module *m* */
#define SetAttrStringFromVoidPointer(m, name) do { \
    PyObject *tmp = PyLong_FromVoidPtr((void *) &name); \
    PyObject_SetAttrString(m, #name, tmp); \
    Py_DECREF(tmp); } while (0)
#endif  /* NUMBA_PY_MODULE_H_ */
/*
 * PRNG support.
 */
#ifdef _MSC_VER
/* No pthreads on MSVC; the atfork reseeding hook is compiled out */
#define HAVE_PTHREAD_ATFORK 0
#else
#define HAVE_PTHREAD_ATFORK 1
#include <pthread.h>
#endif
/* Magic Mersenne Twister constants */
#define MT_N 624
#define MT_M 397
#define MT_MATRIX_A 0x9908b0dfU
#define MT_UPPER_MASK 0x80000000U
#define MT_LOWER_MASK 0x7fffffffU
/*
 * Note this structure is accessed in numba.targets.randomimpl,
 * any changes here should be reflected there too.
 */
typedef struct {
    /* Next position to consume in mt[]; MT_N means "exhausted, reshuffle"
     * (see get_next_int32()) */
    int index;
    /* unsigned int is sufficient on modern machines as we only need 32 bits */
    unsigned int mt[MT_N];
    /* has_gauss flags whether the cached value in gauss is valid.
     * NOTE(review): the gaussian consumers are outside this chunk —
     * presumably a Box-Muller style spare-deviate cache; confirm. */
    int has_gauss;
    double gauss;
    /* Zero until seeded; checked by the state getters for lazy init */
    int is_initialized;
} rnd_state_t;
/* Some code portions below from CPython's _randommodule.c, some others
   from Numpy's and Jean-Sebastien Roy's randomkit.c. */
/* Regenerate all MT_N words of the state vector in place (one full
 * MT19937 "twist"). Three loops handle the wrap-around of the
 * mt[i+1] / mt[i+MT_M] accesses without a per-step modulo; the
 * (-(int)(y & 1) & MT_MATRIX_A) term conditionally XORs the twist
 * matrix when the lowest bit of y is set. */
NUMBA_EXPORT_FUNC(void)
numba_rnd_shuffle(rnd_state_t *state)
{
    int i;
    unsigned int y;
    for (i = 0; i < MT_N - MT_M; i++) {
        y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK);
        state->mt[i] = state->mt[i+MT_M] ^ (y >> 1) ^
                       (-(int) (y & 1) & MT_MATRIX_A);
    }
    for (; i < MT_N - 1; i++) {
        y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK);
        state->mt[i] = state->mt[i+(MT_M-MT_N)] ^ (y >> 1) ^
                       (-(int) (y & 1) & MT_MATRIX_A);
    }
    /* Last word wraps around to mt[0] */
    y = (state->mt[MT_N - 1] & MT_UPPER_MASK) | (state->mt[0] & MT_LOWER_MASK);
    state->mt[MT_N - 1] = state->mt[MT_M - 1] ^ (y >> 1) ^
                          (-(int) (y & 1) & MT_MATRIX_A);
}
/* Seed the Mersenne Twister state vector from a single 32-bit integer.
 * Fills mt[] with the Knuth-style multiplicative recurrence used by the
 * MT reference implementation, then marks the state as exhausted
 * (index == MT_N forces a full shuffle on the next draw) and seeded. */
NUMBA_EXPORT_FUNC(void)
numba_rnd_init(rnd_state_t *state, unsigned int seed)
{
    unsigned int i = 0;
    unsigned int value = seed & 0xffffffffU;
    while (i < MT_N) {
        state->mt[i] = value;
        value = (1812433253U * (value ^ (value >> 30)) + i + 1) & 0xffffffffU;
        i++;
    }
    state->index = MT_N;
    state->gauss = 0.0;
    state->has_gauss = 0;
    state->is_initialized = 1;
}
/* Perturb mt[] with a key array */
/* Standard MT19937 init_by_array(): seed with the fixed constant
 * 19650218, then run two non-linear mixing passes over mt[] using the
 * caller's key material, and finally force the MSB of mt[0] so the
 * state can never be all zeros. Statement order is significant. */
static void
rnd_init_by_array(rnd_state_t *state, unsigned int init_key[], size_t key_length)
{
    size_t i, j, k;
    unsigned int *mt = state->mt;
    numba_rnd_init(state, 19650218U);
    i = 1; j = 0;
    /* First pass runs max(MT_N, key_length) steps so every key byte and
     * every mt[] word is touched at least once */
    k = (MT_N > key_length ? MT_N : key_length);
    for (; k; k--) {
        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525U))
                + init_key[j] + (unsigned int) j; /* non linear */
        mt[i] &= 0xffffffffU;
        i++; j++;
        if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i = 1; }
        if (j >= key_length) j = 0;
    }
    /* Second pass decorrelates adjacent words */
    for (k = MT_N - 1; k; k--) {
        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941U))
                - (unsigned int) i; /* non linear */
        mt[i] &= 0xffffffffU;
        i++;
        if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i=1; }
    }
    mt[0] = 0x80000000U; /* MSB is 1; ensuring non-zero initial array */
    state->index = MT_N;
    state->has_gauss = 0;
    state->gauss = 0.0;
    state->is_initialized = 1;
}
/*
 * Management of thread-local random state.
 */
/* Set once by numba_rnd_ensure_global_init(); guards atfork registration */
static int rnd_globally_initialized;
#ifdef _MSC_VER
#define THREAD_LOCAL(ty) __declspec(thread) ty
#else
/* Non-standard C99 extension that's understood by gcc and clang */
#define THREAD_LOCAL(ty) __thread ty
#endif
/* Three independent per-thread generator states (py / np / internal),
 * each lazily seeded on first access via the getters below. */
static THREAD_LOCAL(rnd_state_t) numba_py_random_state;
static THREAD_LOCAL(rnd_state_t) numba_np_random_state;
static THREAD_LOCAL(rnd_state_t) numba_internal_random_state;
/* Seed the state with random bytes */
/* Consumes *buf* (the Py_buffer is released on every path) and mixes its
 * bytes into *state* as an array of little-endian 32-bit keys.
 * Returns 0 on success, -1 on allocation failure.
 * NOTE(review): up to 3 trailing bytes are discarded, and a buffer
 * shorter than 4 bytes gives nkeys == 0, in which case
 * rnd_init_by_array() would read init_key[0] out of bounds — confirm
 * all callers pass >= 4 bytes (rnd_implicit_init() passes MT_N * 4). */
static int
rnd_seed_with_bytes(rnd_state_t *state, Py_buffer *buf)
{
    unsigned int *keys;
    unsigned char *bytes;
    size_t i, nkeys;
    nkeys = buf->len / sizeof(unsigned int);
    keys = (unsigned int *) PyMem_Malloc(nkeys * sizeof(unsigned int));
    if (keys == NULL) {
        PyBuffer_Release(buf);
        return -1;
    }
    bytes = (unsigned char *) buf->buf;
    /* Convert input bytes to int32 keys, without violating alignment
     * constraints.
     */
    for (i = 0; i < nkeys; i++, bytes += 4) {
        keys[i] =
            ((unsigned int)bytes[3] << 24) +
            ((unsigned int)bytes[2] << 16) +
            ((unsigned int)bytes[1] << 8) +
            ((unsigned int)bytes[0] << 0);
    }
    PyBuffer_Release(buf);
    rnd_init_by_array(state, keys, nkeys);
    PyMem_Free(keys);
    return 0;
}
#if HAVE_PTHREAD_ATFORK
/* After a fork(), the child should reseed its random states.
 * Since only the main thread survives in the child, it's enough to mark
 * the current thread-local states as uninitialized.
 */
static void
rnd_atfork_child(void)
{
    /* Next access through a state getter re-seeds via rnd_implicit_init() */
    numba_py_random_state.is_initialized = 0;
    numba_np_random_state.is_initialized = 0;
    numba_internal_random_state.is_initialized = 0;
}
#endif
/* Global one-time initialization. Must be called as early as possible so
 * the fork handler is registered before any worker threads or forks
 * appear; subsequent calls are no-ops. */
NUMBA_EXPORT_FUNC(void)
numba_rnd_ensure_global_init(void)
{
    if (rnd_globally_initialized)
        return;
#if HAVE_PTHREAD_ATFORK
    /* Invalidate the thread-local states in forked children */
    pthread_atfork(NULL, NULL, rnd_atfork_child);
#endif
    numba_py_random_state.is_initialized = 0;
    numba_np_random_state.is_initialized = 0;
    numba_internal_random_state.is_initialized = 0;
    rnd_globally_initialized = 1;
}
/* First-time (lazy) initialization of a thread-local random state.
 *
 * Seeds *state* with MT_N * 4 bytes of OS entropy obtained through
 * os.urandom() via the embedded interpreter — still the easiest way to
 * get good-quality cross-platform random bytes.
 *
 * Any failure is fatal: callers of the state getters cannot report
 * errors, so the pending Python error (if any) is printed and the
 * process aborted.
 */
static void
rnd_implicit_init(rnd_state_t *state)
{
    PyObject *module, *bufobj;
    Py_buffer buf;
    PyGILState_STATE gilstate = PyGILState_Ensure();
    /* Fix: PyImport_ImportModuleNoBlock has been a deprecated alias of
     * PyImport_ImportModule since Python 3.3 and is slated for removal;
     * use the canonical, behaviorally identical name. */
    module = PyImport_ImportModule("os");
    if (module == NULL)
        goto error;
    /* Read as many bytes as necessary to get the full entropy
     * exploitable by the MT generator.
     */
    bufobj = PyObject_CallMethod(module, "urandom", "i",
                                 (int) (MT_N * sizeof(unsigned int)));
    Py_DECREF(module);
    if (bufobj == NULL)
        goto error;
    if (PyObject_GetBuffer(bufobj, &buf, PyBUF_SIMPLE))
        goto error;
    Py_DECREF(bufobj);
    if (rnd_seed_with_bytes(state, &buf))
        goto error;
    /* state->is_initialized is set now */
    PyGILState_Release(gilstate);
    return;
error:
    /* In normal conditions, os.urandom() and PyMem_Malloc() shouldn't fail,
     * and we don't want the caller to deal with errors, so just bail out.
     * NOTE(review): Py_FatalError(NULL) relies on the libc tolerating a
     * NULL message string — consider passing a short message. */
    if (PyErr_Occurred())
        PyErr_Print();
    Py_FatalError(NULL);
}
/* Functions returning the thread-local random state pointer.
 * The LLVM JIT doesn't support thread-local variables so we rely
 * on the C compiler instead.
 */
/* Return this thread's Python-flavoured state, seeding it lazily. */
NUMBA_EXPORT_FUNC(rnd_state_t *)
numba_get_py_random_state(void)
{
    rnd_state_t *st = &numba_py_random_state;
    if (!st->is_initialized)
        rnd_implicit_init(st);
    return st;
}
/* Return this thread's NumPy-flavoured state, seeding it lazily. */
NUMBA_EXPORT_FUNC(rnd_state_t *)
numba_get_np_random_state(void)
{
    rnd_state_t *st = &numba_np_random_state;
    if (!st->is_initialized)
        rnd_implicit_init(st);
    return st;
}
/* Return this thread's Numba-internal state, seeding it lazily. */
NUMBA_EXPORT_FUNC(rnd_state_t *)
numba_get_internal_random_state(void)
{
    rnd_state_t *st = &numba_internal_random_state;
    if (!st->is_initialized)
        rnd_implicit_init(st);
    return st;
}
/*
 * Python-exposed helpers for state management and testing.
 */
/* PyArg_ParseTuple "O&" converter: interpret *obj* as the integer
 * address of a rnd_state_t. Returns non-zero on success per the Python
 * converter convention (NULL pointer with no pending error is accepted
 * only when PyLong_AsVoidPtr succeeded). */
static int
rnd_state_converter(PyObject *obj, rnd_state_t **state)
{
    *state = (rnd_state_t *) PyLong_AsVoidPtr(obj);
    return (*state != NULL || !PyErr_Occurred());
}
/* Return the address of this thread's Python-flavoured state as an int
 * (suitable for feeding back through rnd_state_converter). */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_get_py_state_ptr(PyObject *self)
{
    return PyLong_FromVoidPtr(numba_get_py_random_state());
}
/* Return the address of this thread's NumPy-flavoured state as an int. */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_get_np_state_ptr(PyObject *self)
{
    return PyLong_FromVoidPtr(numba_get_np_random_state());
}
/* Python wrapper: advance the given state by one whole MT twist round
 * (mainly useful for testing). */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_shuffle(PyObject *self, PyObject *arg)
{
    rnd_state_t *state;
    if (!rnd_state_converter(arg, &state))
        return NULL;
    numba_rnd_shuffle(state);
    Py_RETURN_NONE;
}
/* Python wrapper: install an (index, [624 ints]) tuple into the given
 * state and reset the gaussian cache.
 * NOTE(review): *index* is stored without range validation; a value
 * outside [0, MT_N] would make get_next_int32() read past mt[] —
 * presumably callers only pass values from a previous get_state;
 * confirm. */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_set_state(PyObject *self, PyObject *args)
{
    int i, index;
    rnd_state_t *state;
    PyObject *tuplearg, *intlist;
    if (!PyArg_ParseTuple(args, "O&O!:rnd_set_state",
                          rnd_state_converter, &state,
                          &PyTuple_Type, &tuplearg))
        return NULL;
    if (!PyArg_ParseTuple(tuplearg, "iO!", &index, &PyList_Type, &intlist))
        return NULL;
    if (PyList_GET_SIZE(intlist) != MT_N) {
        PyErr_SetString(PyExc_ValueError, "list object has wrong size");
        return NULL;
    }
    state->index = index;
    for (i = 0; i < MT_N; i++) {
        PyObject *v = PyList_GET_ITEM(intlist, i);
        unsigned long x = PyLong_AsUnsignedLong(v);
        /* -1 is both a valid result and the error marker; disambiguate */
        if (x == (unsigned long) -1 && PyErr_Occurred())
            return NULL;
        state->mt[i] = (unsigned int) x;
    }
    state->has_gauss = 0;
    state->gauss = 0.0;
    state->is_initialized = 1;
    Py_RETURN_NONE;
}
/* Python wrapper: return (index, [624 ints]) for the given state.
 * The "N" format code in Py_BuildValue steals the intlist reference,
 * so no explicit DECREF is needed on success. */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_get_state(PyObject *self, PyObject *arg)
{
    PyObject *intlist;
    int i;
    rnd_state_t *state;
    if (!rnd_state_converter(arg, &state))
        return NULL;
    intlist = PyList_New(MT_N);
    if (intlist == NULL)
        return NULL;
    for (i = 0; i < MT_N; i++) {
        PyObject *v = PyLong_FromUnsignedLong(state->mt[i]);
        if (v == NULL) {
            Py_DECREF(intlist);
            return NULL;
        }
        PyList_SET_ITEM(intlist, i, v);
    }
    return Py_BuildValue("iN", state->index, intlist);
}
/* Python wrapper: seed a state either from a single uint32 or, if that
 * parse fails, from a bytes-like object (the parse error is cleared and
 * the alternative signature tried). */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_seed(PyObject *self, PyObject *args)
{
    unsigned int seed;
    rnd_state_t *state;
    if (!PyArg_ParseTuple(args, "O&I:rnd_seed",
                          rnd_state_converter, &state, &seed)) {
        /* rnd_seed_*(bytes-like object) */
        Py_buffer buf;
        PyErr_Clear();
        if (!PyArg_ParseTuple(args, "O&s*:rnd_seed",
                              rnd_state_converter, &state, &buf))
            return NULL;
        /* rnd_seed_with_bytes() releases *buf* on every path */
        if (rnd_seed_with_bytes(state, &buf))
            return NULL;
        else
            Py_RETURN_NONE;
    }
    else {
        /* rnd_seed_*(int32) */
        numba_rnd_init(state, seed);
        Py_RETURN_NONE;
    }
}
/*
 * Random distribution helpers.
 * Most code straight from Numpy's distributions.c.
 */
#ifndef M_PI
/* M_PI is a POSIX extension, not mandated by the C standard; provide it
 * for compilers/headers that omit it. */
#define M_PI 3.14159265358979323846264338328
#endif
/* Draw the next raw 32-bit output from the Mersenne Twister, refreshing
 * the state vector when it is exhausted (index == MT_N), then applying
 * the standard MT19937 tempering transform to improve equidistribution. */
NUMBA_EXPORT_FUNC(unsigned int)
get_next_int32(rnd_state_t *state)
{
    unsigned int y;
    if (state->index == MT_N) {
        numba_rnd_shuffle(state);
        state->index = 0;
    }
    y = state->mt[state->index++];
    /* Tempering */
    y ^= (y >> 11);
    y ^= (y << 7) & 0x9d2c5680U;
    y ^= (y << 15) & 0xefc60000U;
    y ^= (y >> 18);
    return y;
}
/* Draw a uniformly distributed double in [0, 1) with 53 bits of
 * precision, combining two 32-bit draws (27 high bits + 26 high bits). */
NUMBA_EXPORT_FUNC(double)
get_next_double(rnd_state_t *state)
{
    double hi = get_next_int32(state) >> 5;   /* top 27 bits */
    double lo = get_next_int32(state) >> 6;   /* top 26 bits */
    return (hi * 67108864.0 + lo) / 9007199254740992.0;   /* / 2**53 */
}
/* Natural logarithm of the gamma function.
 * Uses a Stirling-type asymptotic series (coefficients a[]); for
 * x <= 7 the argument is first shifted up by an integer n and the
 * result corrected through log Gamma(x) = log Gamma(x+1) - log(x).
 * Straight from NumPy's distributions.c. */
NUMBA_EXPORT_FUNC(double)
loggam(double x)
{
    double x0, x2, xp, gl, gl0;
    long k, n;
    static double a[10] = {8.333333333333333e-02,-2.777777777777778e-03,
                           7.936507936507937e-04,-5.952380952380952e-04,
                           8.417508417508418e-04,-1.917526917526918e-03,
                           6.410256410256410e-03,-2.955065359477124e-02,
                           1.796443723688307e-01,-1.39243221690590e+00};
    x0 = x;
    n = 0;
    /* Exact zeros of log-gamma */
    if ((x == 1.0) || (x == 2.0))
    {
        return 0.0;
    }
    else if (x <= 7.0)
    {
        /* Shift argument into the series' accurate range */
        n = (long)(7 - x);
        x0 = x + n;
    }
    x2 = 1.0/(x0*x0);
    xp = 2*M_PI;
    /* Evaluate the series by Horner's scheme */
    gl0 = a[9];
    for (k=8; k>=0; k--)
    {
        gl0 *= x2;
        gl0 += a[k];
    }
    gl = gl0/x0 + 0.5*log(xp) + (x0-0.5)*log(x0) - x0;
    if (x <= 7.0)
    {
        /* Undo the argument shift via the recurrence */
        for (k=1; k<=n; k++)
        {
            gl -= log(x0-1.0);
            x0 -= 1.0;
        }
    }
    return gl;
}
/* Draw a Poisson variate using the PTRS transformed-rejection sampler. */
NUMBA_EXPORT_FUNC(int64_t)
numba_poisson_ptrs(rnd_state_t *state, double lam)
{
    /* This method is invoked only if the parameter lambda of this
     * distribution is big enough ( >= 10 ). The algorithm used is
     * described in "Hörmann, W. 1992. 'The Transformed Rejection
     * Method for Generating Poisson Random Variables'.
     * The implementation comes straight from Numpy.
     */
    int64_t k;
    double U, V, slam, loglam, a, b, invalpha, vr, us;
    slam = sqrt(lam);
    loglam = log(lam);
    /* Constants below are the PTRS hat-function parameters from the paper */
    b = 0.931 + 2.53*slam;
    a = -0.059 + 0.02483*b;
    invalpha = 1.1239 + 1.1328/(b-3.4);
    vr = 0.9277 - 3.6224/(b-2);
    /* Rejection loop: propose k, then accept via the fast squeeze or the
     * full log-density comparison */
    while (1)
    {
        U = get_next_double(state) - 0.5;
        V = get_next_double(state);
        us = 0.5 - fabs(U);
        k = (int64_t) floor((2*a/us + b)*U + lam + 0.43);
        /* Fast acceptance region */
        if ((us >= 0.07) && (V <= vr))
        {
            return k;
        }
        /* Quick rejection */
        if ((k < 0) ||
            ((us < 0.013) && (V > us)))
        {
            continue;
        }
        /* Exact acceptance test against the Poisson log-pmf */
        if ((log(V) + log(invalpha) - log(a/(us*us)+b)) <=
            (-lam + (double) k*loglam - loggam((double) k+1)))
        {
            return k;
        }
    }
}
#include "_pymodule.h"
#include <string.h>
#include <time.h>
#include <assert.h>
#include "_numba_common.h"
#include "_typeof.h"
#include "_hashtable.h"
#include "_devicearray.h"
#include "pyerrors.h"
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/ndarrayobject.h>
/* Cached typecodes for basic scalar types */
static int tc_int8;
static int tc_int16;
static int tc_int32;
static int tc_int64;
static int tc_uint8;
static int tc_uint16;
static int tc_uint32;
static int tc_uint64;
static int tc_float32;
static int tc_float64;
static int tc_complex64;
static int tc_complex128;
/* Dense array of the 12 scalar typecodes above, indexable by the result
 * of dtype_num_to_typecode() */
static int BASIC_TYPECODES[12];
static int tc_intp;
/* The type object for the numba .dispatcher.OmittedArg class
 * that wraps omitted arguments.
 */
static PyObject *omittedarg_type;
/* dict: scalar dtype -> typecode (see get_cached_typecode) */
static PyObject *typecache;
/* dict: (ndim, layout, dtype) tuple -> typecode (see ndarray_key) */
static PyObject *ndarray_typecache;
/* dict interning structured dtypes so fingerprinting by pointer is stable */
static PyObject *structured_dtypes;
/* Interned strings; presumably initialized by module setup outside this
 * chunk — confirm. */
static PyObject *str_typeof_pyval = NULL;
static PyObject *str_value = NULL;
static PyObject *str_numba_type = NULL;
/* CUDA device array API */
void **DeviceArray_API;
/*
 * Type fingerprint computation.
 */
typedef struct {
    /* A buffer the fingerprint will be written to */
    char *buf;
    /* Number of bytes written so far */
    size_t n;
    /* Current capacity of buf, in bytes */
    size_t allocated;
    /* A preallocated buffer, sufficient to fit the fingerprint for most types */
    char static_buf[40];
} string_writer_t;
/* Set up *w* as empty, writing into its embedded static buffer. */
static void
string_writer_init(string_writer_t *w)
{
    w->n = 0;
    w->buf = w->static_buf;
    w->allocated = sizeof(w->static_buf) / sizeof(unsigned char);
}
/* Release any heap buffer owned by *w*; static storage needs no free. */
static void
string_writer_clear(string_writer_t *w)
{
    if (w->buf == w->static_buf)
        return;
    free(w->buf);
}
/* Shallow-move the contents of *src* into *dest*.
 * If *src* is still using its static buffer, the bytes are copied into
 * dest's own static buffer; otherwise *dest* takes over src's heap
 * pointer. NOTE(review): after a heap transfer both writers point at the
 * same buffer — the caller must not clear *src* afterwards (see the
 * ownership comment in typecode_using_fingerprint). */
static void
string_writer_move(string_writer_t *dest, const string_writer_t *src)
{
    dest->n = src->n;
    dest->allocated = src->allocated;
    if (src->buf == src->static_buf) {
        dest->buf = dest->static_buf;
        memcpy(dest->buf, src->buf, src->n);
    }
    else {
        dest->buf = src->buf;
    }
}
/* Ensure at least *bytes* can be appended to the string writer's buffer.
 * Grows geometrically (4x + 1) to amortize reallocations.
 * Returns 0 on success, -1 with MemoryError set on allocation failure.
 *
 * Fixes over the previous version:
 *  - when switching from the static buffer to a heap buffer, the bytes
 *    already written (w->n) were NOT copied, corrupting any fingerprint
 *    longer than the static buffer; they are now preserved.
 *  - a failed realloc() overwrote w->buf with NULL, leaking the old
 *    allocation; the result is now staged in a temporary so the old
 *    buffer stays owned (and freeable) by the writer on failure.
 */
static int
string_writer_ensure(string_writer_t *w, size_t bytes)
{
    size_t newsize;
    char *newbuf;
    bytes += w->n;
    if (bytes <= w->allocated)
        return 0;
    newsize = (w->allocated << 2) + 1;
    if (newsize < bytes)
        newsize = bytes;
    if (w->buf == w->static_buf) {
        newbuf = (char *) malloc(newsize);
        if (newbuf != NULL)
            /* Preserve the bytes already written into the static buffer */
            memcpy(newbuf, w->static_buf, w->n);
    }
    else {
        newbuf = (char *) realloc(w->buf, newsize);
    }
    if (newbuf == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    w->buf = newbuf;
    w->allocated = newsize;
    return 0;
}
/* Append a single byte; returns 0 on success, -1 on allocation failure. */
static int
string_writer_put_char(string_writer_t *w, unsigned char c)
{
    if (string_writer_ensure(w, 1) != 0)
        return -1;
    w->buf[w->n] = c;
    w->n += 1;
    return 0;
}
/* Append a 32-bit value in little-endian byte order. */
static int
string_writer_put_int32(string_writer_t *w, unsigned int v)
{
    int shift;
    if (string_writer_ensure(w, 4))
        return -1;
    for (shift = 0; shift < 32; shift += 8)
        w->buf[w->n++] = (v >> shift) & 0xff;
    return 0;
}
/* Append a pointer-sized integer in little-endian byte order.
 * Used to mix object identities (interned dtypes, Python type objects)
 * into a fingerprint. The width is fixed at compile time by
 * NPY_SIZEOF_PY_INTPTR_T (4 or 8). */
static int
string_writer_put_intp(string_writer_t *w, npy_intp v)
{
    if (string_writer_ensure(w, NPY_SIZEOF_PY_INTPTR_T))
        return -1;
    w->buf[w->n] = v & 0xff;
    w->buf[w->n + 1] = (v >> 8) & 0xff;
    w->buf[w->n + 2] = (v >> 16) & 0xff;
    w->buf[w->n + 3] = (v >> 24) & 0xff;
#if NPY_SIZEOF_PY_INTPTR_T == 8
    w->buf[w->n + 4] = (v >> 32) & 0xff;
    w->buf[w->n + 5] = (v >> 40) & 0xff;
    w->buf[w->n + 6] = (v >> 48) & 0xff;
    w->buf[w->n + 7] = (v >> 56) & 0xff;
#endif
    w->n += NPY_SIZEOF_PY_INTPTR_T;
    return 0;
}
/* Append a NUL-terminated string, including its terminator; a NULL
 * pointer is encoded as a single 0 byte. */
static int
string_writer_put_string(string_writer_t *w, const char *s)
{
    size_t nbytes;
    if (s == NULL)
        return string_writer_put_char(w, 0);
    nbytes = strlen(s) + 1;
    if (string_writer_ensure(w, nbytes))
        return -1;
    memcpy(w->buf + w->n, s, nbytes);
    w->n += nbytes;
    return 0;
}
/* One-byte opcodes tagging each component of a type fingerprint. */
enum opcode {
    OP_START_TUPLE = '(',
    OP_END_TUPLE = ')',
    OP_INT = 'i',
    OP_FLOAT = 'f',
    OP_COMPLEX = 'c',
    OP_BOOL = '?',
    OP_OMITTED = '!',
    OP_BYTEARRAY = 'a',
    OP_BYTES = 'b',
    OP_NONE = 'n',
    OP_LIST = '[',
    OP_SET = '{',
    OP_BUFFER = 'B',
    OP_NP_SCALAR = 'S',
    OP_NP_ARRAY = 'A',
    OP_NP_DTYPE = 'D'
};
/* Call func(w, arg) and propagate a non-zero failure as -1. */
#define TRY(func, w, arg) \
    do { \
        if (func(w, arg)) return -1; \
    } while (0)
/* Raise NotImplementedError for values no fingerprint rule matches and
 * return -1; typecode_using_fingerprint() catches exactly this exception
 * to fall back on the pure-Python typeof() path. */
static int
fingerprint_unrecognized(void)
{
    PyErr_SetString(PyExc_NotImplementedError,
                    "cannot compute type fingerprint for value");
    return -1;
}
/* Append the fingerprint of a NumPy dtype to *w*.
 * Simple scalar dtypes are encoded as their one-byte type number;
 * structured (void) dtypes are interned and encoded by pointer identity;
 * datetime/timedelta dtypes additionally encode their unit metadata.
 * Returns 0 on success, -1 with an exception set otherwise.
 */
static int
compute_dtype_fingerprint(string_writer_t *w, PyArray_Descr *descr)
{
    int typenum = descr->type_num;
    if (typenum < NPY_OBJECT)
        return string_writer_put_char(w, (char) typenum);
    if (typenum == NPY_VOID) {
        /* Structured dtype: serialize the dtype pointer. Unfortunately,
         * some structured dtypes can be ephemeral, so we have to
         * intern them to avoid pointer reuse and fingerprint collisions.
         * (e.g. np.recarray(dtype=some_dtype) creates a new dtype
         * equal to some_dtype)
         */
        PyObject *interned = PyDict_GetItem(structured_dtypes,
                                            (PyObject *) descr);
        if (interned == NULL) {
            interned = (PyObject *) descr;
            if (PyDict_SetItem(structured_dtypes, interned, interned))
                return -1;
        }
        TRY(string_writer_put_char, w, (char) typenum);
        return string_writer_put_intp(w, (npy_intp) interned);
    }
#if NPY_API_VERSION >= 0x00000007
    if (PyTypeNum_ISDATETIME(typenum)) {
        PyArray_DatetimeMetaData *md;
        md = &(((PyArray_DatetimeDTypeMetaData *)descr->c_metadata)->meta);
        TRY(string_writer_put_char, w, (char) typenum);
        TRY(string_writer_put_char, w, (char) md->base);
        /* Bug fix: md->num was previously narrowed through (char) before
         * being widened into the 32-bit slot, truncating unit multipliers
         * above 127 and letting distinct datetime dtypes share a
         * fingerprint. Write the full value instead. */
        return string_writer_put_int32(w, (int) md->num);
    }
#endif
    return fingerprint_unrecognized();
}
/* Append the fingerprint of an arbitrary Python value to *w*.
 * Returns 0 on success, -1 with an exception set on failure
 * (NotImplementedError for unsupported types, via
 * fingerprint_unrecognized()). Recurses into tuple items, omitted-arg
 * default values, the first list item and a single set item. */
static int
compute_fingerprint(string_writer_t *w, PyObject *val)
{
    /*
     * Implementation note: for performance, we start with common
     * types that can be tested with fast checks.
     */
    if (val == Py_None)
        return string_writer_put_char(w, OP_NONE);
    if (PyBool_Check(val))
        return string_writer_put_char(w, OP_BOOL);
    /* Note we avoid matching int subclasses such as IntEnum */
    if (PyInt_CheckExact(val) || PyLong_CheckExact(val))
        return string_writer_put_char(w, OP_INT);
    if (PyFloat_Check(val))
        return string_writer_put_char(w, OP_FLOAT);
    if (PyComplex_CheckExact(val))
        return string_writer_put_char(w, OP_COMPLEX);
    if (PyTuple_Check(val)) {
        if(PyTuple_CheckExact(val)) {
            /* Plain tuple: '(' item-fingerprints ')' */
            Py_ssize_t i, n;
            n = PyTuple_GET_SIZE(val);
            TRY(string_writer_put_char, w, OP_START_TUPLE);
            for (i = 0; i < n; i++)
                TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i));
            TRY(string_writer_put_char, w, OP_END_TUPLE);
            return 0;
        }
        /* as per typeof.py, check "_asdict" for namedtuple. */
        else if(PyObject_HasAttrString(val, "_asdict"))
        {
            /*
             * This encodes the class name and field names of a namedtuple into
             * the fingerprint on the condition that the number of fields is
             * small (<10) and that the class name and field names are encodable
             * as ASCII.
             */
            PyObject * clazz = NULL;
            PyObject * name = NULL;
            PyObject * _fields = PyObject_GetAttrString(val, "_fields");
            PyObject * field = NULL;
            PyObject * ascii_str = NULL;
            Py_ssize_t i, n, j, flen;
            char * buf = NULL;
            int ret;
            /* NOTE(review): several of the early returns below leak
             * _fields and/or ascii_str; these are attribute-missing /
             * OOM paths only, but confirm before relying on this in
             * error-heavy workloads. */
            clazz = PyObject_GetAttrString(val, "__class__");
            if (clazz == NULL)
                return -1;
            name = PyObject_GetAttrString(clazz, "__name__");
            Py_DECREF(clazz);
            if (name == NULL)
                return -1;
            ascii_str = PyUnicode_AsEncodedString(name, "ascii", "ignore");
            Py_DECREF(name);
            if (ascii_str == NULL)
                return -1;
            ret = PyBytes_AsStringAndSize(ascii_str, &buf, &flen);
            if (ret == -1)
                return -1;
            /* Encode the class name bytes directly into the fingerprint */
            for(j = 0; j < flen; j++) {
                TRY(string_writer_put_char, w, buf[j]);
            }
            Py_DECREF(ascii_str);
            if (_fields == NULL)
                return -1;
            /* NOTE(review): iterates _fields using the tuple's own
             * length — assumes len(_fields) == len(val); confirm. */
            n = PyTuple_GET_SIZE(val);
            TRY(string_writer_put_char, w, OP_START_TUPLE);
            for (i = 0; i < n; i++) {
                field = PyTuple_GET_ITEM(_fields, i);
                if (field == NULL)
                    return -1;
                ascii_str = PyUnicode_AsEncodedString(field, "ascii", "ignore");
                if (ascii_str == NULL)
                    return -1;
                ret = PyBytes_AsStringAndSize(ascii_str, &buf, &flen);
                if (ret == -1)
                    return -1;
                /* Field name bytes, followed by the field value's own
                 * fingerprint */
                for(j = 0; j < flen; j++) {
                    TRY(string_writer_put_char, w, buf[j]);
                }
                Py_DECREF(ascii_str);
                TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i));
            }
            TRY(string_writer_put_char, w, OP_END_TUPLE);
            Py_DECREF(_fields);
            return 0;
        }
    }
    if (PyBytes_Check(val))
        return string_writer_put_char(w, OP_BYTES);
    if (PyByteArray_Check(val))
        return string_writer_put_char(w, OP_BYTEARRAY);
    if ((PyObject *) Py_TYPE(val) == omittedarg_type) {
        /* OmittedArg wrapper: '!' + fingerprint of the default value */
        PyObject *default_val = PyObject_GetAttr(val, str_value);
        if (default_val == NULL)
            return -1;
        TRY(string_writer_put_char, w, OP_OMITTED);
        TRY(compute_fingerprint, w, default_val);
        Py_DECREF(default_val);
        return 0;
    }
    if (PyArray_IsScalar(val, Generic)) {
        /* Note: PyArray_DescrFromScalar() may be a bit slow on
           non-trivial types. */
        PyArray_Descr *descr = PyArray_DescrFromScalar(val);
        if (descr == NULL)
            return -1;
        TRY(string_writer_put_char, w, OP_NP_SCALAR);
        TRY(compute_dtype_fingerprint, w, descr);
        Py_DECREF(descr);
        return 0;
    }
    if (PyArray_Check(val)) {
        /* ndarray: 'A' + ndim + layout char + mutability char + dtype */
        PyArrayObject *ary = (PyArrayObject *) val;
        int ndim = PyArray_NDIM(ary);
        TRY(string_writer_put_char, w, OP_NP_ARRAY);
        TRY(string_writer_put_int32, w, ndim);
        if (PyArray_IS_C_CONTIGUOUS(ary))
            TRY(string_writer_put_char, w, 'C');
        else if (PyArray_IS_F_CONTIGUOUS(ary))
            TRY(string_writer_put_char, w, 'F');
        else
            TRY(string_writer_put_char, w, 'A');
        if (PyArray_ISWRITEABLE(ary))
            TRY(string_writer_put_char, w, 'W');
        else
            TRY(string_writer_put_char, w, 'R');
        return compute_dtype_fingerprint(w, PyArray_DESCR(ary));
    }
    if (PyList_Check(val)) {
        Py_ssize_t n = PyList_GET_SIZE(val);
        if (n == 0) {
            PyErr_SetString(PyExc_ValueError,
                            "cannot compute fingerprint of empty list");
            return -1;
        }
        /* Only the first item is considered, as in typeof.py */
        TRY(string_writer_put_char, w, OP_LIST);
        TRY(compute_fingerprint, w, PyList_GET_ITEM(val, 0));
        return 0;
    }
    /* Note we only accept sets, not frozensets */
    if (Py_TYPE(val) == &PySet_Type) {
        Py_hash_t h;
        PyObject *item;
        Py_ssize_t pos = 0;
        /* Only one item is considered, as in typeof.py */
        if (!_PySet_NextEntry(val, &pos, &item, &h)) {
            /* Empty set */
            PyErr_SetString(PyExc_ValueError,
                            "cannot compute fingerprint of empty set");
            return -1;
        }
        TRY(string_writer_put_char, w, OP_SET);
        TRY(compute_fingerprint, w, item);
        return 0;
    }
    if (PyObject_CheckBuffer(val)) {
        Py_buffer buf;
        int flags = PyBUF_ND | PyBUF_STRIDES | PyBUF_FORMAT;
        char contig;
        int ndim;
        char readonly;
        /* Attempt to get a writable buffer, then fallback on read-only */
        if (PyObject_GetBuffer(val, &buf, flags | PyBUF_WRITABLE)) {
            PyErr_Clear();
            if (PyObject_GetBuffer(val, &buf, flags))
                goto _unrecognized;
        }
        if (PyBuffer_IsContiguous(&buf, 'C'))
            contig = 'C';
        else if (PyBuffer_IsContiguous(&buf, 'F'))
            contig = 'F';
        else
            contig = 'A';
        ndim = buf.ndim;
        readonly = buf.readonly ? 'R' : 'W';
        if (string_writer_put_char(w, OP_BUFFER) ||
            string_writer_put_int32(w, ndim) ||
            string_writer_put_char(w, contig) ||
            string_writer_put_char(w, readonly) ||
            string_writer_put_string(w, buf.format) ||
            /* We serialize the object's Python type as well, to
               distinguish between types which have Numba specializations
               (e.g. array.array() vs. memoryview)
            */
            string_writer_put_intp(w, (npy_intp) Py_TYPE(val))) {
            PyBuffer_Release(&buf);
            return -1;
        }
        PyBuffer_Release(&buf);
        return 0;
    }
    if (NUMBA_PyArray_DescrCheck(val)) {
        TRY(string_writer_put_char, w, OP_NP_DTYPE);
        return compute_dtype_fingerprint(w, (PyArray_Descr *) val);
    }
_unrecognized:
    /* Type not recognized */
    return fingerprint_unrecognized();
}
/* Compute *val*'s type fingerprint and return it as a bytes object,
 * or NULL with an exception set on failure. */
PyObject *
typeof_compute_fingerprint(PyObject *val)
{
    string_writer_t w;
    PyObject *res = NULL;
    string_writer_init(&w);
    if (compute_fingerprint(&w, val) == 0)
        res = PyBytes_FromStringAndSize(w.buf, w.n);
    string_writer_clear(&w);
    return res;
}
/*
 * Getting the typecode from a Type object.
 */
/* Read the integer typecode stored on a numba Type object's "_code"
 * attribute; returns -1 with an exception set on failure. */
static int
_typecode_from_type_object(PyObject *tyobj) {
    PyObject *code_attr;
    int result;
    code_attr = PyObject_GetAttrString(tyobj, "_code");
    if (code_attr == NULL)
        return -1;
    result = PyLong_AsLong(code_attr);
    Py_DECREF(code_attr);
    return result;
}
/* When we want to cache the type's typecode for later lookup, we need to
keep a reference to the returned type object so that it cannot be
deleted. This is because of the following events occurring when first
using a @jit function for a given set of types:
1. typecode_fallback requests a new typecode for an arbitrary Python value;
this implies creating a Numba type object (on the first dispatcher call);
the typecode cache is then populated.
2. matching of the typecode list in _dispatcherimpl.cpp fails, since the
typecode is new.
3. we have to compile: compile_and_invoke() is called, it will invoke
Dispatcher_Insert to register the new signature.
The reference to the Numba type object returned in step 1 is deleted as
soon as we call Py_DECREF() on it, since we are holding the only
reference. If this happens and we use the typecode we got to populate the
cache, then the cache won't ever return the correct typecode, and the
dispatcher will never successfully match the typecodes with those of
some already-compiled instance. So we need to make sure that we don't
call Py_DECREF() on objects whose typecode will be used to populate the
cache. This is ensured by calling _typecode_fallback with
retain_reference == 0.
Note that technically we are leaking the reference, since we do not continue
to hold a pointer to the type object that we get back from typeof_pyval.
However, we don't need to refer to it again, we just need to make sure that
it is never deleted.
*/
/* Resolve *val*'s typecode by calling back into Python.
 * Values exposing "_numba_type_" supply their type directly; otherwise
 * the dispatcher's typeof_pyval() is invoked. When retain_reference is
 * zero the type object's reference is dropped; see the large comment
 * above for why some callers must keep it alive instead.
 * Returns -1 with an exception set on failure. */
static int
_typecode_fallback(PyObject *dispatcher, PyObject *val,
                   int retain_reference) {
    PyObject *numba_type;
    int typecode;
    /*
     * For values that define "_numba_type_", which holds a numba Type
     * instance that should be used as the type of the value.
     * Note this is done here, not in typeof_typecode(), so that
     * some values can still benefit from fingerprint caching.
     */
    if (PyObject_HasAttr(val, str_numba_type)) {
        numba_type = PyObject_GetAttrString(val, "_numba_type_");
        if (!numba_type)
            return -1;
    }
    else {
        // Go back to the interpreter
        numba_type = PyObject_CallMethodObjArgs((PyObject *) dispatcher,
                                                str_typeof_pyval, val, NULL);
    }
    /* Redundant for the attribute branch (checked above), but required
     * for the dispatcher-call path. */
    if (!numba_type)
        return -1;
    typecode = _typecode_from_type_object(numba_type);
    if (!retain_reference)
        Py_DECREF(numba_type);
    return typecode;
}
/* Variations on _typecode_fallback for convenience */
/* Fallback that drops the type object's reference (used when the result
 * will not be stored in a long-lived cache). */
static
int typecode_fallback(PyObject *dispatcher, PyObject *val) {
    return _typecode_fallback(dispatcher, val, 0);
}
/* Fallback that deliberately keeps (leaks) a reference to the type
 * object so a typecode stored in a cache can never dangle — see the
 * explanatory comment above _typecode_fallback. */
static
int typecode_fallback_keep_ref(PyObject *dispatcher, PyObject *val) {
    return _typecode_fallback(dispatcher, val, 1);
}
/* A cache mapping fingerprints (string_writer_t *) to typecodes (int). */
static _Numba_hashtable_t *fingerprint_hashtable = NULL;
/* Hash a fingerprint's bytes with the FNV variant historically used for
 * Python 2 string hashing; the reserved value -1 is remapped to -2 as
 * in CPython. An empty fingerprint hashes to 0. */
static Py_uhash_t
hash_writer(const void *key)
{
    string_writer_t *writer = (string_writer_t *) key;
    Py_uhash_t x = 0;
    /* The old FNV algorithm used by Python 2 */
    if (writer->n > 0) {
        unsigned char *p = (unsigned char *) writer->buf;
        Py_ssize_t len = writer->n;
        x ^= *p << 7;
        while (--len >= 0)
            x = (1000003*x) ^ *p++;
        x ^= writer->n;
        if (x == (Py_uhash_t) -1)
            x = -2;
    }
    return x;
}
static int
compare_writer(const void *key, const _Numba_hashtable_entry_t *entry)
{
string_writer_t *v = (string_writer_t *) key;
string_writer_t *w = (string_writer_t *) entry->key;
if (v->n != w->n)
return 0;
return memcmp(v->buf, w->buf, v->n) == 0;
}
/* Try to compute *val*'s typecode using its fingerprint and the
 * fingerprint->typecode cache.
 */
/* On a cache hit this avoids re-entering Python entirely. On a miss the
 * slow fallback runs and the result is memoized; unsupported values
 * (NotImplementedError from compute_fingerprint) bypass the cache.
 * NOTE(review): if _Numba_HASHTABLE_SET fails, *key* itself is leaked
 * while the buffer just transferred into it is freed through &w —
 * OOM-only path, but confirm the double-management is acceptable. */
static int
typecode_using_fingerprint(PyObject *dispatcher, PyObject *val)
{
    int typecode;
    string_writer_t w;
    string_writer_init(&w);
    if (compute_fingerprint(&w, val)) {
        string_writer_clear(&w);
        if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
            /* Can't compute a type fingerprint for the given value,
               fall back on typeof() without caching. */
            PyErr_Clear();
            return typecode_fallback(dispatcher, val);
        }
        return -1;
    }
    if (_Numba_HASHTABLE_GET(fingerprint_hashtable, &w, typecode) > 0) {
        /* Cache hit */
        string_writer_clear(&w);
        return typecode;
    }
    /* Not found in cache: invoke pure Python typeof() and cache result.
     * Note we have to keep the type alive forever as explained
     * above in _typecode_fallback().
     */
    typecode = typecode_fallback_keep_ref(dispatcher, val);
    if (typecode >= 0) {
        string_writer_t *key = (string_writer_t *) malloc(sizeof(string_writer_t));
        if (key == NULL) {
            string_writer_clear(&w);
            PyErr_NoMemory();
            return -1;
        }
        /* Ownership of the string writer's buffer will be transferred
         * to the hash table.
         */
        string_writer_move(key, &w);
        if (_Numba_HASHTABLE_SET(fingerprint_hashtable, key, typecode)) {
            string_writer_clear(&w);
            PyErr_NoMemory();
            return -1;
        }
    }
    return typecode;
}
/*
 * Direct lookup table for extra-fast typecode resolution of simple array types.
 */
#define N_DTYPES 12
#define N_NDIM 5 /* Fast path for up to 5D array */
#define N_LAYOUT 3
/* [ndim-1][layout][dtype-index] -> typecode; entries start at -1 and are
 * populated lazily by typecode_ndarray(). */
static int cached_arycode[N_NDIM][N_LAYOUT][N_DTYPES];
/* Convert a Numpy dtype number to an internal index into cached_arycode.
   The returned value must also be a valid index into BASIC_TYPECODES.
   Returns -1 for types not covered by the fast lookup table. */
static int dtype_num_to_typecode(int type_num) {
    if (type_num == NPY_INT8)
        return 0;
    if (type_num == NPY_INT16)
        return 1;
    if (type_num == NPY_INT32)
        return 2;
    if (type_num == NPY_INT64)
        return 3;
    if (type_num == NPY_UINT8)
        return 4;
    if (type_num == NPY_UINT16)
        return 5;
    if (type_num == NPY_UINT32)
        return 6;
    if (type_num == NPY_UINT64)
        return 7;
    if (type_num == NPY_FLOAT32)
        return 8;
    if (type_num == NPY_FLOAT64)
        return 9;
    if (type_num == NPY_COMPLEX64)
        return 10;
    if (type_num == NPY_COMPLEX128)
        return 11;
    /* Type not included in the global lookup table */
    return -1;
}
/* Look up a scalar dtype's cached typecode; -1 when absent.
 * PyDict_GetItem returns a borrowed reference and suppresses errors. */
static
int get_cached_typecode(PyArray_Descr* descr) {
    PyObject* tmpobject = PyDict_GetItem(typecache, (PyObject*)descr);
    if (tmpobject == NULL)
        return -1;
    return PyLong_AsLong(tmpobject);
}
/* Record a scalar dtype -> typecode mapping in the typecache dict.
 * NOTE(review): PyLong_FromLong may return NULL on OOM and is passed
 * straight to PyDict_SetItem; errors are silently ignored — confirm
 * this best-effort behaviour is intended. */
static
void cache_typecode(PyArray_Descr* descr, int typecode) {
    PyObject* value = PyLong_FromLong(typecode);
    PyDict_SetItem(typecache, (PyObject*)descr, value);
    Py_DECREF(value);
}
/* Build the (ndim, layout, descr) tuple used as an ndarray_typecache
 * key. Returns a new reference, or NULL on allocation failure.
 * NOTE(review): the intermediate PyLong_FromLong results are not
 * NULL-checked before PyTuple_Pack — OOM path only; confirm. */
static
PyObject* ndarray_key(int ndim, int layout, PyArray_Descr* descr) {
    PyObject* tmpndim = PyLong_FromLong(ndim);
    PyObject* tmplayout = PyLong_FromLong(layout);
    PyObject* key = PyTuple_Pack(3, tmpndim, tmplayout, descr);
    Py_DECREF(tmpndim);
    Py_DECREF(tmplayout);
    return key;
}
/* Look up the cached typecode for an (ndim, layout, dtype) combination.
 * Returns -1 when the combination is not cached or the key could not be
 * built.
 * Fix: the lookup key was previously leaked on every cache miss because
 * the early return skipped its Py_DECREF; the key is now released
 * unconditionally once the (borrowed-reference) lookup is done. */
static
int get_cached_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr) {
    PyObject *tmpobject;
    PyObject *key = ndarray_key(ndim, layout, descr);
    if (key == NULL)
        return -1;
    tmpobject = PyDict_GetItem(ndarray_typecache, key);
    Py_DECREF(key);
    if (tmpobject == NULL)
        return -1;
    return PyLong_AsLong(tmpobject);
}
static
void cache_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr,
int typecode) {
PyObject* key = ndarray_key(ndim, layout, descr);
PyObject* value = PyLong_FromLong(typecode);
PyDict_SetItem(ndarray_typecache, key, value);
Py_DECREF(key);
Py_DECREF(value);
}
/* Resolve the Numba typecode of a NumPy ndarray.
 *
 * Fast path: a "behaved" (aligned + writeable) array whose dtype is one of
 * the basic types and whose ndim is in [1, N_NDIM] is resolved via the
 * cached_arycode direct-lookup table.  Structured (NPY_VOID) arrays go
 * through the ndarray_typecache dict; everything else falls through to
 * fingerprint-based resolution. */
static
int typecode_ndarray(PyObject *dispatcher, PyArrayObject *ary) {
    int typecode;
    int dtype;
    int ndim = PyArray_NDIM(ary);
    int layout = 0;

    /* The order in which we check for the right contiguous-ness is important.
       The order must match the order by numba.numpy_support.map_layout.
       Further, only *contiguous-ness* is checked, not alignment, byte order or
       write permissions.
    */
    if (PyArray_IS_C_CONTIGUOUS(ary)){
        layout = 1;
    } else if (PyArray_IS_F_CONTIGUOUS(ary)) {
        layout = 2;
    }

    /* the typecode cache by convention is for "behaved" arrays (aligned and
     * writeable), all others must be forced to the fall back */
    if (!PyArray_ISBEHAVED(ary)) goto FALLBACK;

    if (ndim <= 0 || ndim > N_NDIM) goto FALLBACK;

    dtype = dtype_num_to_typecode(PyArray_TYPE(ary));
    if (dtype == -1) goto FALLBACK;

    /* Fast path, using direct table lookup */
    assert(layout < N_LAYOUT);
    assert(ndim <= N_NDIM);
    assert(dtype < N_DTYPES);

    typecode = cached_arycode[ndim - 1][layout][dtype];
    if (typecode == -1) {
        /* First use of this table entry, so it requires populating */
        typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary);
        cached_arycode[ndim - 1][layout][dtype] = typecode;
    }
    return typecode;

FALLBACK:
    /* Slower path, for non-trivial array types */

    /* If this isn't a structured array then we can't use the cache */
    if (PyArray_TYPE(ary) != NPY_VOID)
        return typecode_using_fingerprint(dispatcher, (PyObject *) ary);

    /* Check type cache */
    typecode = get_cached_ndarray_typecode(ndim, layout, PyArray_DESCR(ary));
    if (typecode == -1) {
        /* First use of this type, use fallback and populate the cache */
        typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary);
        cache_ndarray_typecode(ndim, layout, PyArray_DESCR(ary), typecode);
    }
    return typecode;
}
/* Resolve the Numba typecode of a NumPy array scalar.
 *
 * Structured (NPY_VOID) scalars are cached per-descriptor in `typecache`;
 * scalars of a basic dtype map directly into BASIC_TYPECODES; anything
 * else (including a failed descriptor lookup) falls back to
 * fingerprint-based resolution. */
static
int typecode_arrayscalar(PyObject *dispatcher, PyObject* aryscalar) {
    int typecode;
    PyArray_Descr *descr;
    /* PyArray_DescrFromScalar returns a new reference, released below. */
    descr = PyArray_DescrFromScalar(aryscalar);
    if (!descr)
        return typecode_using_fingerprint(dispatcher, aryscalar);

    /* Is it a structured scalar? */
    if (descr->type_num == NPY_VOID) {
        typecode = get_cached_typecode(descr);
        if (typecode == -1) {
            /* Resolve through fallback then populate cache */
            typecode = typecode_fallback_keep_ref(dispatcher, aryscalar);
            cache_typecode(descr, typecode);
        }
        Py_DECREF(descr);
        return typecode;
    }

    /* Is it one of the well-known basic types? */
    typecode = dtype_num_to_typecode(descr->type_num);
    Py_DECREF(descr);
    if (typecode == -1)
        return typecode_using_fingerprint(dispatcher, aryscalar);
    /* dtype_num_to_typecode returned an index, not a typecode: translate. */
    return BASIC_TYPECODES[typecode];
}
/* Resolve the Numba typecode of a CUDA device array.
 *
 * The object is duck-typed: `flags`, `ndim` and `dtype.num` are read via
 * Python attribute access (there is no C-level struct for device arrays).
 * Mirrors the fast path of typecode_ndarray(); any missing or malformed
 * attribute clears the error and drops to the fingerprint fallback. */
static
int typecode_devicendarray(PyObject *dispatcher, PyObject *ary)
{
    int typecode;
    int dtype;
    int ndim;
    int layout = 0;
    PyObject *ndim_obj = nullptr;
    PyObject *num_obj = nullptr;
    PyObject *dtype_obj = nullptr;
    int dtype_num = 0;

    PyObject* flags = PyObject_GetAttrString(ary, "flags");
    if (flags == NULL)
    {
        PyErr_Clear();
        goto FALLBACK;
    }

    /* Layout encoding matches typecode_ndarray: 0 = any, 1 = C, 2 = F.
       `flags` is expected to behave like a dict here (PyDict_GetItemString). */
    if (PyDict_GetItemString(flags, "C_CONTIGUOUS") == Py_True) {
        layout = 1;
    } else if (PyDict_GetItemString(flags, "F_CONTIGUOUS") == Py_True) {
        layout = 2;
    }
    Py_DECREF(flags);

    ndim_obj = PyObject_GetAttrString(ary, "ndim");
    if (ndim_obj == NULL) {
        /* If there's no ndim, try to proceed by clearing the error and using the
         * fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }
    ndim = PyLong_AsLong(ndim_obj);
    Py_DECREF(ndim_obj);
    if (PyErr_Occurred()) {
        /* ndim wasn't an integer for some reason - unlikely to happen, but try
         * the fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }

    if (ndim <= 0 || ndim > N_NDIM)
        goto FALLBACK;

    dtype_obj = PyObject_GetAttrString(ary, "dtype");
    if (dtype_obj == NULL) {
        /* No dtype: try the fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }
    num_obj = PyObject_GetAttrString(dtype_obj, "num");
    Py_DECREF(dtype_obj);
    if (num_obj == NULL) {
        /* This strange dtype has no num - try the fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }
    dtype_num = PyLong_AsLong(num_obj);
    Py_DECREF(num_obj);
    if (PyErr_Occurred()) {
        /* num wasn't an integer for some reason - unlikely to happen, but try
         * the fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }

    dtype = dtype_num_to_typecode(dtype_num);
    if (dtype == -1) {
        /* Not a dtype we have in the global lookup table. */
        goto FALLBACK;
    }

    /* Fast path, using direct table lookup */
    assert(layout < N_LAYOUT);
    assert(ndim <= N_NDIM);
    assert(dtype < N_DTYPES);
    typecode = cached_arycode[ndim - 1][layout][dtype];
    if (typecode == -1) {
        /* First use of this table entry, so it requires populating */
        typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary);
        cached_arycode[ndim - 1][layout][dtype] = typecode;
    }
    return typecode;

FALLBACK:
    /* Slower path, for non-trivial array types. At present this always uses
       the fingerprinting to get the typecode. Future optimization might
       implement a cache, but this would require some fast equivalent of
       PyArray_DESCR for a device array. */
    return typecode_using_fingerprint(dispatcher, (PyObject *) ary);
}
/* Top-level type resolution entry point: return the Numba typecode of an
 * arbitrary Python value.  Builtin scalars (int/float/complex) are handled
 * inline; NumPy array scalars, ndarrays, CUDA device arrays and ndarray
 * subclasses dispatch to their helpers; everything else is resolved by
 * fingerprinting. */
extern "C" int
typeof_typecode(PyObject *dispatcher, PyObject *val)
{
    PyTypeObject *tyobj = Py_TYPE(val);
    int subtype_attr;
    /* This needs to be kept in sync with Dispatcher.typeof_pyval(),
     * otherwise funny things may happen.
     */
    if (tyobj == &PyInt_Type || tyobj == &PyLong_Type) {
#if SIZEOF_VOID_P < 8
        /* On 32-bit platforms, choose between tc_intp (32-bit) and tc_int64 */
        PY_LONG_LONG ll = PyLong_AsLongLong(val);
        if (ll == -1 && PyErr_Occurred()) {
            /* The integer is too large, let us truncate it */
            PyErr_Clear();
            return tc_int64;
        }

        if ((ll & 0xffffffff) != ll)
            return tc_int64;
#endif
        return tc_intp;
    }
    else if (tyobj == &PyFloat_Type)
        return tc_float64;
    else if (tyobj == &PyComplex_Type)
        return tc_complex128;
    /* Array scalar handling */
    else if (PyArray_CheckScalar(val)) {
        return typecode_arrayscalar(dispatcher, val);
    }
    /* Array handling */
    else if (tyobj == &PyArray_Type) {
        return typecode_ndarray(dispatcher, (PyArrayObject*)val);
    }
    /* Subtype of CUDA device array */
    else if (PyType_IsSubtype(tyobj, &DeviceArrayType)) {
        return typecode_devicendarray(dispatcher, val);
    }
    /* Subtypes of Array handling */
    else if (PyType_IsSubtype(tyobj, &PyArray_Type)) {
        /* By default, Numba will treat all numpy.ndarray subtypes as if they
           were the base numpy.ndarray type.  In this way, ndarray subtypes
           can easily use all of the support that Numba has for ndarray
           methods.

           EXPERIMENTAL: There may be cases where a programmer would NOT want
           ndarray subtypes to be treated exactly like the base numpy.ndarray.
           For this purpose, a currently experimental feature allows a
           programmer to add an attribute named
           __numba_array_subtype_dispatch__ to their ndarray subtype.  This
           attribute can have any value as Numba only checks for the presence
           of the attribute and not its value.  When present, a ndarray subtype
           will NOT be typed by Numba as a regular ndarray but this code will
           fallthrough to the typecode_using_fingerprint call, which will
           create a new unique Numba typecode for this ndarray subtype.  This
           behavior has several significant effects.  First, since this
           ndarray subtype will be treated as a different type by Numba,
           the Numba dispatcher would then specialize on this type.  So, if
           there was a function that had several parameters that were
           expected to be either numpy.ndarray or a subtype of ndarray, then
           Numba would compile a custom version of this function for each
           combination of base and subtypes that were actually passed to the
           function.  Second, because this subtype would now be treated as
           a totally separate type, it will cease to function in Numba unless
           an implementation of that type is provided to Numba through the
           Numba type extension mechanisms (e.g., overload).  This would
           typically start with defining a Numba type corresponding to the
           ndarray subtype.  This is the same concept as how Numba has a
           corollary of numpy.ndarray in its type system as types.Array.
           Next, one would typically defining boxing and unboxing routines
           and the associated memory model.  Then, overloads for NumPy
           functions on that type would be created.  However,
           if the same default array memory model is used then there are tricks
           one can do to look at Numba's internal types.Array registries and
           to quickly apply those to the subtype as well.  In this manner,
           only those cases where the base ndarray and the ndarray subtype
           behavior differ would new custom functions need to be written for
           the subtype.  Finally,
           after adding support for the new type, you would have a separate
           ndarray subtype that could operate with other objects of the same
           subtype but would not support interoperation with regular NumPy
           ndarrays.  In standard Python, this interoperation is provided
           through the __array_ufunc__ magic method in the ndarray subtype
           class and in that case the function operates on ndarrays or their
           subtypes.  This idea is extended into Numba such that
           __array_ufunc__ can be present in a Numba array type object.
           In this case, this function is consulted during Numba typing and
           so the arguments to __array_ufunc__ are Numba types instead of
           ndarray subtypes.  The array type __array_ufunc__ returns the
           type of the output of the given ufunc.
        */
        subtype_attr = PyObject_HasAttrString(val, "__numba_array_subtype_dispatch__");
        if (!subtype_attr) {
            return typecode_ndarray(dispatcher, (PyArrayObject*)val);
        }
    }

    return typecode_using_fingerprint(dispatcher, val);
}
static
void* wrap_import_array(void) {
    /* NOTE: import_array() is a NumPy macro that, on failure, sets a Python
     * error and executes `return NULL;` out of the *enclosing* function.
     * Wrapping it here lets init_numpy() turn that hidden early return into
     * an ordinary boolean result. */
    import_array(); /* import array returns NULL on failure */
    return (void*)1;
}
/* Initialize the NumPy C API via wrap_import_array().
   Returns non-zero on success, zero on failure. */
static
int init_numpy(void) {
    if (wrap_import_array() == NULL)
        return 0;
    return 1;
}
/*
 * typeof_init(omittedarg_type, typecode_dict)
 * (called from dispatcher.py to fill in missing information)
 *
 * Pulls the typecode of each basic type out of `typecode_dict` into the
 * tc_* globals and BASIC_TYPECODES, creates the typecode cache dicts and
 * the fingerprint hash table, and interns a few attribute-name strings.
 * Returns None, or NULL with an exception set on failure.
 */
extern "C" PyObject *
typeof_init(PyObject *self, PyObject *args)
{
    PyObject *tmpobj;
    PyObject *dict;
    int index = 0;

    if (!PyArg_ParseTuple(args, "O!O!:typeof_init",
                          &PyType_Type, &omittedarg_type,
                          &PyDict_Type, &dict))
        return NULL;

    /* Initialize Numpy API */
    if ( ! init_numpy() ) {
        return NULL;
    }

    /* Fetch the typecode registered under the name S, store it in the
       tc_<S> global and append it to BASIC_TYPECODES.  The expansion order
       below must match the index order of dtype_num_to_typecode(). */
    #define UNWRAP_TYPE(S)                                              \
        if(!(tmpobj = PyDict_GetItemString(dict, #S))) return NULL;     \
        else {  tc_##S = PyLong_AsLong(tmpobj);                         \
                BASIC_TYPECODES[index++] = tc_##S;  }

    UNWRAP_TYPE(int8)
    UNWRAP_TYPE(int16)
    UNWRAP_TYPE(int32)
    UNWRAP_TYPE(int64)

    UNWRAP_TYPE(uint8)
    UNWRAP_TYPE(uint16)
    UNWRAP_TYPE(uint32)
    UNWRAP_TYPE(uint64)

    UNWRAP_TYPE(float32)
    UNWRAP_TYPE(float64)

    UNWRAP_TYPE(complex64)
    UNWRAP_TYPE(complex128)

    /* tc_intp mirrors the native pointer width. */
    switch(sizeof(void*)) {
    case 4:
        tc_intp = tc_int32;
        break;
    case 8:
        tc_intp = tc_int64;
        break;
    default:
        PyErr_SetString(PyExc_AssertionError, "sizeof(void*) != {4, 8}");
        return NULL;
    }

    #undef UNWRAP_TYPE

    typecache = PyDict_New();
    ndarray_typecache = PyDict_New();
    structured_dtypes = PyDict_New();
    if (typecache == NULL || ndarray_typecache == NULL ||
        structured_dtypes == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "failed to create type cache");
        return NULL;
    }

    fingerprint_hashtable = _Numba_hashtable_new(sizeof(int),
                                                 hash_writer,
                                                 compare_writer);
    if (fingerprint_hashtable == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* initialize cached_arycode to all ones (in bits), i.e. every entry is
       -1, meaning "not populated yet" */
    memset(cached_arycode, 0xFF, sizeof(cached_arycode));

    str_typeof_pyval = PyString_InternFromString("typeof_pyval");
    str_value = PyString_InternFromString("value");
    str_numba_type = PyString_InternFromString("_numba_type_");
    if (!str_value || !str_typeof_pyval || !str_numba_type)
        return NULL;

    Py_RETURN_NONE;
}
#ifndef NUMBA_TYPEOF_H_
#define NUMBA_TYPEOF_H_

#ifdef __cplusplus
extern "C" {
#endif

/* Fill in module state (omitted-arg type, basic typecodes, caches);
   called from dispatcher.py. */
extern PyObject *typeof_init(PyObject *self, PyObject *args);
/* Return the Numba typecode for an arbitrary Python value. */
extern int typeof_typecode(PyObject *dispatcher, PyObject *val);
/* Compute the type fingerprint for a Python value
   (implementation not visible in this chunk). */
extern PyObject *typeof_compute_fingerprint(PyObject *val);

#ifdef __cplusplus
}
#endif

#endif /* NUMBA_TYPEOF_H_ */
This source diff could not be displayed because it is too large. You can view the blob instead.
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain.
# Generated by versioneer-0.28
# https://github.com/python-versioneer/python-versioneer
"""Git implementation of _version.py."""
import errno
import os
import re
import subprocess
import sys
from typing import Callable, Dict
import functools
def get_keywords():
    """Return the git-archive keyword values used for version lookup."""
    # These strings get substituted by git during `git archive`.
    # setup.py/versioneer.py greps for the variable names, so each one
    # must stay defined on a line of its own.
    git_refnames = " (tag: 0.58.1, release0.58)"
    git_full = "d4460feb8c91213e7b89f97b632d19e34a776cd3"
    git_date = "2023-10-16 15:33:43 +0200"
    return {"refnames": git_refnames, "full": git_full, "date": git_date}
class VersioneerConfig:
    """Container for Versioneer configuration parameters."""
    # Plain attribute bag; instances are populated attribute-by-attribute
    # in get_config(): VCS, style, tag_prefix, parentdir_prefix,
    # versionfile_source, verbose.
def get_config():
    """Create, populate and return the VersioneerConfig() object."""
    # These values are baked in when 'setup.py versioneer' generates
    # _version.py.
    cfg = VersioneerConfig()
    settings = {
        "VCS": "git",
        "style": "pep440",
        "tag_prefix": "",
        "parentdir_prefix": "numba-",
        "versionfile_source": "numba/_version.py",
        "verbose": False,
    }
    for attr, value in settings.items():
        setattr(cfg, attr, value)
    return cfg
class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


# Placeholder kept for versioneer compatibility; not populated in this file.
LONG_VERSION_PY: Dict[str, str] = {}
# Registry of VCS handlers: {vcs_name: {method_name: handler_function}},
# filled in by the register_vcs_handler decorator below.
HANDLERS: Dict[str, Dict[str, Callable]] = {}
def register_vcs_handler(vcs, method):  # decorator
    """Create decorator to mark a method as the handler of a VCS."""
    def decorate(f):
        """Store f in HANDLERS[vcs][method]."""
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Call the given command(s).

    Each executable name in `commands` is tried in turn (e.g. ["git.cmd",
    "git.exe"] on Windows) until one can be spawned with `args`.

    Returns a (stdout, returncode) tuple: (None, None) when no executable
    could be started, (None, returncode) when the command ran but exited
    non-zero, and (decoded stdout, returncode) on success.
    """
    assert isinstance(commands, list)
    process = None

    popen_kwargs = {}
    if sys.platform == "win32":
        # This hides the console window if pythonw.exe is used
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        popen_kwargs["startupinfo"] = startupinfo

    for command in commands:
        try:
            dispcmd = str([command] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=(subprocess.PIPE if hide_stderr
                                               else None), **popen_kwargs)
            break
        except OSError:
            e = sys.exc_info()[1]
            if e.errno == errno.ENOENT:
                # Executable not found: try the next candidate name.
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(e)
            return None, None
    else:
        # for/else: no candidate could be spawned at all.
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = process.communicate()[0].strip().decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, process.returncode
    return stdout, process.returncode
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    tried = []
    candidate = root
    for _ in range(3):
        basename = os.path.basename(candidate)
        if basename.startswith(parentdir_prefix):
            return {
                "version": basename[len(parentdir_prefix):],
                "full-revisionid": None,
                "dirty": False,
                "error": None,
                "date": None,
            }
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level
    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file."""
    # The code embedded in _version.py can just fetch these keyword values
    # directly.  When used from setup.py we don't want to import
    # _version.py, so the values are scraped with a regexp instead.  This
    # function is not used from _version.py itself.
    keywords = {}
    wanted = (("git_refnames =", "refnames"),
              ("git_full =", "full"),
              ("git_date =", "date"))
    try:
        with open(versionfile_abs, "r") as fobj:
            for line in fobj:
                stripped = line.strip()
                for prefix, name in wanted:
                    if stripped.startswith(prefix):
                        mo = re.search(r'=\s*"(.*)"', line)
                        if mo:
                            keywords[name] = mo.group(1)
    except OSError:
        # Missing/unreadable file: return whatever was gathered (nothing).
        pass
    return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    `keywords` is the dict produced by get_keywords()/git_get_keywords().
    Raises NotThisMethod when the keywords are missing or were never
    expanded by git-archive.
    """
    if "refnames" not in keywords:
        raise NotThisMethod("Short version file found")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date.splitlines()[-1]

        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp.  However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r'\d', r)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            # Filter out refs that exactly match prefix or that don't start
            # with a number once the prefix is stripped (mostly a concern
            # when prefix is '')
            if not re.match(r'\d', r):
                continue
            if verbose:
                print("picking %s" % r)
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Returns a `pieces` dict consumed by the render_* functions, with keys:
    long, short, branch, closest-tag, distance, dirty, date, error.
    Raises NotThisMethod when `root` is not a git checkout or git fails.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    # GIT_DIR can interfere with correct operation of Versioneer.
    # It may be intended to be passed to the Versioneer-versioned project,
    # but that should not change where we get our version from.
    env = os.environ.copy()
    env.pop("GIT_DIR", None)
    runner = functools.partial(runner, env=env)

    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
                   hide_stderr=not verbose)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = runner(GITS, [
        "describe", "--tags", "--dirty", "--always", "--long",
        "--match", f"{tag_prefix}[[:digit:]]*"
    ], cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
                             cwd=root)
    # --abbrev-ref was added in git-1.6.3
    if rc != 0 or branch_name is None:
        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
    branch_name = branch_name.strip()

    if branch_name == "HEAD":
        # If we aren't exactly on a branch, pick a branch which represents
        # the current commit. If all else fails, we are on a branchless
        # commit.
        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
        # --contains was added in git-1.5.4
        if rc != 0 or branches is None:
            raise NotThisMethod("'git branch --contains' returned error")
        branches = branches.split("\n")

        # Remove the first line if we're running detached
        if "(" in branches[0]:
            branches.pop(0)

        # Strip off the leading "* " from the list of branches.
        branches = [branch[2:] for branch in branches]
        if "master" in branches:
            branch_name = "master"
        elif not branches:
            branch_name = None
        else:
            # Pick the first branch that is returned. Good or bad.
            branch_name = branches[0]

    pieces["branch"] = branch_name

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparsable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
        pieces["distance"] = len(out.split())  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
    # Use only the last line. Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    PEP 440 local version labels begin with "+"; once the closest tag
    already contains one, further segments are joined with ".".
    """
    # Fix: pieces["closest-tag"] may legitimately be present but None (set
    # by git_pieces_from_vcs when no tag is found), in which case
    # `"+" in None` would raise TypeError; coerce None to "".
    if "+" in (pieces.get("closest-tag") or ""):
        return "."
    return "+"
def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        parts = ["0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])]
        if pieces["dirty"]:
            parts.append(".dirty")
        return "".join(parts)
    parts = [tag]
    if pieces["distance"] or pieces["dirty"]:
        parts.append(plus_or_dot(pieces))
        parts.append("%d.g%s" % (pieces["distance"], pieces["short"]))
        if pieces["dirty"]:
            parts.append(".dirty")
    return "".join(parts)
def render_pep440_branch(pieces):
    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .

    The ".dev0" means not master branch. Note that .dev0 sorts backwards
    (a feature branch will appear "older" than the master branch).

    Exceptions:
    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
    """
    off_master = pieces["branch"] != "master"
    tag = pieces["closest-tag"]
    if tag:
        rendered = tag
        if pieces["distance"] or pieces["dirty"]:
            if off_master:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
        return rendered
    # exception #1
    rendered = "0"
    if off_master:
        rendered += ".dev0"
    rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
    if pieces["dirty"]:
        rendered += ".dirty"
    return rendered
def pep440_split_post(ver):
    """Split pep440 version string at the post-release segment.

    Returns (release, post) where `release` is everything before the first
    ".post" segment and `post` is the post-release number, 0 when the
    segment is present but empty, or None when the string does not split
    into exactly two parts.
    """
    segments = ver.split(".post")
    if len(segments) != 2:
        return segments[0], None
    release, post = segments
    return release, int(post or 0)
def render_pep440_pre(pieces):
    """TAG[.postN.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post0.dev%d" % pieces["distance"]
    if not pieces["distance"]:
        # Sitting exactly on the tag: the tag itself is the version.
        return tag
    # Bump (or create) the post-release segment and append a dev segment
    # carrying the commit distance.
    base, post = pep440_split_post(tag)
    if post is not None:
        return base + ".post%d.dev%d" % (post + 1, pieces["distance"])
    return base + ".post0.dev%d" % pieces["distance"]
def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        out = [tag]
        if pieces["distance"] or pieces["dirty"]:
            out.append(".post%d" % pieces["distance"])
            if pieces["dirty"]:
                out.append(".dev0")
            out.append(plus_or_dot(pieces))
            out.append("g%s" % pieces["short"])
        return "".join(out)
    # exception #1
    out = ["0.post%d" % pieces["distance"]]
    if pieces["dirty"]:
        out.append(".dev0")
    out.append("+g%s" % pieces["short"])
    return "".join(out)
def render_pep440_post_branch(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .

    The ".dev0" means not master branch.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    off_master = pieces["branch"] != "master"
    if tag:
        out = [tag]
        if pieces["distance"] or pieces["dirty"]:
            out.append(".post%d" % pieces["distance"])
            if off_master:
                out.append(".dev0")
            out.append(plus_or_dot(pieces))
            out.append("g%s" % pieces["short"])
            if pieces["dirty"]:
                out.append(".dirty")
        return "".join(out)
    # exception #1
    out = ["0.post%d" % pieces["distance"]]
    if off_master:
        out.append(".dev0")
    out.append("+g%s" % pieces["short"])
    if pieces["dirty"]:
        out.append(".dirty")
    return "".join(out)
def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        rendered = tag
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
        return rendered
    # exception #1
    rendered = "0.post%d" % pieces["distance"]
    if pieces["dirty"]:
        rendered += ".dev0"
    return rendered
def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        rendered = tag
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    return rendered + ("-dirty" if pieces["dirty"] else "")
def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        rendered = "%s-%d-g%s" % (tag, pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    return rendered + ("-dirty" if pieces["dirty"] else "")
def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        # An upstream step failed; surface the error instead of a version.
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    renderers = {
        "pep440": render_pep440,
        "pep440-branch": render_pep440_branch,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-post-branch": render_pep440_post_branch,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    try:
        renderer = renderers[style]
    except KeyError:
        raise ValueError("unknown style '%s'" % style)
    rendered = renderer(pieces)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
def get_versions():
    """Get version information or return default if unable to do so."""
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    cfg = get_config()
    verbose = cfg.verbose

    # Strategy 1: expanded git-archive keywords baked into this file.
    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        # No __file__ (frozen interpreter): the remaining strategies all
        # need a filesystem root, so give up.
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": "unable to find root of source tree",
                "date": None}

    # Strategy 2: ask git directly (checked-out source tree).
    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    # Strategy 3: parse the version out of the parent directory name.
    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None,
            "error": "unable to compute version", "date": None}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment