Commit 1fb0017a, authored by dugupeiwen.

Commit message: init 0.58

(no parent commits)
"""
Expose top-level symbols that are safe for import *
"""
import platform
import re
import sys
import warnings
# ---------------------- WARNING WARNING WARNING ----------------------------
# THIS MUST RUN FIRST, DO NOT MOVE... SEE DOCSTRING IN _ensure_critical_deps
def _ensure_critical_deps():
"""
Make sure the Python, NumPy and SciPy present are supported versions.
This has to be done _before_ importing anything from Numba such that
incompatible versions can be reported to the user. If this occurs _after_
importing things from Numba and there's an issue in e.g. a Numba c-ext, a
SystemError might have occurred which prevents reporting the likely cause of
the problem (incompatible versions of critical dependencies).
"""
#NOTE THIS CODE SHOULD NOT IMPORT ANYTHING FROM NUMBA!
def extract_version(mod):
return tuple(map(int, mod.__version__.split('.')[:2]))
PYVERSION = sys.version_info[:2]
if PYVERSION < (3, 8):
msg = ("Numba needs Python 3.8 or greater. Got Python "
f"{PYVERSION[0]}.{PYVERSION[1]}.")
raise ImportError(msg)
import numpy as np
numpy_version = extract_version(np)
if numpy_version < (1, 22):
msg = (f"Numba needs NumPy 1.22 or greater. Got NumPy "
f"{numpy_version[0]}.{numpy_version[1]}.")
raise ImportError(msg)
elif numpy_version > (1, 26):
raise ImportError("Numba needs NumPy 1.26 or less")
try:
import scipy
except ImportError:
pass
else:
sp_version = extract_version(scipy)
if sp_version < (1, 0):
msg = ("Numba requires SciPy version 1.0 or greater. Got SciPy "
f"{scipy.__version__}.")
raise ImportError(msg)
_ensure_critical_deps()
# END DO NOT MOVE
# ---------------------- WARNING WARNING WARNING ----------------------------
from ._version import get_versions
from numba.misc.init_utils import generate_version_info
__version__ = get_versions()['version']
version_info = generate_version_info(__version__)
del get_versions
del generate_version_info
from numba.core import config
from numba.core import types, errors
# Re-export typeof
from numba.misc.special import (
typeof, prange, pndindex, gdb, gdb_breakpoint, gdb_init,
literally, literal_unroll,
)
# Re-export error classes
from numba.core.errors import *
# Re-export types itself
import numba.core.types as types
# Re-export all type names
from numba.core.types import *
# Re-export decorators
from numba.core.decorators import (cfunc, generated_jit, jit, njit, stencil,
jit_module)
# Re-export vectorize decorators and the thread layer querying function
from numba.np.ufunc import (vectorize, guvectorize, threading_layer,
get_num_threads, set_num_threads,
set_parallel_chunksize, get_parallel_chunksize,
get_thread_id)
# Re-export Numpy helpers
from numba.np.numpy_support import carray, farray, from_dtype
# Re-export experimental
from numba import experimental
# Initialize withcontexts
import numba.core.withcontexts
from numba.core.withcontexts import objmode_context as objmode
from numba.core.withcontexts import parallel_chunksize
# Initialize target extensions
import numba.core.target_extension
# Initialize typed containers
import numba.typed
# Keep this for backward compatibility.
def test(argv, **kwds):
    """
    Run the Numba test suite.

    Kept at the top level for backward compatibility.  *argv* and **kwds
    are forwarded unchanged to ``numba.testing._runtests.main`` and its
    result is returned.
    """
    # Import the test machinery lazily: pulling in `unittest` and other test
    # dependencies here would slow down every plain `import numba`.
    from numba.testing import _runtests
    return _runtests.main(argv, **kwds)
__all__ = """
cfunc
from_dtype
guvectorize
jit
experimental
njit
stencil
jit_module
typeof
prange
gdb
gdb_breakpoint
gdb_init
vectorize
objmode
literal_unroll
get_num_threads
set_num_threads
set_parallel_chunksize
get_parallel_chunksize
parallel_chunksize
""".split() + types.__all__ + errors.__all__
# Minimum supported versions of llvmlite and of the LLVM it is built against.
_min_llvmlite_version = (0, 41, 0)
_min_llvm_version = (14, 0, 0)


def _ensure_llvm():
    """
    Make sure llvmlite is importable, recent enough, built against a
    supported LLVM, and able to JIT-execute on this machine.
    """
    import warnings
    import llvmlite

    # Consider only the major.minor.bugfix components; ignore any trailing
    # rc/dev qualifiers on the version string.
    match = re.match(r'(\d+)\.(\d+).(\d+)', llvmlite.__version__)
    if match is None:
        # Version string in an unexpected shape; warn rather than fail.
        warnings.warn("llvmlite version format not recognized!")
    else:
        installed = tuple(int(group) for group in match.groups())
        if installed < _min_llvmlite_version:
            raise ImportError(
                "Numba requires at least version %d.%d.%d of llvmlite.\n"
                "Installed version is %s.\n"
                "Please update llvmlite." %
                (_min_llvmlite_version + (llvmlite.__version__,)))

    from llvmlite.binding import llvm_version_info, check_jit_execution
    if llvm_version_info < _min_llvm_version:
        raise ImportError(
            "Numba requires at least version %d.%d.%d of LLVM.\n"
            "Installed llvmlite is built against version %d.%d.%d.\n"
            "Please update llvmlite." %
            (_min_llvm_version + llvm_version_info))
    check_jit_execution()
def _try_enable_svml():
    """
    Try to enable SVML if configuration permits use and the library is found.

    Returns True when the SVML shared library was loaded AND the LLVM in use
    reports SVML support, False otherwise.  On success the
    '-vector-library=SVML' LLVM option is set as a side effect.
    """
    if config.DISABLE_INTEL_SVML:
        return False
    try:
        if sys.platform.startswith('linux'):
            llvmlite.binding.load_library_permanently("libsvml.so")
        elif sys.platform.startswith('darwin'):
            llvmlite.binding.load_library_permanently("libsvml.dylib")
        elif sys.platform.startswith('win'):
            llvmlite.binding.load_library_permanently("svml_dispmd")
        else:
            # Unknown platform: no known SVML library name to load.
            return False
        # The SVML library is loaded, therefore SVML *could* be supported.
        # Now see if LLVM has been compiled with the SVML support patch.
        # If llvmlite has the checking function `has_svml` and it returns
        # True, then LLVM was compiled with SVML support and the setup
        # for SVML can proceed. We err on the side of caution and if the
        # checking function is missing, regardless of that being fine for
        # most 0.23.{0,1} llvmlite instances (i.e. conda or pip installed),
        # we assume that SVML was not compiled in. llvmlite 0.23.2 is a
        # bugfix release with the checking function present that will always
        # produce correct behaviour. For context see: #3006.
        try:
            if not llvmlite.binding.targets.has_svml():
                # has detection function, but no svml compiled in, therefore
                # disable SVML
                return False
        except AttributeError:
            if platform.machine() == 'x86_64' and config.DEBUG:
                msg = ("SVML was found but llvmlite >= 0.23.2 is "
                       "needed to support it.")
                warnings.warn(msg)
            # does not have detection function, cannot detect reliably,
            # disable SVML.
            return False
        # All is well, detection function present and reports SVML is
        # compiled in, set the vector library to SVML.
        llvmlite.binding.set_option('SVML', '-vector-library=SVML')
        return True
    except Exception:
        # Was a bare `except:`; narrowed to Exception so that
        # KeyboardInterrupt/SystemExit during import are not swallowed.
        # This remains deliberately best-effort: any loading failure just
        # means "no SVML".
        if platform.machine() == 'x86_64' and config.DEBUG:
            warnings.warn("SVML was not found/could not be loaded.")
        return False
# Validate llvmlite/LLVM before importing llvmlite below.
_ensure_llvm()

# we know llvmlite is working as the above tests passed, import it now as SVML
# needs to mutate runtime options (sets the `-vector-library`).
import llvmlite

"""
Is set to True if Intel SVML is in use.
"""
# NOTE: the bare string above is a no-op at runtime; it only annotates the
# assignment that follows.
config.USING_SVML = _try_enable_svml()

# ---------------------- WARNING WARNING WARNING ----------------------------
# The following imports occur below here (SVML init) because somewhere in their
# import sequence they have a `@njit` wrapped function. This triggers too early
# a bind to the underlying LLVM libraries which then irretrievably sets the LLVM
# SVML state to "no SVML". See https://github.com/numba/numba/issues/4689 for
# context.
# ---------------------- WARNING WARNING WARNING ----------------------------

"""Expose Numba command via ``python -m numba``."""
# NOTE(review): the docstring and the lines below appear to belong to
# numba/__main__.py; this file looks like a concatenated dump -- confirm the
# intended file boundary before editing further.
import sys

from numba.misc.numba_entry import main

if __name__ == '__main__':
    sys.exit(main())
#ifndef NUMBA_ARYSTRUCT_H_
#define NUMBA_ARYSTRUCT_H_
/*
 * Fill in the *arystruct* with information from the Numpy array *obj*.
 * *arystruct*'s layout is defined in numba.targets.arrayobj (look
 * for the ArrayTemplate class).
 */
typedef struct {
    void     *meminfo;   /* see _nrt_python.c and nrt.h in numba/core/runtime */
    PyObject *parent;    /* Python object this array was derived from */
    npy_intp  nitems;    /* total number of elements */
    npy_intp  itemsize;  /* size of one element in bytes */
    void     *data;      /* pointer to the element data */
    /* Flexible array member; presumably ndim shape entries followed by
     * ndim stride entries -- confirm against the ArrayTemplate class
     * referenced above. */
    npy_intp  shape_and_strides[];
} arystruct_t;

#endif  /* NUMBA_ARYSTRUCT_H_ */
/* This file contains the base class implementation for all device arrays. The
* base class is implemented in C so that computing typecodes for device arrays
* can be implemented efficiently. */
#include "_pymodule.h"
/* Include _devicearray., but make sure we don't get the definitions intended
* for consumers of the Device Array API.
*/
#define NUMBA_IN_DEVICEARRAY_CPP_
#include "_devicearray.h"
/* DeviceArray PyObject implementation. Note that adding more members here is
* presently prohibited because mapped and managed arrays derive from both
* DeviceArray and NumPy's ndarray, which is also a C extension class - the
* layout of the object cannot be resolved if this class also has members beyond
* PyObject_HEAD. */
class DeviceArray {
    /* Intentionally nothing beyond PyObject_HEAD: see the layout note in
     * the comment preceding this class. */
    PyObject_HEAD
};
/* Trivial traversal - DeviceArray instances own nothing. */
/* GC traversal: a DeviceArray owns no references, so there is nothing to
 * visit. */
static int
DeviceArray_traverse(DeviceArray *self, visitproc visit, void *arg)
{
    (void)self;
    (void)visit;
    (void)arg;
    return 0;
}
/* Trivial clear of all references - DeviceArray instances own nothing. */
/* GC clear: a DeviceArray owns no references, so there is nothing to drop. */
static int
DeviceArray_clear(DeviceArray *self)
{
    (void)self;
    return 0;
}
/* The _devicearray.DeviceArray type */
/* Type object for _devicearray.DeviceArray.  Almost every slot is 0: the
 * type is an empty, subclassable, GC-aware base class relying on defaults
 * (tp_new is filled in at module init). */
PyTypeObject DeviceArrayType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_devicearray.DeviceArray",               /* tp_name */
    sizeof(DeviceArray),                      /* tp_basicsize */
    0,                                        /* tp_itemsize */
    0,                                        /* tp_dealloc */
    0,                                        /* tp_vectorcall_offset */
    0,                                        /* tp_getattr */
    0,                                        /* tp_setattr */
    0,                                        /* tp_as_async */
    0,                                        /* tp_repr */
    0,                                        /* tp_as_number */
    0,                                        /* tp_as_sequence */
    0,                                        /* tp_as_mapping */
    0,                                        /* tp_hash */
    0,                                        /* tp_call */
    0,                                        /* tp_str */
    0,                                        /* tp_getattro */
    0,                                        /* tp_setattro */
    0,                                        /* tp_as_buffer */
    /* BASETYPE so mapped/managed arrays can derive from it; HAVE_GC to
     * match the DeviceArray_traverse/_clear implementations above. */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
                                              /* tp_flags */
    "DeviceArray object",                     /* tp_doc */
    (traverseproc) DeviceArray_traverse,      /* tp_traverse */
    (inquiry) DeviceArray_clear,              /* tp_clear */
    0,                                        /* tp_richcompare */
    0,                                        /* tp_weaklistoffset */
    0,                                        /* tp_iter */
    0,                                        /* tp_iternext */
    0,                                        /* tp_methods */
    0,                                        /* tp_members */
    0,                                        /* tp_getset */
    0,                                        /* tp_base */
    0,                                        /* tp_dict */
    0,                                        /* tp_descr_get */
    0,                                        /* tp_descr_set */
    0,                                        /* tp_dictoffset */
    0,                                        /* tp_init */
    0,                                        /* tp_alloc */
    0,                                        /* tp_new */
    0,                                        /* tp_free */
    0,                                        /* tp_is_gc */
    0,                                        /* tp_bases */
    0,                                        /* tp_mro */
    0,                                        /* tp_cache */
    0,                                        /* tp_subclasses */
    0,                                        /* tp_weaklist */
    0,                                        /* tp_del */
    0,                                        /* tp_version_tag */
    0,                                        /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                        /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                        /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* CUDA device array C API */
/* C API table exported via a PyCapsule (see MOD_INIT below); consumers
 * access the type object through slot 0 (see _devicearray.h). */
static void *_DeviceArray_API[1] = {
    (void*)&DeviceArrayType
};
/* Module init for _devicearray: readies DeviceArrayType, exposes it as
 * `DeviceArray`, and publishes the C API table in a capsule stored under
 * `_DEVICEARRAY_API` in the module dict. */
MOD_INIT(_devicearray) {
    PyObject *m = nullptr;
    PyObject *d = nullptr;
    PyObject *c_api = nullptr;
    int error = 0;

    MOD_DEF(m, "_devicearray", "No docs", NULL)
    if (m == NULL)
        goto error_occurred;
    c_api = PyCapsule_New((void *)_DeviceArray_API, "numba._devicearray._DEVICEARRAY_API", NULL);
    if (c_api == NULL)
        goto error_occurred;
    /* tp_new was deliberately left 0 in the static initializer above. */
    DeviceArrayType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&DeviceArrayType) < 0)
        goto error_occurred;
    Py_INCREF(&DeviceArrayType);
    error = PyModule_AddObject(m, "DeviceArray", (PyObject*)(&DeviceArrayType));
    if (error)
        goto error_occurred;
    d = PyModule_GetDict(m);
    if (d == NULL)
        goto error_occurred;
    error = PyDict_SetItemString(d, "_DEVICEARRAY_API", c_api);
    /* Decref and set c_api to NULL, Py_XDECREF in error_occurred will have no
     * effect. */
    Py_CLEAR(c_api);
    if (error)
        goto error_occurred;
    return MOD_SUCCESS_VAL(m);

error_occurred:
    /* NOTE(review): DeviceArrayType is XDECREF'd on every failure path, even
     * on failures occurring before the Py_INCREF above, and also after a
     * successful PyModule_AddObject (which stole that reference) -- the
     * refcount bookkeeping looks off by one on some paths; confirm. */
    Py_XDECREF(m);
    Py_XDECREF(c_api);
    Py_XDECREF((PyObject*)&DeviceArrayType);
    return MOD_ERROR_VAL;
}
#ifndef NUMBA_DEVICEARRAY_H_
#define NUMBA_DEVICEARRAY_H_

#ifdef __cplusplus
extern "C" {
#endif

/* These definitions should only be used by consumers of the Device Array API.
 * Consumers access the API through the opaque pointer stored in
 * _devicearray._DEVICEARRAY_API. We don't want these definitions in
 * _devicearray.cpp itself because they would conflict with the actual
 * implementations there.
 */
#ifndef NUMBA_IN_DEVICEARRAY_CPP_
extern void **DeviceArray_API;
/* Consumers reach the type object through slot 0 of the capsule-exported
 * API table. */
#define DeviceArrayType (*(PyTypeObject*)DeviceArray_API[0])
#endif  /* ndef NUMBA_IN_DEVICEARRAY_CPP */

#ifdef __cplusplus
}
#endif

#endif  /* NUMBA_DEVICEARRAY_H_ */
#include "_pymodule.h"
#include <cstring>
#include <ctime>
#include <cassert>
#include <vector>
#include "_typeof.h"
#include "frameobject.h"
#include "traceback.h"
#include "core/typeconv/typeconv.hpp"
#include "_devicearray.h"
/*
* Notes on the C_TRACE macro:
*
* The original C_TRACE macro (from ceval.c) would call
* PyTrace_C_CALL et al., for which the frame argument wouldn't
* be usable. Since we explicitly synthesize a frame using the
* original Python code object, we call PyTrace_CALL instead so
* the profiler can report the correct source location.
*
* Likewise, while ceval.c would call PyTrace_C_EXCEPTION in case
* of error, the profiler would simply expect a RETURN in case of
* a Python function, so we generate that here (making sure the
* exception state is preserved correctly).
*
*/
#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 11)
#ifndef Py_BUILD_CORE
#define Py_BUILD_CORE 1
#endif
#include "internal/pycore_frame.h"
#include "internal/pycore_pyerrors.h"
/*
* Code originally from:
* https://github.com/python/cpython/blob/deaf509e8fc6e0363bd6f26d52ad42f976ec42f2/Python/ceval.c#L6804
*/
/* Invoke the trace/profile callback `func` for event `what` on `frame`,
 * guarding against re-entrancy (Python 3.11 variant). */
static int
call_trace(Py_tracefunc func, PyObject *obj,
           PyThreadState *tstate, PyFrameObject *frame,
           int what, PyObject *arg)
{
    int result;
    /* Don't trace while already inside a tracing callback. */
    if (tstate->tracing) {
        return 0;
    }
    if (frame == NULL) {
        return -1;
    }
    /* Record the event kind for the duration of the callback. */
    int old_what = tstate->tracing_what;
    tstate->tracing_what = what;
    PyThreadState_EnterTracing(tstate);
    /* NOTE(review): `arg` is not forwarded -- the callback receives NULL,
     * unlike the upstream call_trace which passes `arg` through; confirm
     * this is intentional for the synthesized PyTrace_CALL events. */
    result = func(obj, frame, what, NULL);
    PyThreadState_LeaveTracing(tstate);
    tstate->tracing_what = old_what;
    return result;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4220-L4240
*/
/* Like call_trace, but preserves any pending exception across the callback
 * when it succeeds, and discards the saved exception state when the
 * callback itself fails (Python 3.11 variant, uses private _PyErr APIs). */
static int
call_trace_protected(Py_tracefunc func, PyObject *obj,
                     PyThreadState *tstate, PyFrameObject *frame,
                     int what, PyObject *arg)
{
    PyObject *type, *value, *traceback;
    int err;
    _PyErr_Fetch(tstate, &type, &value, &traceback);
    err = call_trace(func, obj, tstate, frame, what, arg);
    if (err == 0)
    {
        /* Callback succeeded: restore the original exception state. */
        _PyErr_Restore(tstate, type, value, traceback);
        return 0;
    }
    else {
        /* Callback failed: its exception wins; drop the saved one. */
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(traceback);
        return -1;
    }
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/deaf509e8fc6e0363bd6f26d52ad42f976ec42f2/Python/ceval.c#L7245
* NOTE: The state test https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4521
* has been removed, it's dealt with in call_cfunc.
*/
/* Profile the C call `call` against the synthesized `frame`: emit
 * PyTrace_CALL before the call and PyTrace_RETURN after it (via
 * call_trace_protected on failure so the pending exception survives).
 * `tstate` and `cfunc` are expected to be in scope at the expansion site.
 * NOTE(review): the final macro line carries a trailing '\' continuation;
 * in the upstream file a blank line follows it -- confirm one follows here
 * too (blank lines appear stripped in this dump), otherwise the next
 * preprocessor directive would be spliced into the macro. */
#define C_TRACE(x, call, frame) \
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
               tstate, frame, \
               PyTrace_CALL, cfunc)) { \
    x = NULL; \
} \
else { \
    x = call; \
    if (tstate->c_profilefunc != NULL) { \
        if (x == NULL) { \
            call_trace_protected(tstate->c_profilefunc, \
                                 tstate->c_profileobj, \
                                 tstate, frame, \
                                 PyTrace_RETURN, cfunc); \
            /* XXX should pass (type, value, tb) */ \
        } else { \
            if (call_trace(tstate->c_profilefunc, \
                           tstate->c_profileobj, \
                           tstate, frame, \
                           PyTrace_RETURN, cfunc)) { \
                Py_DECREF(x); \
                x = NULL; \
            } \
        } \
    } \
} \
#elif (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10)
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L36-L40
*/
/* Numba's stand-in for CPython 3.10's PyTraceInfo, which is private to
 * ceval.c and not exported; the layout mirrors the upstream struct. */
typedef struct {
    PyCodeObject *code;        // The code object for the bounds. May be NULL.
    PyCodeAddressRange bounds; // Only valid if code != NULL.
    CFrame cframe;
} PyTraceInfo;
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1257-L1266
* NOTE: The function is renamed.
*/
/* Initialize a PyCodeAddressRange over a raw line table (renamed copy of
 * CPython's private _PyLineTable_InitAddressRange). */
static void
_nb_PyLineTable_InitAddressRange(const char *linetable, Py_ssize_t length, int firstlineno, PyCodeAddressRange *range)
{
    range->opaque.lo_next = linetable;
    range->opaque.limit = range->opaque.lo_next + length;
    /* Sentinel values: no address range computed yet. */
    range->ar_start = -1;
    range->ar_end = 0;
    range->opaque.computed_line = firstlineno;
    range->ar_line = -1;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Objects/codeobject.c#L1269-L1275
* NOTE: The function is renamed.
*/
/* Initialize `bounds` from a code object's line table (renamed copy of
 * CPython's private _PyCode_InitAddressRange). */
static int
_nb_PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds)
{
    const char *linetable = PyBytes_AS_STRING(co->co_linetable);
    Py_ssize_t length = PyBytes_GET_SIZE(co->co_linetable);
    _nb_PyLineTable_InitAddressRange(linetable, length, co->co_firstlineno, bounds);
    return bounds->ar_line;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5468-L5475
* NOTE: The call to _PyCode_InitAddressRange is renamed.
*/
/* (Re)initialize the cached address-range bounds in `trace_info` when the
 * frame's code object differs from the one last seen. */
static void
initialize_trace_info(PyTraceInfo *trace_info, PyFrameObject *frame)
{
    if (trace_info->code != frame->f_code) {
        trace_info->code = frame->f_code;
        _nb_PyCode_InitAddressRange(frame->f_code, &trace_info->bounds);
    }
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5477-L5501
*/
/* Invoke the trace/profile callback for `frame`, computing f_lineno from
 * f_lasti first, with tracing disabled for the duration of the callback
 * (Python 3.10 variant). */
static int
call_trace(Py_tracefunc func, PyObject *obj,
           PyThreadState *tstate, PyFrameObject *frame,
           PyTraceInfo *trace_info,
           int what, PyObject *arg)
{
    int result;
    /* Don't trace while already inside a tracing callback. */
    if (tstate->tracing)
        return 0;
    tstate->tracing++;
    tstate->cframe->use_tracing = 0;
    if (frame->f_lasti < 0) {
        /* Frame not started yet: report the function's first line. */
        frame->f_lineno = frame->f_code->co_firstlineno;
    }
    else {
        initialize_trace_info(trace_info, frame);
        frame->f_lineno = _PyCode_CheckLineNumber(frame->f_lasti*sizeof(_Py_CODEUNIT), &trace_info->bounds);
    }
    result = func(obj, frame, what, arg);
    frame->f_lineno = 0;
    /* Re-enable tracing if any callback is still installed. */
    tstate->cframe->use_tracing = ((tstate->c_tracefunc != NULL)
                                   || (tstate->c_profilefunc != NULL));
    tstate->tracing--;
    return result;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5445-L5466
*/
/* Like call_trace, but preserves any pending exception across the callback
 * when it succeeds (Python 3.10 variant). */
static int
call_trace_protected(Py_tracefunc func, PyObject *obj,
                     PyThreadState *tstate, PyFrameObject *frame,
                     PyTraceInfo *trace_info,
                     int what, PyObject *arg)
{
    PyObject *type, *value, *traceback;
    int err;
    PyErr_Fetch(&type, &value, &traceback);
    err = call_trace(func, obj, tstate, frame, trace_info, what, arg);
    if (err == 0)
    {
        /* Callback succeeded: restore the original exception state. */
        PyErr_Restore(type, value, traceback);
        return 0;
    }
    else
    {
        /* Callback failed: its exception wins; drop the saved one. */
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(traceback);
        return -1;
    }
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5810-L5839
* NOTE: The state test https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L5811
* has been removed, it's dealt with in call_cfunc.
*/
/* Profile the C call `call`: emit PyTrace_CALL before the call and
 * PyTrace_RETURN after it (via call_trace_protected when the call failed,
 * preserving the pending exception).  `tstate`, `trace_info` and `cfunc`
 * are expected to be in scope at the expansion site. */
#define C_TRACE(x, call) \
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
               tstate, tstate->frame, &trace_info, PyTrace_CALL,\
               cfunc)) \
    x = NULL; \
else \
{ \
    x = call; \
    if (tstate->c_profilefunc != NULL) \
    { \
        if (x == NULL) \
        { \
            call_trace_protected(tstate->c_profilefunc, \
                                 tstate->c_profileobj, \
                                 tstate, tstate->frame, \
                                 &trace_info, \
                                 PyTrace_RETURN, cfunc); \
            /* XXX should pass (type, value, tb) */ \
        } \
        else \
        { \
            if (call_trace(tstate->c_profilefunc, \
                           tstate->c_profileobj, \
                           tstate, tstate->frame, \
                           &trace_info, \
                           PyTrace_RETURN, cfunc)) \
            { \
                Py_DECREF(x); \
                x = NULL; \
            } \
        } \
    } \
}
#else // Python <3.10
/*
* Code originally from:
* https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4242-L4257
*/
/* Invoke the trace/profile callback with tracing disabled for the duration
 * (pre-3.10 variant: use_tracing lives directly on the thread state). */
static int
call_trace(Py_tracefunc func, PyObject *obj,
           PyThreadState *tstate, PyFrameObject *frame,
           int what, PyObject *arg)
{
    int result;
    /* Don't trace while already inside a tracing callback. */
    if (tstate->tracing)
        return 0;
    tstate->tracing++;
    tstate->use_tracing = 0;
    result = func(obj, frame, what, arg);
    /* Re-enable tracing if any callback is still installed. */
    tstate->use_tracing = ((tstate->c_tracefunc != NULL)
                           || (tstate->c_profilefunc != NULL));
    tstate->tracing--;
    return result;
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4220-L4240
*/
/* Like call_trace, but preserves any pending exception across the callback
 * when it succeeds (pre-3.10 variant). */
static int
call_trace_protected(Py_tracefunc func, PyObject *obj,
                     PyThreadState *tstate, PyFrameObject *frame,
                     int what, PyObject *arg)
{
    PyObject *type, *value, *traceback;
    int err;
    PyErr_Fetch(&type, &value, &traceback);
    err = call_trace(func, obj, tstate, frame, what, arg);
    if (err == 0)
    {
        /* Callback succeeded: restore the original exception state. */
        PyErr_Restore(type, value, traceback);
        return 0;
    }
    else
    {
        /* Callback failed: its exception wins; drop the saved one. */
        Py_XDECREF(type);
        Py_XDECREF(value);
        Py_XDECREF(traceback);
        return -1;
    }
}
/*
* Code originally from:
* https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4520-L4549
* NOTE: The state test https://github.com/python/cpython/blob/d5650a1738fe34f6e1db4af5f4c4edb7cae90a36/Python/ceval.c#L4521
* has been removed, it's dealt with in call_cfunc.
*/
/* Profile the C call `call`: emit PyTrace_CALL before the call and
 * PyTrace_RETURN after it (via call_trace_protected when the call failed,
 * preserving the pending exception).  `tstate` and `cfunc` are expected to
 * be in scope at the expansion site. */
#define C_TRACE(x, call) \
if (call_trace(tstate->c_profilefunc, tstate->c_profileobj, \
               tstate, tstate->frame, PyTrace_CALL, cfunc)) \
    x = NULL; \
else \
{ \
    x = call; \
    if (tstate->c_profilefunc != NULL) \
    { \
        if (x == NULL) \
        { \
            call_trace_protected(tstate->c_profilefunc, \
                                 tstate->c_profileobj, \
                                 tstate, tstate->frame, \
                                 PyTrace_RETURN, cfunc); \
            /* XXX should pass (type, value, tb) */ \
        } \
        else \
        { \
            if (call_trace(tstate->c_profilefunc, \
                           tstate->c_profileobj, \
                           tstate, tstate->frame, \
                           PyTrace_RETURN, cfunc)) \
            { \
                Py_DECREF(x); \
                x = NULL; \
            } \
        } \
    } \
}
#endif
/* Flattened argument-type table: argct entries per registered overload. */
typedef std::vector<Type> TypeTable;
/* One callable per registered overload. */
typedef std::vector<PyObject*> Functions;

/* The Dispatcher class is the base class of all dispatchers in the CPU and
   CUDA targets. Its main responsibilities are:
   - Resolving the best overload to call for a given set of arguments, and
   - Calling the resolved overload.
   This logic is implemented within this class for efficiency (lookup of the
   appropriate overload needs to be fast) and ease of implementation (calling
   directly into a compiled function using a function pointer is easier within
   the C++ code where the overload has been resolved).

   NOTE(review): this struct mixes PyObject_HEAD with non-POD std::vector
   members while instances are allocated by the Python allocator -- confirm
   where/whether the vector constructors are run on the allocation path. */
class Dispatcher {
public:
    PyObject_HEAD
    /* Whether compilation of new overloads is permitted */
    char can_compile;
    /* Whether fallback to object mode is permitted */
    char can_fallback;
    /* Whether types must match exactly when resolving overloads.
       If not, conversions (e.g. float32 -> float64) are permitted when
       searching for a match. */
    char exact_match_required;
    /* Borrowed reference */
    PyObject *fallbackdef;
    /* Whether to fold named arguments and default values
       (false for lifted loops) */
    int fold_args;
    /* Whether the last positional argument is a stararg */
    int has_stararg;
    /* Tuple of argument names */
    PyObject *argnames;
    /* Tuple of default values */
    PyObject *defargs;
    /* Number of arguments to function */
    int argct;
    /* Used for selecting overloaded function implementations */
    TypeManager *tm;
    /* An array of overloads */
    Functions functions;
    /* A flattened array of argument types to all overloads
     * (invariant: sizeof(overloads) == argct * sizeof(functions)) */
    TypeTable overloads;

    /* Add a new overload. Parameters:
       - args: An array of Type objects, one for each parameter
       - callable: The callable implementing this overload.
         (Stored as a borrowed reference; see Dispatcher_Insert.) */
    void addDefinition(Type args[], PyObject *callable) {
        overloads.reserve(argct + overloads.size());
        for (int i=0; i<argct; ++i) {
            overloads.push_back(args[i]);
        }
        functions.push_back(callable);
    }

    /* Given a list of types, find the overloads that have a matching signature.
       Returns the best match, as well as the number of matches found.
       Parameters:
       - sig: an array of Type objects, one for each parameter.
       - matches: the number of matches found (mutated by this function).
       - allow_unsafe: whether to match overloads that would require an unsafe
         cast.
       - exact_match_required: Whether all arguments types must match the
                               overload's types exactly. When false,
                               overloads that would require a type conversion
                               can also be matched.
       The returned callable is a borrowed reference (or NULL when the match
       is not unique). */
    PyObject* resolve(Type sig[], int &matches, bool allow_unsafe,
                      bool exact_match_required) const {
        const int ovct = functions.size();
        int selected;
        matches = 0;
        if (0 == ovct) {
            // No overloads registered
            return NULL;
        }
        if (argct == 0) {
            // Nullary function: trivial match on first overload
            matches = 1;
            selected = 0;
        }
        else {
            matches = tm->selectOverload(sig, &overloads[0], selected, argct,
                                         ovct, allow_unsafe,
                                         exact_match_required);
        }
        if (matches == 1) {
            return functions[selected];
        }
        return NULL;
    }

    /* Remove all overloads */
    void clear() {
        functions.clear();
        overloads.clear();
    }
};
/* GC traversal: report the owned PyObject references to the collector.
 * Both `argnames` and `defargs` are owned (INCREF'd in Dispatcher_init), so
 * both are visited; previously only `defargs` was.  The entries of
 * `functions` are borrowed (see Dispatcher_Insert) and must not be visited. */
static int
Dispatcher_traverse(Dispatcher *self, visitproc visit, void *arg)
{
    Py_VISIT(self->argnames);
    Py_VISIT(self->defargs);
    return 0;
}
/* Destructor: release the owned tuples, drop all overloads, then free the
 * object through its type's tp_free. */
static void
Dispatcher_dealloc(Dispatcher *self)
{
    /* NOTE(review): no PyObject_GC_UnTrack() call before releasing
     * references -- required for GC-tracked types; the Dispatcher type
     * object's flags are not visible in this chunk, confirm there. */
    Py_XDECREF(self->argnames);
    Py_XDECREF(self->defargs);
    self->clear();
    Py_TYPE(self)->tp_free((PyObject*)self);
}
/* __init__ for Dispatcher.  Expected Python-level arguments:
 * (tm_address, argct, fold_args, argnames, defargs, can_fallback
 *  [, has_stararg [, exact_match_required]]). */
static int
Dispatcher_init(Dispatcher *self, PyObject *args, PyObject *kwds)
{
    PyObject *tmaddrobj;
    void *tmaddr;
    int argct;
    int can_fallback;
    int has_stararg = 0;
    int exact_match_required = 0;

    if (!PyArg_ParseTuple(args, "OiiO!O!i|ii", &tmaddrobj, &argct,
                          &self->fold_args,
                          &PyTuple_Type, &self->argnames,
                          &PyTuple_Type, &self->defargs,
                          &can_fallback,
                          &has_stararg,
                          &exact_match_required
                          )) {
        return -1;
    }
    /* PyArg_ParseTuple stored borrowed references; take ownership.
     * NOTE(review): calling __init__ a second time would leak the
     * previously owned argnames/defargs -- confirm __init__ runs only once
     * per instance. */
    Py_INCREF(self->argnames);
    Py_INCREF(self->defargs);
    /* The TypeManager is handed over as an integer address from Python.
     * NOTE(review): a PyLong_AsVoidPtr failure is not checked here. */
    tmaddr = PyLong_AsVoidPtr(tmaddrobj);
    self->tm = static_cast<TypeManager*>(tmaddr);
    self->argct = argct;
    self->can_compile = 1;
    self->can_fallback = can_fallback;
    self->fallbackdef = NULL;
    self->has_stararg = has_stararg;
    self->exact_match_required = exact_match_required;
    return 0;
}
/* Python-visible method: drop every registered overload. */
static PyObject *
Dispatcher_clear(Dispatcher *self, PyObject *args)
{
    (void)args;
    self->clear();
    Py_RETURN_NONE;
}
/* Register a new overload: `sig` is a sequence of integer type codes,
 * `func` is the compiled entry point (must be a builtin function unless
 * cuda=True), `objectmode` marks it as the object-mode fallback. */
static
PyObject*
Dispatcher_Insert(Dispatcher *self, PyObject *args, PyObject *kwds)
{
    /* The cuda kwarg is a temporary addition until CUDA overloads are compiled
     * functions. Once they are compiled functions, kwargs can be removed from
     * this function. */
    static char *keywords[] = {
        (char*)"sig",
        (char*)"func",
        (char*)"objectmode",
        (char*)"cuda",
        NULL
    };

    PyObject *sigtup, *cfunc;
    int i, sigsz;
    int *sig;
    int objectmode = 0;
    int cuda = 0;

    if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO|ip", keywords, &sigtup,
                                     &cfunc, &objectmode, &cuda)) {
        return NULL;
    }
    if (!cuda && !PyObject_TypeCheck(cfunc, &PyCFunction_Type) ) {
        PyErr_SetString(PyExc_TypeError, "must be builtin_function_or_method");
        return NULL;
    }
    /* NOTE(review): PySequence_Fast_GET_SIZE/GET_ITEM assume sigtup is a
     * list or tuple (no PySequence_Fast conversion), and PyLong_AsLong
     * errors are not checked (-1 from an exception would be stored as a
     * type code) -- confirm callers always pass tuples of ints. */
    sigsz = PySequence_Fast_GET_SIZE(sigtup);
    sig = new int[sigsz];
    for (i = 0; i < sigsz; ++i) {
        sig[i] = PyLong_AsLong(PySequence_Fast_GET_ITEM(sigtup, i));
    }
    /* The reference to cfunc is borrowed; this only works because the
       derived Python class also stores an (owned) reference to cfunc. */
    self->addDefinition(sig, cfunc);
    /* Add pure python fallback */
    if (!self->fallbackdef && objectmode){
        self->fallbackdef = cfunc;
    }
    delete[] sig;
    Py_RETURN_NONE;
}
/* Ask the dispatcher's Python side to explain a dispatch failure by calling
 * its `method_name` callback with the original args/kwargs.  The callback is
 * expected to raise; if the attribute is missing a TypeError carrying
 * `default_msg` is set instead, and if the callback returns without raising
 * a RuntimeError is reported. */
static
void explain_issue(PyObject *dispatcher, PyObject *args, PyObject *kws,
                   const char *method_name, const char *default_msg)
{
    PyObject *cb = PyObject_GetAttrString(dispatcher, method_name);
    if (cb == NULL) {
        PyErr_SetString(PyExc_TypeError, default_msg);
        return;
    }
    PyObject *res = PyObject_Call(cb, args, kws);
    Py_DECREF(cb);
    if (res != NULL) {
        /* The explainer is only useful if it raises. */
        PyErr_Format(PyExc_RuntimeError, "%s must raise an exception",
                     method_name);
        Py_DECREF(res);
    }
}
/* Report an ambiguous overload resolution via the dispatcher's
 * `_explain_ambiguous` callback. */
static
void explain_ambiguous(PyObject *dispatcher, PyObject *args, PyObject *kws)
{
    explain_issue(dispatcher, args, kws,
                  "_explain_ambiguous", "Ambiguous overloading");
}
/* Report a failed overload match via the dispatcher's
 * `_explain_matching_error` callback. */
static
void explain_matching_error(PyObject *dispatcher, PyObject *args, PyObject *kws)
{
    explain_issue(dispatcher, args, kws,
                  "_explain_matching_error", "No matching definition");
}
/* Ask the Python side whether new type conversions became available by
 * calling `_search_new_conversions`.  Returns 1 if it reported True, 0 if
 * False, and -1 (with an exception set) on error or a non-bool result. */
static
int search_new_conversions(PyObject *dispatcher, PyObject *args, PyObject *kws)
{
    PyObject *cb = PyObject_GetAttrString(dispatcher,
                                          "_search_new_conversions");
    if (cb == NULL)
        return -1;
    PyObject *res = PyObject_Call(cb, args, kws);
    Py_DECREF(cb);
    if (res == NULL)
        return -1;
    if (!PyBool_Check(res)) {
        Py_DECREF(res);
        PyErr_SetString(PyExc_TypeError,
                        "_search_new_conversions() should return a boolean");
        return -1;
    }
    int found = (res == Py_True) ? 1 : 0;
    Py_DECREF(res);
    return found;
}
/* A custom, fast, inlinable version of PyCFunction_Call() */
/* Call the compiled entry point `cfunc` with (args, kws).  When a profiler
 * is active, a frame is synthesized from the dispatcher's __code__ so the
 * profiler can attribute the call to the original Python source; otherwise
 * the C function is invoked directly.  How the "profiler active" state is
 * read differs per Python version, hence the preprocessor branches. */
static PyObject *
call_cfunc(Dispatcher *self, PyObject *cfunc, PyObject *args, PyObject *kws, PyObject *locals)
{
    PyCFunctionWithKeywords fn;
    PyThreadState *tstate;
    assert(PyCFunction_Check(cfunc));
    assert(PyCFunction_GET_FLAGS(cfunc) == (METH_VARARGS | METH_KEYWORDS));
    fn = (PyCFunctionWithKeywords) PyCFunction_GET_FUNCTION(cfunc);
    tstate = PyThreadState_GET();
#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 11)
    /*
     * On Python 3.11, _PyEval_EvalFrameDefault stops using PyTraceInfo since
     * it's now baked into ThreadState.
     * https://github.com/python/cpython/pull/26623
     */
    if (tstate->cframe->use_tracing && tstate->c_profilefunc)
#elif (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION == 10)
    /*
     * On Python 3.10+ trace_info comes from somewhere up in PyFrameEval et al,
     * Numba doesn't have access to that so creates an equivalent struct and
     * wires it up against the cframes. This is passed into the tracing
     * functions.
     *
     * Code originally from:
     * https://github.com/python/cpython/blob/c5bfb88eb6f82111bb1603ae9d78d0476b552d66/Python/ceval.c#L1611-L1622
     */
    PyTraceInfo trace_info;
    trace_info.code = NULL; // not initialized
    CFrame *prev_cframe = tstate->cframe;
    trace_info.cframe.use_tracing = prev_cframe->use_tracing;
    trace_info.cframe.previous = prev_cframe;
    if (trace_info.cframe.use_tracing && tstate->c_profilefunc)
#else
    /*
     * On Python prior to 3.10, tracing state is a member of the threadstate
     */
    if (tstate->use_tracing && tstate->c_profilefunc)
#endif
    {
        /*
         * The following code requires some explaining:
         *
         * We want the jit-compiled function to be visible to the profiler, so we
         * need to synthesize a frame for it.
         * The PyFrame_New() constructor doesn't do anything with the 'locals' value if the 'code's
         * 'CO_NEWLOCALS' flag is set (which is always the case nowadays).
         * So, to get local variables into the frame, we have to manually set the 'f_locals'
         * member, then call `PyFrame_LocalsToFast`, where a subsequent call to the `frame.f_locals`
         * property (by virtue of the `frame_getlocals` function in frameobject.c) will find them.
         */
        PyCodeObject *code = (PyCodeObject*)PyObject_GetAttrString((PyObject*)self, "__code__");
        PyObject *globals = PyDict_New();
        PyObject *builtins = PyEval_GetBuiltins();
        PyFrameObject *frame = NULL;
        PyObject *result = NULL;
        if (!code) {
            PyErr_Format(PyExc_RuntimeError, "No __code__ attribute found.");
            goto error;
        }
        /* Populate builtins, which is required by some JITted functions */
        if (PyDict_SetItemString(globals, "__builtins__", builtins)) {
            goto error;
        }
        /* unset the CO_OPTIMIZED flag, make the frame get a new locals dict
         * (CO_OPTIMIZED is bit 0, hence the 0xFFFE mask) */
        code->co_flags &= 0xFFFE;
        frame = PyFrame_New(tstate, code, globals, locals);
        if (frame == NULL) {
            goto error;
        }
        /* Populate the 'fast locals' in `frame` */
        PyFrame_LocalsToFast(frame, 0);
#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 11)
        C_TRACE(result, fn(PyCFunction_GET_SELF(cfunc), args, kws), frame);
#else
        /* Pre-3.11: make the synthesized frame current for the duration of
         * the call, then restore the caller's frame. */
        tstate->frame = frame;
        C_TRACE(result, fn(PyCFunction_GET_SELF(cfunc), args, kws));
        /* write changes back to locals? */
        PyFrame_FastToLocals(frame);
        tstate->frame = frame->f_back;
#endif
    error:
        Py_XDECREF(frame);
        Py_XDECREF(globals);
        Py_XDECREF(code);
        return result;
    }
    else
    {
        /* No profiler: call the compiled function directly. */
        return fn(PyCFunction_GET_SELF(cfunc), args, kws);
    }
}
/*
 * Compile a specialization of `self` for the given arguments, then run it.
 * `locals` is forwarded to call_cfunc so a synthesized frame can expose
 * locals to an active profiler. Returns a new reference, or NULL on error.
 */
static
PyObject*
compile_and_invoke(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals)
{
    PyObject *compiled = NULL;
    PyObject *result = NULL;
    PyObject *compile_meth = PyObject_GetAttrString((PyObject*)self,
                                                    "_compile_for_args");
    if (compile_meth == NULL)
        return NULL;
    /* NOTE: we call the compiled function ourselves instead of
       letting the Python derived class do it. This is for proper
       behaviour of globals() in jitted functions (issue #476). */
    compiled = PyObject_Call(compile_meth, args, kws);
    Py_DECREF(compile_meth);
    if (compiled == NULL)
        return NULL;
    if (PyObject_TypeCheck(compiled, &PyCFunction_Type)) {
        result = call_cfunc(self, compiled, args, kws, locals);
    } else {
        /* Not a C function: re-enter the interpreter to call it */
        result = PyObject_Call(compiled, args, kws);
    }
    Py_DECREF(compiled);
    return result;
}
/* Variant of compile_and_invoke that stops after compilation and returns
 * the compiled object itself. Needed for CUDA, whose overloads are Python
 * _Kernel instances rather than compiled functions; once CUDA overloads
 * are compiled functions this helper can go away. */
static
PyObject*
cuda_compile_only(Dispatcher *self, PyObject *args, PyObject *kws, PyObject *locals)
{
    PyObject *compiled;
    PyObject *compile_meth = PyObject_GetAttrString((PyObject*)self,
                                                    "_compile_for_args");
    if (compile_meth == NULL)
        return NULL;
    compiled = PyObject_Call(compile_meth, args, kws);
    Py_DECREF(compile_meth);
    return compiled;
}
/*
 * Fold positional and keyword arguments into a purely positional tuple
 * matching the dispatcher's formal parameter list (self->argnames),
 * filling in default values from self->defargs and, when
 * self->has_stararg, packing surplus positional arguments into a tuple
 * stored in the last slot.
 *
 * On success: *pargs is replaced with a NEW reference to the folded tuple
 * (caller owns it), *pkws is set to NULL, and 0 is returned.
 * On error: a TypeError is set, *pargs/*pkws are left untouched, and -1
 * is returned.
 */
static int
find_named_args(Dispatcher *self, PyObject **pargs, PyObject **pkws)
{
    PyObject *oldargs = *pargs, *newargs;
    PyObject *kws = *pkws;
    Py_ssize_t pos_args = PyTuple_GET_SIZE(oldargs);
    Py_ssize_t named_args, total_args, i;
    Py_ssize_t func_args = PyTuple_GET_SIZE(self->argnames);
    Py_ssize_t defaults = PyTuple_GET_SIZE(self->defargs);
    /* Last parameter with a default value (the stararg occupies the last
       formal slot and cannot itself have a default, hence -2) */
    Py_ssize_t last_def = (self->has_stararg)
        ? func_args - 2
        : func_args - 1;
    /* First parameter with a default value */
    Py_ssize_t first_def = last_def - defaults + 1;
    /* Minimum number of required arguments */
    Py_ssize_t minargs = first_def;
    if (kws != NULL)
        named_args = PyDict_Size(kws);
    else
        named_args = 0;
    total_args = pos_args + named_args;
    if (!self->has_stararg && total_args > func_args) {
        PyErr_Format(PyExc_TypeError,
                     "too many arguments: expected %d, got %d",
                     (int) func_args, (int) total_args);
        return -1;
    }
    else if (total_args < minargs) {
        if (minargs == func_args)
            PyErr_Format(PyExc_TypeError,
                         "not enough arguments: expected %d, got %d",
                         (int) minargs, (int) total_args);
        else
            PyErr_Format(PyExc_TypeError,
                         "not enough arguments: expected at least %d, got %d",
                         (int) minargs, (int) total_args);
        return -1;
    }
    newargs = PyTuple_New(func_args);
    if (!newargs)
        return -1;
    /* First pack the stararg */
    if (self->has_stararg) {
        /* Surplus positional arguments (possibly none) become the stararg */
        Py_ssize_t stararg_size = Py_MAX(0, pos_args - func_args + 1);
        PyObject *stararg = PyTuple_New(stararg_size);
        if (!stararg) {
            Py_DECREF(newargs);
            return -1;
        }
        for (i = 0; i < stararg_size; i++) {
            PyObject *value = PyTuple_GET_ITEM(oldargs, func_args - 1 + i);
            Py_INCREF(value);
            PyTuple_SET_ITEM(stararg, i, value);
        }
        /* Put it in last position */
        PyTuple_SET_ITEM(newargs, func_args - 1, stararg);
    }
    /* Copy the plain positional arguments into their slots */
    for (i = 0; i < pos_args; i++) {
        PyObject *value = PyTuple_GET_ITEM(oldargs, i);
        if (self->has_stararg && i >= func_args - 1) {
            /* Skip stararg */
            break;
        }
        Py_INCREF(value);
        PyTuple_SET_ITEM(newargs, i, value);
    }
    /* Iterate over missing positional arguments, try to find them in
       named arguments or default values. */
    for (i = pos_args; i < func_args; i++) {
        PyObject *name = PyTuple_GET_ITEM(self->argnames, i);
        if (self->has_stararg && i >= func_args - 1) {
            /* Skip stararg */
            break;
        }
        if (kws != NULL) {
            /* Named argument? (borrowed reference) */
            PyObject *value = PyDict_GetItem(kws, name);
            if (value != NULL) {
                Py_INCREF(value);
                PyTuple_SET_ITEM(newargs, i, value);
                named_args--;
                continue;
            }
        }
        if (i >= first_def && i <= last_def) {
            /* Argument has a default value? */
            PyObject *value = PyTuple_GET_ITEM(self->defargs, i - first_def);
            Py_INCREF(value);
            PyTuple_SET_ITEM(newargs, i, value);
            continue;
        }
        else if (i < func_args - 1 || !self->has_stararg) {
            PyErr_Format(PyExc_TypeError,
                         "missing argument '%s'",
                         PyString_AsString(name));
            Py_DECREF(newargs);
            return -1;
        }
    }
    /* Any keyword not consumed above matched no formal parameter */
    if (named_args) {
        PyErr_Format(PyExc_TypeError,
                     "some keyword arguments unexpected");
        Py_DECREF(newargs);
        return -1;
    }
    *pargs = newargs;
    *pkws = NULL;
    return 0;
}
/*
 * Management of thread-local
 */
#ifdef _MSC_VER
#define THREAD_LOCAL(ty) __declspec(thread) ty
#else
/* Non-standard C99 extension that's understood by gcc and clang */
#define THREAD_LOCAL(ty) __thread ty
#endif

/* When set, Dispatcher_call forwards the call to self._call_tls_target
   instead of resolving a specialization directly (see Dispatcher_call). */
static THREAD_LOCAL(bool) use_tls_target_stack;

/* RAII guard: saves the current use_tls_target_stack value, installs a new
   one, and restores the saved value when the guard leaves scope. */
struct raii_use_tls_target_stack {
    bool old_setting;   // value to restore on destruction
    raii_use_tls_target_stack(bool new_setting)
        : old_setting(use_tls_target_stack)
    {
        use_tls_target_stack = new_setting;
    }
    ~raii_use_tls_target_stack() {
        use_tls_target_stack = old_setting;
    }
};
/*
 * tp_call implementation for Dispatcher.
 *
 * Computes the typecode of each actual argument, resolves a compiled
 * specialization and invokes it. May compile a new specialization, fall
 * back to an object-mode definition, or raise a TypeError on no/ambiguous
 * match. Returns a new reference, or NULL with an exception set.
 */
static PyObject*
Dispatcher_call(Dispatcher *self, PyObject *args, PyObject *kws)
{
    PyObject *tmptype, *retval = NULL;
    int *tys = NULL;
    int argct;
    int i;
    int prealloc[24];   /* avoids a heap allocation for <= 24 arguments */
    int matches;
    PyObject *cfunc;
    PyThreadState *ts = PyThreadState_Get();
    PyObject *locals = NULL;

    // Check TLS target stack
    if (use_tls_target_stack) {
        /* Clear the flag for the duration of the forwarded call so the
           Python-level target can re-enter this dispatcher. */
        raii_use_tls_target_stack turn_off(false);
        PyObject * meth_call_tls_target;
        meth_call_tls_target = PyObject_GetAttrString((PyObject*)self,
                                                      "_call_tls_target");
        if (!meth_call_tls_target) return NULL;
        // Transfer control to self._call_tls_target
        retval = PyObject_Call(meth_call_tls_target, args, kws);
        Py_DECREF(meth_call_tls_target);
        return retval;
    }

    /* If compilation is enabled, ensure that an exact match is found and if
     * not compile one */
    int exact_match_required = self->can_compile ? 1 : self->exact_match_required;

#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10)
    if (ts->tracing && ts->c_profilefunc) {
#else
    if (ts->use_tracing && ts->c_profilefunc) {
#endif
        /* A profiler is active: fetch the caller's locals (borrowed
           reference) so call_cfunc can synthesize a profiling frame. */
        locals = PyEval_GetLocals();
        if (locals == NULL) {
            /* BUGFIX: previously `goto CLEANUP`, but no reference to
               `args` is owned yet at this point, so the Py_DECREF(args)
               in the cleanup path over-released a borrowed reference. */
            return NULL;
        }
    }
    if (self->fold_args) {
        if (find_named_args(self, &args, &kws))
            return NULL;
    }
    else
        Py_INCREF(args);
    /* Now we own a reference to args */

    argct = PySequence_Fast_GET_SIZE(args);
    if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int)))
        tys = prealloc;
    else
        tys = new int[argct];

    /* Compute the typecode of every actual argument */
    for (i = 0; i < argct; ++i) {
        tmptype = PySequence_Fast_GET_ITEM(args, i);
        tys[i] = typeof_typecode((PyObject *) self, tmptype);
        if (tys[i] == -1) {
            if (self->can_fallback){
                /* We will clear the exception if fallback is allowed. */
                PyErr_Clear();
            } else {
                goto CLEANUP;
            }
        }
    }

    /* We only allow unsafe conversions if compilation of new specializations
       has been disabled.
       Note that the number of matches is returned in matches by resolve, which
       accepts it as a reference. */
    cfunc = self->resolve(tys, matches, !self->can_compile,
                          exact_match_required);

    if (matches == 0 && !self->can_compile) {
        /*
         * If we can't compile a new specialization, look for
         * matching signatures for which conversions haven't been
         * registered on the C++ TypeManager.
         */
        int res = search_new_conversions((PyObject *) self, args, kws);
        if (res < 0) {
            retval = NULL;
            goto CLEANUP;
        }
        if (res > 0) {
            /* Retry with the newly registered conversions */
            cfunc = self->resolve(tys, matches, !self->can_compile,
                                  exact_match_required);
        }
    }

    if (matches == 1) {
        /* Definition is found */
        retval = call_cfunc(self, cfunc, args, kws, locals);
    } else if (matches == 0) {
        /* No matching definition */
        if (self->can_compile) {
            retval = compile_and_invoke(self, args, kws, locals);
        } else if (self->fallbackdef) {
            /* Have object fallback */
            retval = call_cfunc(self, self->fallbackdef, args, kws, locals);
        } else {
            /* Raise TypeError */
            explain_matching_error((PyObject *) self, args, kws);
            retval = NULL;
        }
    } else if (self->can_compile) {
        /* Ambiguous, but are allowed to compile */
        retval = compile_and_invoke(self, args, kws, locals);
    } else {
        /* Ambiguous */
        explain_ambiguous((PyObject *) self, args, kws);
        retval = NULL;
    }

CLEANUP:
    if (tys != prealloc)
        delete[] tys;
    Py_DECREF(args);

    return retval;
}
/* Based on Dispatcher_call above, with the following differences:
   1. It does not invoke the definition of the function.
   2. It returns the definition, instead of a value returned by the function.
   This is because CUDA functions are, at present, _Kernel objects rather than
   compiled functions. */
static PyObject*
Dispatcher_cuda_call(Dispatcher *self, PyObject *args, PyObject *kws)
{
    PyObject *tmptype, *retval = NULL;
    int *tys = NULL;
    int argct;
    int i;
    int prealloc[24];   /* avoids a heap allocation for <= 24 arguments */
    int matches;
    PyObject *cfunc;
    PyThreadState *ts = PyThreadState_Get();
    PyObject *locals = NULL;

    /* If compilation is enabled, ensure that an exact match is found and if
     * not compile one */
    int exact_match_required = self->can_compile ? 1 : self->exact_match_required;

#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 10)
    if (ts->tracing && ts->c_profilefunc) {
#else
    if (ts->use_tracing && ts->c_profilefunc) {
#endif
        /* A profiler is active: fetch the caller's locals (borrowed
           reference) for the fallback path through call_cfunc. */
        locals = PyEval_GetLocals();
        if (locals == NULL) {
            /* BUGFIX: previously `goto CLEANUP`, but no reference to
               `args` is owned yet at this point, so the Py_DECREF(args)
               in the cleanup path over-released a borrowed reference. */
            return NULL;
        }
    }
    if (self->fold_args) {
        if (find_named_args(self, &args, &kws))
            return NULL;
    }
    else
        Py_INCREF(args);
    /* Now we own a reference to args */

    argct = PySequence_Fast_GET_SIZE(args);
    if (argct < (Py_ssize_t) (sizeof(prealloc) / sizeof(int)))
        tys = prealloc;
    else
        tys = new int[argct];

    /* Compute the typecode of every actual argument */
    for (i = 0; i < argct; ++i) {
        tmptype = PySequence_Fast_GET_ITEM(args, i);
        tys[i] = typeof_typecode((PyObject *) self, tmptype);
        if (tys[i] == -1) {
            if (self->can_fallback){
                /* We will clear the exception if fallback is allowed. */
                PyErr_Clear();
            } else {
                goto CLEANUP;
            }
        }
    }

    /* We only allow unsafe conversions if compilation of new specializations
       has been disabled. */
    cfunc = self->resolve(tys, matches, !self->can_compile,
                          exact_match_required);

    if (matches == 0 && !self->can_compile) {
        /*
         * If we can't compile a new specialization, look for
         * matching signatures for which conversions haven't been
         * registered on the C++ TypeManager.
         */
        int res = search_new_conversions((PyObject *) self, args, kws);
        if (res < 0) {
            retval = NULL;
            goto CLEANUP;
        }
        if (res > 0) {
            /* Retry with the newly registered conversions */
            cfunc = self->resolve(tys, matches, !self->can_compile,
                                  exact_match_required);
        }
    }

    if (matches == 1) {
        /* Definition is found: return it instead of calling it */
        retval = cfunc;
        Py_INCREF(retval);
    } else if (matches == 0) {
        /* No matching definition */
        if (self->can_compile) {
            retval = cuda_compile_only(self, args, kws, locals);
        } else if (self->fallbackdef) {
            /* Have object fallback */
            retval = call_cfunc(self, self->fallbackdef, args, kws, locals);
        } else {
            /* Raise TypeError */
            explain_matching_error((PyObject *) self, args, kws);
            retval = NULL;
        }
    } else if (self->can_compile) {
        /* Ambiguous, but are allowed to compile */
        retval = cuda_compile_only(self, args, kws, locals);
    } else {
        /* Ambiguous */
        explain_ambiguous((PyObject *) self, args, kws);
        retval = NULL;
    }

CLEANUP:
    if (tys != prealloc)
        delete[] tys;
    Py_DECREF(args);

    return retval;
}
static int
import_devicearray(void)
{
PyObject *devicearray = PyImport_ImportModule("numba._devicearray");
if (devicearray == NULL) {
return -1;
}
Py_DECREF(devicearray);
DeviceArray_API = (void**)PyCapsule_Import("numba._devicearray._DEVICEARRAY_API", 0);
if (DeviceArray_API == NULL) {
return -1;
}
return 0;
}
/* Methods exposed on Dispatcher instances */
static PyMethodDef Dispatcher_methods[] = {
    { "_clear", (PyCFunction)Dispatcher_clear, METH_NOARGS, NULL },
    { "_insert", (PyCFunction)Dispatcher_Insert, METH_VARARGS | METH_KEYWORDS,
      "insert new definition"},
    { "_cuda_call", (PyCFunction)Dispatcher_cuda_call,
      METH_VARARGS | METH_KEYWORDS, "CUDA call resolution" },
    { NULL },
};

/* Attributes exposed on Dispatcher instances */
static PyMemberDef Dispatcher_members[] = {
    /* Writable flag controlling whether new specializations may be compiled */
    {(char*)"_can_compile", T_BOOL, offsetof(Dispatcher, can_compile), 0, NULL },
    {NULL}  /* Sentinel */
};
/* Type object for _dispatcher.Dispatcher. Python-level calls route through
   Dispatcher_call (tp_call); the type participates in GC via
   Dispatcher_traverse. */
static PyTypeObject DispatcherType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_dispatcher.Dispatcher",                    /* tp_name */
    sizeof(Dispatcher),                          /* tp_basicsize */
    0,                                           /* tp_itemsize */
    (destructor)Dispatcher_dealloc,              /* tp_dealloc */
    0,                                           /* tp_vectorcall_offset */
    0,                                           /* tp_getattr */
    0,                                           /* tp_setattr */
    0,                                           /* tp_as_async */
    0,                                           /* tp_repr */
    0,                                           /* tp_as_number */
    0,                                           /* tp_as_sequence */
    0,                                           /* tp_as_mapping */
    0,                                           /* tp_hash */
    (PyCFunctionWithKeywords)Dispatcher_call,    /* tp_call*/
    0,                                           /* tp_str*/
    0,                                           /* tp_getattro*/
    0,                                           /* tp_setattro*/
    0,                                           /* tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags*/
    "Dispatcher object",                         /* tp_doc */
    (traverseproc) Dispatcher_traverse,          /* tp_traverse */
    0,                                           /* tp_clear */
    0,                                           /* tp_richcompare */
    0,                                           /* tp_weaklistoffset */
    0,                                           /* tp_iter */
    0,                                           /* tp_iternext */
    Dispatcher_methods,                          /* tp_methods */
    Dispatcher_members,                          /* tp_members */
    0,                                           /* tp_getset */
    0,                                           /* tp_base */
    0,                                           /* tp_dict */
    0,                                           /* tp_descr_get */
    0,                                           /* tp_descr_set */
    0,                                           /* tp_dictoffset */
    (initproc)Dispatcher_init,                   /* tp_init */
    0,                                           /* tp_alloc */
    0,                                           /* tp_new */
    0,                                           /* tp_free */
    0,                                           /* tp_is_gc */
    0,                                           /* tp_bases */
    0,                                           /* tp_mro */
    0,                                           /* tp_cache */
    0,                                           /* tp_subclasses */
    0,                                           /* tp_weaklist */
    0,                                           /* tp_del */
    0,                                           /* tp_version_tag */
    0,                                           /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                           /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                           /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* _dispatcher.compute_fingerprint(obj): return the type fingerprint of
   an arbitrary Python object (delegates to typeof_compute_fingerprint). */
static PyObject *compute_fingerprint(PyObject *self, PyObject *args)
{
    PyObject *obj = NULL;
    if (PyArg_ParseTuple(args, "O:compute_fingerprint", &obj))
        return typeof_compute_fingerprint(obj);
    return NULL;
}
/* _dispatcher.set_use_tls_target_stack(flag): set the thread-local
   use_tls_target_stack flag and return its previous value as a bool. */
static PyObject *set_use_tls_target_stack(PyObject *self, PyObject *args)
{
    int enable;
    if (!PyArg_ParseTuple(args, "p", &enable))
        return NULL;
    bool previous = use_tls_target_stack;
    use_tls_target_stack = enable;
    /* Hand back the old setting so callers can restore it later */
    return PyBool_FromLong(previous ? 1 : 0);
}
/* Module-level functions exported by the _dispatcher extension module */
static PyMethodDef ext_methods[] = {
#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL }
    declmethod(typeof_init),
    declmethod(compute_fingerprint),
    declmethod(set_use_tls_target_stack),
    { NULL },
#undef declmethod
};
/* Module initialization for _dispatcher: requires the numba._devicearray
   C API capsule to be importable, then registers the Dispatcher type. */
MOD_INIT(_dispatcher) {
    if (import_devicearray() < 0) {
        PyErr_Print();
        PyErr_SetString(PyExc_ImportError, "numba._devicearray failed to import");
        return MOD_ERROR_VAL;
    }

    PyObject *m;
    MOD_DEF(m, "_dispatcher", "No docs", ext_methods)
    if (m == NULL)
        return MOD_ERROR_VAL;
    /* Allow Python subclasses to instantiate Dispatcher */
    DispatcherType.tp_new = PyType_GenericNew;
    if (PyType_Ready(&DispatcherType) < 0) {
        return MOD_ERROR_VAL;
    }
    Py_INCREF(&DispatcherType);
    PyModule_AddObject(m, "Dispatcher", (PyObject*)(&DispatcherType));
    return MOD_SUCCESS_VAL(m);
}
/*
* Definition of Environment and Closure objects.
* This module is included by _dynfuncmod.c and by pycc-compiled modules.
*/
#include "_pymodule.h"
#include <string.h>
/* NOTE: EnvironmentObject and ClosureObject must be kept in sync with
* the definitions in numba/targets/base.py (EnvBody and ClosureBody).
*/
/*
* EnvironmentObject hosts data needed for execution of compiled functions.
*/
/* Runtime environment of a compiled function. Field layout must stay in
   sync with EnvBody in numba/targets/base.py (see note above). */
typedef struct {
    PyObject_HEAD
    /* Globals dict of the module the function belongs to */
    PyObject *globals;
    /* Assorted "constants" that are needed at runtime to execute
       the compiled function. This can include frozen closure variables,
       lifted loops, etc. */
    PyObject *consts;
} EnvironmentObject;
/* Read-only Python-level access to the environment's two fields */
static PyMemberDef env_members[] = {
    {"globals", T_OBJECT, offsetof(EnvironmentObject, globals), READONLY, NULL},
    {"consts", T_OBJECT, offsetof(EnvironmentObject, consts), READONLY, NULL},
    {NULL}  /* Sentinel */
};
/* GC traversal: visit the two owned references. */
static int
env_traverse(EnvironmentObject *env, visitproc visit, void *arg)
{
    Py_VISIT(env->globals);
    Py_VISIT(env->consts);
    return 0;
}
/* GC clear: drop the owned references (also reused by env_dealloc). */
static int
env_clear(EnvironmentObject *env)
{
    Py_CLEAR(env->globals);
    Py_CLEAR(env->consts);
    return 0;
}
/* Destructor: untrack from the GC before clearing so the collector cannot
   see a half-destroyed object. */
static void
env_dealloc(EnvironmentObject *env)
{
    PyObject_GC_UnTrack((PyObject *) env);
    env_clear(env);
    Py_TYPE(env)->tp_free((PyObject *) env);
}
/* Allocate an EnvironmentObject with zeroed fields, bypassing env_new's
   argument parsing. Returns NULL on allocation failure. */
static EnvironmentObject *
env_new_empty(PyTypeObject* type)
{
    PyObject *raw = PyType_GenericNew(type, NULL, NULL);
    return (EnvironmentObject *) raw;
}
/* tp_new implementation: Environment(globals) builds an environment whose
   globals is the given dict and whose consts starts as an empty list. */
static PyObject *
env_new(PyTypeObject* type, PyObject* args, PyObject* kwds)
{
    PyObject *globals;
    EnvironmentObject *env;
    static char *kwlist[] = {"globals", 0};

    /* "O!" enforces that `globals` is exactly a dict (borrowed reference) */
    if (!PyArg_ParseTupleAndKeywords(
            args, kwds, "O!:function", kwlist,
            &PyDict_Type, &globals))
        return NULL;

    env = env_new_empty(type);
    if (env == NULL)
        return NULL;
    Py_INCREF(globals);
    env->globals = globals;
    env->consts = PyList_New(0);
    if (!env->consts) {
        /* env_dealloc releases the globals reference taken above */
        Py_DECREF(env);
        return NULL;
    }
    return (PyObject *) env;
}
/* Type object for _dynfunc.Environment. Constructible from Python via
   env_new (tp_new); participates in GC via env_traverse/env_clear. */
static PyTypeObject EnvironmentType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_dynfunc.Environment",                      /* tp_name */
    sizeof(EnvironmentObject),                   /* tp_basicsize */
    0,                                           /* tp_itemsize */
    (destructor) env_dealloc,                    /* tp_dealloc */
    0,                                           /* tp_vectorcall_offset */
    0,                                           /* tp_getattr*/
    0,                                           /* tp_setattr */
    0,                                           /* tp_as_async */
    0,                                           /* tp_repr */
    0,                                           /* tp_as_number */
    0,                                           /* tp_as_sequence */
    0,                                           /* tp_as_mapping */
    0,                                           /* tp_hash */
    0,                                           /* tp_call */
    0,                                           /* tp_str */
    0,                                           /* tp_getattro */
    0,                                           /* tp_setattro */
    0,                                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC, /* tp_flags */
    0,                                           /* tp_doc */
    (traverseproc) env_traverse,                 /* tp_traverse */
    (inquiry) env_clear,                         /* tp_clear */
    0,                                           /* tp_richcompare */
    0,                                           /* tp_weaklistoffset */
    0,                                           /* tp_iter */
    0,                                           /* tp_iternext */
    0,                                           /* tp_methods */
    env_members,                                 /* tp_members */
    0,                                           /* tp_getset */
    0,                                           /* tp_base */
    0,                                           /* tp_dict */
    0,                                           /* tp_descr_get */
    0,                                           /* tp_descr_set */
    0,                                           /* tp_dictoffset */
    0,                                           /* tp_init */
    0,                                           /* tp_alloc */
    env_new,                                     /* tp_new */
    0,                                           /* tp_free */
    0,                                           /* tp_is_gc */
    0,                                           /* tp_bases */
    0,                                           /* tp_mro */
    0,                                           /* tp_cache */
    0,                                           /* tp_subclasses */
    0,                                           /* tp_weaklist */
    0,                                           /* tp_del */
    0,                                           /* tp_version_tag */
    0,                                           /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                           /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                           /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* A closure object is created for each call to make_function(), and stored
   as the resulting PyCFunction object's "self" pointer. It points to an
   EnvironmentObject which is constructed during compilation. This allows
   for two things:
       - lifetime management of dependent data (e.g. lifted loop dispatchers)
       - access to the execution environment by the compiled function
         (for example the globals module)
*/

/* Closure is a variable-sized object for binary compatibility with
   Generator (see below): both begin with the same header, so `env`
   sits at the same offset in either object. */
#define CLOSURE_HEAD     \
    PyObject_VAR_HEAD    \
    EnvironmentObject *env;

typedef struct {
    CLOSURE_HEAD
    /* The dynamically-filled method definition for the PyCFunction object
       using this closure. */
    PyMethodDef def;
    /* Arbitrary object to keep alive during the closure's lifetime.
       (put a tuple to put several objects alive).
       In practice, this helps keep the LLVM module and its generated
       code alive. */
    PyObject *keepalive;
    /* Support for weak references to the closure */
    PyObject *weakreflist;
} ClosureObject;
/* GC traversal: visit the environment and the keepalive object. */
static int
closure_traverse(ClosureObject *clo, visitproc visit, void *arg)
{
    Py_VISIT(clo->env);
    Py_VISIT(clo->keepalive);
    return 0;
}
/* Destructor: releases the name/doc strings duplicated by closure_new()
   (via dup_string) plus the owned references. */
static void
closure_dealloc(ClosureObject *clo)
{
    PyObject_GC_UnTrack((PyObject *) clo);
    if (clo->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) clo);
    /* ml_name / ml_doc were allocated with PyObject_Malloc in dup_string() */
    PyObject_Free((void *) clo->def.ml_name);
    PyObject_Free((void *) clo->def.ml_doc);
    Py_XDECREF(clo->env);
    Py_XDECREF(clo->keepalive);
    Py_TYPE(clo)->tp_free((PyObject *) clo);
}
/* Type object for _dynfunc._Closure. Not constructible from Python
   (no tp_new); instances are created internally by closure_new(). */
static PyTypeObject ClosureType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_dynfunc._Closure",                         /* tp_name */
    sizeof(ClosureObject),                       /* tp_basicsize */
    0,                                           /* tp_itemsize */
    (destructor) closure_dealloc,                /* tp_dealloc */
    0,                                           /* tp_vectorcall_offset */
    0,                                           /* tp_getattr */
    0,                                           /* tp_setattr */
    0,                                           /* tp_as_async */
    0,                                           /* tp_repr */
    0,                                           /* tp_as_number */
    0,                                           /* tp_as_sequence */
    0,                                           /* tp_as_mapping */
    0,                                           /* tp_hash */
    0,                                           /* tp_call */
    0,                                           /* tp_str */
    0,                                           /* tp_getattro */
    0,                                           /* tp_setattro */
    0,                                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,     /* tp_flags */
    0,                                           /* tp_doc */
    (traverseproc) closure_traverse,             /* tp_traverse */
    0,                                           /* tp_clear */
    0,                                           /* tp_richcompare */
    offsetof(ClosureObject, weakreflist),        /* tp_weaklistoffset */
    0,                                           /* tp_iter */
    0,                                           /* tp_iternext */
    0,                                           /* tp_methods */
    0,                                           /* tp_members */
    0,                                           /* tp_getset */
    0,                                           /* tp_base */
    0,                                           /* tp_dict */
    0,                                           /* tp_descr_get */
    0,                                           /* tp_descr_set */
    0,                                           /* tp_dictoffset */
    0,                                           /* tp_init */
    0,                                           /* tp_alloc */
    0,                                           /* tp_new */
    0,                                           /* tp_free */
    0,                                           /* tp_is_gc */
    0,                                           /* tp_bases */
    0,                                           /* tp_mro */
    0,                                           /* tp_cache */
    0,                                           /* tp_subclasses */
    0,                                           /* tp_weaklist */
    0,                                           /* tp_del */
    0,                                           /* tp_version_tag */
    0,                                           /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                           /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                           /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
/* FIX: parenthesization normalized to match the identical guards on the
 * other type objects in this file; the previous form nested `== 11`
 * inside the `== 10` test's parentheses (same truth table, but easy to
 * break on the next version bump). */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* Duplicate a Python string object's UTF-8 value into an owned C buffer.
   Returns NULL with an exception set on failure. The caller releases the
   buffer with PyObject_Free. */
static char *
dup_string(PyObject *strobj)
{
    char *copy;
    const char *src = PyString_AsString(strobj);
    if (src == NULL)
        return NULL;
    /* PyObject_Malloc keeps the allocation visible to Python's memory
       tracking, so leaks are reported. */
    copy = PyObject_Malloc(strlen(src) + 1);
    if (copy == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memcpy(copy, src, strlen(src) + 1);
    return copy;
}
/* Create and initialize a new Closure object.
   `name` and `doc` must be string objects, `env` must be non-NULL, and
   `keepalive` may be NULL. Returns a new reference, or NULL on error. */
static ClosureObject *
closure_new(PyObject *name, PyObject *doc, PyCFunction fnaddr,
            EnvironmentObject *env, PyObject *keepalive)
{
    ClosureObject *clo = (ClosureObject *) PyType_GenericAlloc(&ClosureType, 0);
    if (clo == NULL)
        return NULL;

    clo->def.ml_name = dup_string(name);
    if (!clo->def.ml_name) {
        /* closure_dealloc frees any partially-initialized fields */
        Py_DECREF(clo);
        return NULL;
    }
    clo->def.ml_meth = fnaddr;
    clo->def.ml_flags = METH_VARARGS | METH_KEYWORDS;
    clo->def.ml_doc = dup_string(doc);
    if (!clo->def.ml_doc) {
        Py_DECREF(clo);
        return NULL;
    }
    Py_INCREF(env);
    clo->env = env;
    Py_XINCREF(keepalive);   /* keepalive is optional */
    clo->keepalive = keepalive;
    return clo;
}
/* Create a new PyCFunction object wrapping a closure defined by
   the given arguments. The closure becomes the function's __self__, which
   ties the closure's lifetime to the function object's.
   Returns a new reference, or NULL on error. */
static PyObject *
pycfunction_new(PyObject *module, PyObject *name, PyObject *doc,
                PyCFunction fnaddr, EnvironmentObject *env, PyObject *keepalive)
{
    PyObject *funcobj;
    PyObject *modname = NULL;
    ClosureObject *closure = NULL;

    closure = closure_new(name, doc, fnaddr, env, keepalive);
    if (closure == NULL) goto FAIL;
    modname = PyObject_GetAttrString(module, "__name__");
    if (modname == NULL) goto FAIL;
    /* PyCFunction_NewEx takes its own references to closure and modname */
    funcobj = PyCFunction_NewEx(&closure->def, (PyObject *) closure, modname);
    Py_DECREF(closure);
    Py_DECREF(modname);
    return funcobj;
FAIL:
    Py_XDECREF(closure);
    Py_XDECREF(modname);
    return NULL;
}
/*
 * Python-facing wrapper for Numba-compiled generator.
 * Note the Environment's offset inside the struct is the same as in the
 * Closure object. This is required to simplify generation of Python wrappers.
 */

/* Finalizer run over the opaque generator state when the generator is
   cleared or deallocated. */
typedef void (*gen_finalizer_t)(void *);

typedef struct {
    CLOSURE_HEAD
    /* C function that advances the generator (its __next__) */
    PyCFunctionWithKeywords nextfunc;
    /* Invoked on `state` at finalization; reset to NULL afterwards */
    gen_finalizer_t finalizer;
    PyObject *weakreflist;
    union {
        double dummy;    /* Force alignment */
        char state[0];   /* Variable-sized opaque generator state */
    };
} GeneratorObject;
/* GC traversal for generators. */
static int
generator_traverse(GeneratorObject *gen, visitproc visit, void *arg)
{
    /* XXX this doesn't traverse the state, which can own references to
       PyObjects */
    Py_VISIT(gen->env);
    return 0;
}
/* GC clear: run the state finalizer (at most once), drop the environment
   reference, and leave the generator unusable (nextfunc == NULL). */
static int
generator_clear(GeneratorObject *gen)
{
    if (gen->finalizer != NULL) {
        gen->finalizer(gen->state);
        gen->finalizer = NULL;
    }
    Py_CLEAR(gen->env);
    gen->nextfunc = NULL;
    return 0;
}
/* Destructor for generators. */
static void
generator_dealloc(GeneratorObject *gen)
{
    PyObject_GC_UnTrack((PyObject *) gen);
    if (gen->weakreflist != NULL)
        PyObject_ClearWeakRefs((PyObject *) gen);
    /* XXX The finalizer may be called after the LLVM module has been
       destroyed (typically at interpreter shutdown), so it is skipped
       while the interpreter is finalizing */
    if (!_Py_IsFinalizing())
        if (gen->finalizer != NULL)
            gen->finalizer(gen->state);
    Py_XDECREF(gen->env);
    Py_TYPE(gen)->tp_free((PyObject *) gen);
}
/* tp_iternext implementation: advance the generator by calling its
   compiled nextfunc with the generator packed as the sole argument.
   Raises RuntimeError when the generator was already finalized. */
static PyObject *
generator_iternext(GeneratorObject *gen)
{
    PyObject *packed_args, *item;
    if (gen->nextfunc == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "cannot call next() on finalized generator");
        return NULL;
    }
    packed_args = PyTuple_Pack(1, (PyObject *) gen);
    if (packed_args == NULL)
        return NULL;
    item = gen->nextfunc((PyObject *) gen, packed_args, NULL);
    Py_DECREF(packed_args);
    return item;
}
/* Type object for _dynfunc._Generator. Note tp_itemsize == 1 with
   tp_basicsize == offsetof(state): the trailing `state` bytes are
   allocated per-instance by PyType_GenericAlloc (see Numba_make_generator). */
static PyTypeObject GeneratorType = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_dynfunc._Generator",                       /* tp_name*/
    offsetof(GeneratorObject, state),            /* tp_basicsize*/
    1,                                           /* tp_itemsize*/
    (destructor) generator_dealloc,              /* tp_dealloc*/
    0,                                           /* tp_vectorcall_offset*/
    0,                                           /* tp_getattr*/
    0,                                           /* tp_setattr*/
    0,                                           /* tp_as_async*/
    0,                                           /* tp_repr*/
    0,                                           /* tp_as_number*/
    0,                                           /* tp_as_sequence*/
    0,                                           /* tp_as_mapping*/
    0,                                           /* tp_hash */
    0,                                           /* tp_call*/
    0,                                           /* tp_str*/
    0,                                           /* tp_getattro*/
    0,                                           /* tp_setattro*/
    0,                                           /* tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC
        | Py_TPFLAGS_BASETYPE,                   /* tp_flags*/
    0,                                           /* tp_doc */
    (traverseproc) generator_traverse,           /* tp_traverse */
    (inquiry) generator_clear,                   /* tp_clear */
    0,                                           /* tp_richcompare */
    offsetof(GeneratorObject, weakreflist),      /* tp_weaklistoffset */
    PyObject_SelfIter,                           /* tp_iter */
    (iternextfunc) generator_iternext,           /* tp_iternext */
    0,                                           /* tp_methods */
    0,                                           /* tp_members */
    0,                                           /* tp_getset */
    0,                                           /* tp_base */
    0,                                           /* tp_dict */
    0,                                           /* tp_descr_get */
    0,                                           /* tp_descr_set */
    0,                                           /* tp_dictoffset */
    0,                                           /* tp_init */
    0,                                           /* tp_alloc */
    0,                                           /* tp_new */
    0,                                           /* tp_free */
    0,                                           /* tp_is_gc */
    0,                                           /* tp_bases */
    0,                                           /* tp_mro */
    0,                                           /* tp_cache */
    0,                                           /* tp_subclasses */
    0,                                           /* tp_weaklist */
    0,                                           /* tp_del */
    0,                                           /* tp_version_tag */
    0,                                           /* tp_finalize */
/* The docs suggest Python 3.8 has no tp_vectorcall
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Doc/c-api/typeobj.rst?plain=1#L146
 * but the header has it:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L257
 */
    0,                                           /* tp_vectorcall */
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 8)
/* This is Python 3.8 only.
 * See: https://github.com/python/cpython/blob/3.8/Include/cpython/object.h
 * there's a tp_print preserved for backwards compatibility. xref:
 * https://github.com/python/cpython/blob/d917cfe4051d45b2b755c726c096ecfcc4869ceb/Include/cpython/object.h#L260
 */
    0,                                           /* tp_print */
#endif

/* WARNING: Do not remove this, only modify it! It is a version guard to
 * act as a reminder to update this struct on Python version update! */
#if (PY_MAJOR_VERSION == 3)
#if ! ((PY_MINOR_VERSION == 8) || (PY_MINOR_VERSION == 9) || (PY_MINOR_VERSION == 10) || (PY_MINOR_VERSION == 11))
#error "Python minor version is not supported."
#endif
#else
#error "Python major version is not supported."
#endif
/* END WARNING*/
};
/* Dynamically create a new generator object. `gen_state_size` extra bytes
   are allocated after the fixed header (tp_itemsize == 1) and initialized
   from `initial_state`.
   NOTE(review): assumes initial_state points at >= gen_state_size readable
   bytes — confirm against the callers (generated code). */
static PyObject *
Numba_make_generator(Py_ssize_t gen_state_size,
                     void *initial_state,
                     PyCFunctionWithKeywords nextfunc,
                     gen_finalizer_t finalizer,
                     EnvironmentObject *env)
{
    GeneratorObject *gen;
    gen = (GeneratorObject *) PyType_GenericAlloc(&GeneratorType, gen_state_size);
    if (gen == NULL)
        return NULL;
    memcpy(gen->state, initial_state, gen_state_size);
    gen->nextfunc = nextfunc;
    Py_XINCREF(env);   /* env may be NULL */
    gen->env = env;
    gen->finalizer = finalizer;
    return (PyObject *) gen;
}
/* Initialization subroutine shared by the modules that include this file:
   readies the three extension types. Returns 0 on success, -1 on error. */
static int
init_dynfunc_module(PyObject *module)
{
    PyTypeObject *types[] = { &ClosureType, &EnvironmentType, &GeneratorType };
    size_t n = sizeof(types) / sizeof(types[0]);
    size_t i;
    for (i = 0; i < n; i++) {
        if (PyType_Ready(types[i]))
            return -1;
    }
    return 0;
}
#include "_dynfunc.c"
/* Python-facing function to dynamically create a new C function object.
   make_function(module, name, doc, fnaddr, env[, keepalive]) */
static PyObject*
make_function(PyObject *self, PyObject *args)
{
    PyObject *module, *fname, *fdoc, *fnaddrobj;
    void *fnaddr;
    EnvironmentObject *env;
    /* BUGFIX: `keepalive` is optional in the format ("|O" below) and must
       be pre-initialized; otherwise an omitted argument left it as stack
       garbage that pycfunction_new() would Py_XINCREF. */
    PyObject *keepalive = NULL;

    if (!PyArg_ParseTuple(args, "OOOOO!|O",
            &module, &fname, &fdoc, &fnaddrobj, &EnvironmentType, &env,
            &keepalive)) {
        return NULL;
    }

    fnaddr = PyLong_AsVoidPtr(fnaddrobj);
    if (fnaddr == NULL && PyErr_Occurred())
        return NULL;

    return pycfunction_new(module, fname, fdoc, fnaddr, env, keepalive);
}
/* Module-level functions exported by the _dynfunc extension module */
static PyMethodDef ext_methods[] = {
#define declmethod(func) { #func , ( PyCFunction )func , METH_VARARGS , NULL }
    declmethod(make_function),
    { NULL },
#undef declmethod
};
/* Build the dict exposed as `_dynfunc.c_helpers`: maps helper names to the
   addresses (as Python ints) of the corresponding C functions.
   Returns a new reference, or NULL on error. */
static PyObject *
build_c_helpers_dict(void)
{
    PyObject *dct = PyDict_New();
    if (dct == NULL)
        goto error;

/* Store `value`'s address under `name`, jumping to `error` on failure */
#define _declpointer(name, value) do {                 \
    PyObject *o = PyLong_FromVoidPtr(value);           \
    if (o == NULL) goto error;                         \
    if (PyDict_SetItemString(dct, name, o)) {          \
        Py_DECREF(o);                                  \
        goto error;                                    \
    }                                                  \
    Py_DECREF(o);                                      \
} while (0)

#define declmethod(func) _declpointer(#func, &Numba_##func)
#define declpointer(ptr) _declpointer(#ptr, &ptr)

    declmethod(make_generator);

#undef declmethod
    return dct;
error:
    Py_XDECREF(dct);
    return NULL;
}
/* Module initialization for _dynfunc: readies and exposes the three types,
   publishes struct-offset info (_impl_info) needed by the compiler, and
   the c_helpers function-address dict. */
MOD_INIT(_dynfunc) {
    PyObject *m, *impl_info;

    MOD_DEF(m, "_dynfunc", "No docs", ext_methods)
    if (m == NULL)
        return MOD_ERROR_VAL;
    if (init_dynfunc_module(m))
        return MOD_ERROR_VAL;

    /* "sn" pairs: string key, Py_ssize_t value */
    impl_info = Py_BuildValue(
        "{snsnsn}",
        "offsetof_closure_body", offsetof(ClosureObject, env),
        "offsetof_env_body", offsetof(EnvironmentObject, globals),
        "offsetof_generator_state", offsetof(GeneratorObject, state)
    );
    if (impl_info == NULL)
        return MOD_ERROR_VAL;
    PyModule_AddObject(m, "_impl_info", impl_info);

    Py_INCREF(&ClosureType);
    PyModule_AddObject(m, "_Closure", (PyObject *) (&ClosureType));
    Py_INCREF(&EnvironmentType);
    PyModule_AddObject(m, "Environment", (PyObject *) (&EnvironmentType));
    Py_INCREF(&GeneratorType);
    PyModule_AddObject(m, "_Generator", (PyObject *) (&GeneratorType));

    PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
    return MOD_SUCCESS_VAL(m);
}
/*
* This file and _hashtable.h are from CPython 3.5. The symbols have been
* renamed from _Py_hashxxx to _Numba_hashxxx to avoid name clashes with
* the CPython definitions (including at runtime through dynamic linking).
* Those CPython APIs are private and can change in incompatible ways at
* any time.
*
* Command line used for renaming:
* $ sed -i -r 's/\b_Py_(has[h]table)/_Numba_\1/ig' numba/_hashtable.h numba/_hashtable.c
*/
/* The implementation of the hash table (_Numba_hashtable_t) is based on the cfuhash
project:
http://sourceforge.net/projects/libcfu/
Copyright of cfuhash:
----------------------------------
Creation date: 2005-06-24 21:22:40
Authors: Don
Change log:
Copyright (c) 2005 Don Owens
All rights reserved.
This code is released under the BSD license:
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
----------------------------------
*/
#include "_pymodule.h"
#include "_hashtable.h"
/* Tuning parameters: the bucket array grows when the load factor exceeds
   HASHTABLE_HIGH and shrinks when it drops below HASHTABLE_LOW; the rehash
   factor picks a bucket count between the two extremes. */
#define HASHTABLE_MIN_SIZE 16
#define HASHTABLE_HIGH 0.50
#define HASHTABLE_LOW 0.10
/* Parenthesized so the macro expands safely inside larger expressions;
   the original "2.0 / (...)" form relied on operator precedence at each
   use site. */
#define HASHTABLE_REHASH_FACTOR (2.0 / (HASHTABLE_LOW + HASHTABLE_HIGH))

/* Typed accessors over the singly-linked bucket lists. */
#define BUCKETS_HEAD(SLIST) \
        ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(SLIST)))
#define TABLE_HEAD(HT, BUCKET) \
        ((_Numba_hashtable_entry_t *)_Py_SLIST_HEAD(&(HT)->buckets[BUCKET]))
#define ENTRY_NEXT(ENTRY) \
        ((_Numba_hashtable_entry_t *)_Py_SLIST_ITEM_NEXT(ENTRY))
/* Allocation size of one entry: header plus the inline data area. */
#define HASHTABLE_ITEM_SIZE(HT) \
        (sizeof(_Numba_hashtable_entry_t) + (HT)->data_size)
/* Forward declaration */
static void hashtable_rehash(_Numba_hashtable_t *ht);
/* Initialize a singly-linked list to empty. */
static void
_Py_slist_init(_Py_slist_t *list)
{
    list->head = NULL;
}

/* Push `item` at the front of `list` (O(1)). */
static void
_Py_slist_prepend(_Py_slist_t *list, _Py_slist_item_t *item)
{
    item->next = list->head;
    list->head = item;
}

/* Unlink `item` from `list`. `previous` must be the item immediately
   before it, or NULL when `item` is the head. `item` is not freed. */
static void
_Py_slist_remove(_Py_slist_t *list, _Py_slist_item_t *previous,
                 _Py_slist_item_t *item)
{
    if (previous != NULL)
        previous->next = item->next;
    else
        list->head = item->next;
}
extern "C" Py_uhash_t
_Numba_hashtable_hash_int(const void *key)
{
return (Py_uhash_t)key;
}
extern "C" Py_uhash_t
_Numba_hashtable_hash_ptr(const void *key)
{
return (Py_uhash_t)_Py_HashPointer((void *)key);
}
extern "C" int
_Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry)
{
return entry->key == key;
}
/* Round a requested bucket count up to the nearest power of two, never
   below HASHTABLE_MIN_SIZE. A power-of-two size lets bucket selection use
   "hash & (num_buckets - 1)" instead of a modulo. */
static size_t
round_size(size_t s)
{
    size_t rounded;
    if (s < HASHTABLE_MIN_SIZE)
        return HASHTABLE_MIN_SIZE;
    for (rounded = 1; rounded < s; rounded <<= 1)
        ;
    return rounded;
}
extern "C" _Numba_hashtable_t *
_Numba_hashtable_new_full(size_t data_size, size_t init_size,
_Numba_hashtable_hash_func hash_func,
_Numba_hashtable_compare_func compare_func,
_Numba_hashtable_copy_data_func copy_data_func,
_Numba_hashtable_free_data_func free_data_func,
_Numba_hashtable_get_data_size_func get_data_size_func,
_Numba_hashtable_allocator_t *allocator)
{
_Numba_hashtable_t *ht;
size_t buckets_size;
_Numba_hashtable_allocator_t alloc;
if (allocator == NULL) {
alloc.malloc = PyMem_RawMalloc;
alloc.free = PyMem_RawFree;
}
else
alloc = *allocator;
ht = (_Numba_hashtable_t *)alloc.malloc(sizeof(_Numba_hashtable_t));
if (ht == NULL)
return ht;
ht->num_buckets = round_size(init_size);
ht->entries = 0;
ht->data_size = data_size;
buckets_size = ht->num_buckets * sizeof(ht->buckets[0]);
ht->buckets = (_Py_slist_t *) alloc.malloc(buckets_size);
if (ht->buckets == NULL) {
alloc.free(ht);
return NULL;
}
memset(ht->buckets, 0, buckets_size);
ht->hash_func = hash_func;
ht->compare_func = compare_func;
ht->copy_data_func = copy_data_func;
ht->free_data_func = free_data_func;
ht->get_data_size_func = get_data_size_func;
ht->alloc = alloc;
return ht;
}
extern "C" _Numba_hashtable_t *
_Numba_hashtable_new(size_t data_size,
_Numba_hashtable_hash_func hash_func,
_Numba_hashtable_compare_func compare_func)
{
return _Numba_hashtable_new_full(data_size, HASHTABLE_MIN_SIZE,
hash_func, compare_func,
NULL, NULL, NULL, NULL);
}
extern "C" size_t
_Numba_hashtable_size(_Numba_hashtable_t *ht)
{
size_t size;
size_t hv;
size = sizeof(_Numba_hashtable_t);
/* buckets */
size += ht->num_buckets * sizeof(_Numba_hashtable_entry_t *);
/* entries */
size += ht->entries * HASHTABLE_ITEM_SIZE(ht);
/* data linked from entries */
if (ht->get_data_size_func) {
for (hv = 0; hv < ht->num_buckets; hv++) {
_Numba_hashtable_entry_t *entry;
for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
void *data;
data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
size += ht->get_data_size_func(data);
}
}
}
return size;
}
#ifdef Py_DEBUG
/* Debug-build only: print load factor, average/maximum bucket chain
   length and total memory usage to stdout. */
extern "C" void
_Numba_hashtable_print_stats(_Numba_hashtable_t *ht)
{
    size_t size;
    size_t chain_len, max_chain_len, total_chain_len, nchains;
    _Numba_hashtable_entry_t *entry;
    size_t hv;
    double load;

    size = _Numba_hashtable_size(ht);

    load = (double)ht->entries / ht->num_buckets;

    /* Walk every bucket, measuring only non-empty chains. */
    max_chain_len = 0;
    total_chain_len = 0;
    nchains = 0;
    for (hv = 0; hv < ht->num_buckets; hv++) {
        entry = TABLE_HEAD(ht, hv);
        if (entry != NULL) {
            chain_len = 0;
            for (; entry; entry = ENTRY_NEXT(entry)) {
                chain_len++;
            }
            if (chain_len > max_chain_len)
                max_chain_len = chain_len;
            total_chain_len += chain_len;
            nchains++;
        }
    }
    printf("hash table %p: entries=%"
           PY_FORMAT_SIZE_T "u/%" PY_FORMAT_SIZE_T "u (%.0f%%), ",
           ht, ht->entries, ht->num_buckets, load * 100.0);
    if (nchains)
        printf("avg_chain_len=%.1f, ", (double)total_chain_len / nchains);
    printf("max_chain_len=%" PY_FORMAT_SIZE_T "u, %" PY_FORMAT_SIZE_T "u kB\n",
           max_chain_len, size / 1024);
}
#endif
/* Get an entry. Return NULL if the key does not exist. */
extern "C" _Numba_hashtable_entry_t *
_Numba_hashtable_get_entry(_Numba_hashtable_t *ht, const void *key)
{
    Py_uhash_t key_hash;
    size_t index;
    _Numba_hashtable_entry_t *entry;

    key_hash = ht->hash_func(key);
    /* num_buckets is a power of 2, so masking selects the bucket. */
    index = key_hash & (ht->num_buckets - 1);

    /* Compare the cached hash first; only call compare_func on a match. */
    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
            break;
    }

    return entry;
}
/* Remove the entry for `key`, optionally copying its inline data into
   `data` (pass data == NULL to discard it, as the delete path does).
   Returns 1 if the entry was found and removed, 0 otherwise. May shrink
   the table when the load factor drops below HASHTABLE_LOW. */
static int
_hashtable_pop_entry(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    Py_uhash_t key_hash;
    size_t index;
    _Numba_hashtable_entry_t *entry, *previous;

    key_hash = ht->hash_func(key);
    index = key_hash & (ht->num_buckets - 1);

    /* Track the predecessor so the entry can be unlinked from the
       singly-linked bucket chain. */
    previous = NULL;
    for (entry = TABLE_HEAD(ht, index); entry != NULL; entry = ENTRY_NEXT(entry)) {
        if (entry->key_hash == key_hash && ht->compare_func(key, entry))
            break;
        previous = entry;
    }

    if (entry == NULL)
        return 0;

    _Py_slist_remove(&ht->buckets[index], (_Py_slist_item_t *)previous,
                     (_Py_slist_item_t *)entry);
    ht->entries--;

    if (data != NULL)
        _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
    ht->alloc.free(entry);

    if ((float)ht->entries / (float)ht->num_buckets < HASHTABLE_LOW)
        hashtable_rehash(ht);
    return 1;
}
/* Add a new entry to the hash. The key must not be present in the hash table.
   Return 0 on success, -1 on memory error. The `data_size` bytes at `data`
   are copied into the entry's inline data area (must equal ht->data_size).
   May grow the table when the load factor exceeds HASHTABLE_HIGH. */
extern "C" int
_Numba_hashtable_set(_Numba_hashtable_t *ht, const void *key,
                     void *data, size_t data_size)
{
    Py_uhash_t key_hash;
    size_t index;
    _Numba_hashtable_entry_t *entry;

    assert(data != NULL || data_size == 0);
#ifndef NDEBUG
    /* Don't write the assertion on a single line because it is interesting
       to know the duplicated entry if the assertion failed. The entry can
       be read using a debugger. */
    entry = _Numba_hashtable_get_entry(ht, key);
    assert(entry == NULL);
#endif

    key_hash = ht->hash_func(key);
    index = key_hash & (ht->num_buckets - 1);

    entry = (_Numba_hashtable_entry_t *) ht->alloc.malloc(HASHTABLE_ITEM_SIZE(ht));
    if (entry == NULL) {
        /* memory allocation failed */
        return -1;
    }

    entry->key = (void *)key;
    entry->key_hash = key_hash;

    assert(data_size == ht->data_size);
    memcpy(_Numba_HASHTABLE_ENTRY_DATA(entry), data, data_size);

    _Py_slist_prepend(&ht->buckets[index], (_Py_slist_item_t*)entry);
    ht->entries++;

    if ((float)ht->entries / (float)ht->num_buckets > HASHTABLE_HIGH)
        hashtable_rehash(ht);
    return 0;
}
/* Get data from an entry. Copy entry data into data and return 1 if the entry
   exists, return 0 if the entry does not exist. */
extern "C" int
_Numba_hashtable_get(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    _Numba_hashtable_entry_t *entry;

    assert(data != NULL);

    entry = _Numba_hashtable_get_entry(ht, key);
    if (entry == NULL)
        return 0;
    _Numba_HASHTABLE_ENTRY_READ_DATA(ht, data, data_size, entry);
    return 1;
}

/* Remove the entry for `key`, copying its data into `data`.
   Returns 1 if the entry existed, 0 otherwise. Only valid on tables
   without a free_data_func (otherwise ownership would be ambiguous). */
extern "C" int
_Numba_hashtable_pop(_Numba_hashtable_t *ht, const void *key, void *data, size_t data_size)
{
    assert(data != NULL);
    assert(ht->free_data_func == NULL);
    return _hashtable_pop_entry(ht, key, data, data_size);
}

/* Delete an entry. The entry must exist. */
extern "C" void
_Numba_hashtable_delete(_Numba_hashtable_t *ht, const void *key)
{
#ifndef NDEBUG
    int found = _hashtable_pop_entry(ht, key, NULL, 0);
    assert(found);
#else
    (void)_hashtable_pop_entry(ht, key, NULL, 0);
#endif
}
/* Prototype for a pointer to a function to be called foreach
   key/value pair in the hash by hashtable_foreach(). Iteration
   stops if a non-zero value is returned. */
/* Returns the first non-zero value returned by `func`, or 0 if `func`
   returned 0 for every entry. `func` must not add or remove entries. */
extern "C" int
_Numba_hashtable_foreach(_Numba_hashtable_t *ht,
                         int (*func) (_Numba_hashtable_entry_t *entry, void *arg),
                         void *arg)
{
    _Numba_hashtable_entry_t *entry;
    size_t hv;

    for (hv = 0; hv < ht->num_buckets; hv++) {
        for (entry = TABLE_HEAD(ht, hv); entry; entry = ENTRY_NEXT(entry)) {
            int res = func(entry, arg);
            if (res)
                return res;
        }
    }
    return 0;
}
/* Resize the bucket array to match the current number of entries
   (target load factor between HASHTABLE_LOW and HASHTABLE_HIGH) and
   redistribute all entries. Entries are relinked in place — no entry is
   reallocated. On allocation failure the old buckets are kept, so the
   table stays valid (just over/under-loaded). */
static void
hashtable_rehash(_Numba_hashtable_t *ht)
{
    size_t buckets_size, new_size, bucket;
    _Py_slist_t *old_buckets = NULL;
    size_t old_num_buckets;

    new_size = round_size((size_t)(ht->entries * HASHTABLE_REHASH_FACTOR));
    if (new_size == ht->num_buckets)
        return;

    old_num_buckets = ht->num_buckets;

    buckets_size = new_size * sizeof(ht->buckets[0]);
    old_buckets = ht->buckets;
    ht->buckets = (_Py_slist_t *) ht->alloc.malloc(buckets_size);
    if (ht->buckets == NULL) {
        /* cancel rehash on memory allocation failure */
        ht->buckets = old_buckets ;
        /* memory allocation failed */
        return;
    }
    memset(ht->buckets, 0, buckets_size);

    ht->num_buckets = new_size;

    /* Move every entry from the old buckets into its new bucket, computed
       from the cached key_hash and the new mask. */
    for (bucket = 0; bucket < old_num_buckets; bucket++) {
        _Numba_hashtable_entry_t *entry, *next;
        for (entry = BUCKETS_HEAD(old_buckets[bucket]); entry != NULL; entry = next) {
            size_t entry_index;

            assert(ht->hash_func(entry->key) == entry->key_hash);
            /* Save the link before prepending clobbers it. */
            next = ENTRY_NEXT(entry);
            entry_index = entry->key_hash & (new_size - 1);

            _Py_slist_prepend(&ht->buckets[entry_index], (_Py_slist_item_t*)entry);
        }
    }

    ht->alloc.free(old_buckets);
}
extern "C" void
_Numba_hashtable_clear(_Numba_hashtable_t *ht)
{
_Numba_hashtable_entry_t *entry, *next;
size_t i;
for (i=0; i < ht->num_buckets; i++) {
for (entry = TABLE_HEAD(ht, i); entry != NULL; entry = next) {
next = ENTRY_NEXT(entry);
if (ht->free_data_func)
ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
ht->alloc.free(entry);
}
_Py_slist_init(&ht->buckets[i]);
}
ht->entries = 0;
hashtable_rehash(ht);
}
extern "C" void
_Numba_hashtable_destroy(_Numba_hashtable_t *ht)
{
size_t i;
for (i = 0; i < ht->num_buckets; i++) {
_Py_slist_item_t *entry = ht->buckets[i].head;
while (entry) {
_Py_slist_item_t *entry_next = entry->next;
if (ht->free_data_func)
ht->free_data_func(_Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry));
ht->alloc.free(entry);
entry = entry_next;
}
}
ht->alloc.free(ht->buckets);
ht->alloc.free(ht);
}
/* Return a copy of the hash table */
/* Entry data is duplicated via copy_data_func when set (a NULL result is
   treated as failure), otherwise copied bytewise. Returns NULL on any
   failure, destroying the partial copy. */
extern "C" _Numba_hashtable_t *
_Numba_hashtable_copy(_Numba_hashtable_t *src)
{
    _Numba_hashtable_t *dst;
    _Numba_hashtable_entry_t *entry;
    size_t bucket;
    int err;
    void *data, *new_data;

    dst = _Numba_hashtable_new_full(src->data_size, src->num_buckets,
                                    src->hash_func, src->compare_func,
                                    src->copy_data_func, src->free_data_func,
                                    src->get_data_size_func, &src->alloc);
    if (dst == NULL)
        return NULL;

    for (bucket=0; bucket < src->num_buckets; bucket++) {
        entry = TABLE_HEAD(src, bucket);
        for (; entry; entry = ENTRY_NEXT(entry)) {
            if (src->copy_data_func) {
                /* Duplicate the pointed-to data; store the new pointer. */
                data = _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(entry);
                new_data = src->copy_data_func(data);
                if (new_data != NULL)
                    err = _Numba_hashtable_set(dst, entry->key,
                                               &new_data, src->data_size);
                else
                    err = 1;
            }
            else {
                /* No copy callback: bytewise copy of the inline data. */
                data = _Numba_HASHTABLE_ENTRY_DATA(entry);
                err = _Numba_hashtable_set(dst, entry->key, data, src->data_size);
            }
            if (err) {
                _Numba_hashtable_destroy(dst);
                return NULL;
            }
        }
    }
    return dst;
}
/*
* See _hashtable.c for more information about this file.
*/
#ifndef Py_HASHTABLE_H
#define Py_HASHTABLE_H
/* The whole API is private */
#ifndef Py_LIMITED_API
typedef struct _Py_slist_item_s {
struct _Py_slist_item_s *next;
} _Py_slist_item_t;
typedef struct {
_Py_slist_item_t *head;
} _Py_slist_t;
#define _Py_SLIST_ITEM_NEXT(ITEM) (((_Py_slist_item_t *)ITEM)->next)
#define _Py_SLIST_HEAD(SLIST) (((_Py_slist_t *)SLIST)->head)
typedef struct {
/* used by _Numba_hashtable_t.buckets to link entries */
_Py_slist_item_t _Py_slist_item;
const void *key;
Py_uhash_t key_hash;
/* data follows */
} _Numba_hashtable_entry_t;
#define _Numba_HASHTABLE_ENTRY_DATA(ENTRY) \
((char *)(ENTRY) + sizeof(_Numba_hashtable_entry_t))
#define _Numba_HASHTABLE_ENTRY_DATA_AS_VOID_P(ENTRY) \
(*(void **)_Numba_HASHTABLE_ENTRY_DATA(ENTRY))
#define _Numba_HASHTABLE_ENTRY_READ_DATA(TABLE, DATA, DATA_SIZE, ENTRY) \
do { \
assert((DATA_SIZE) == (TABLE)->data_size); \
memcpy(DATA, _Numba_HASHTABLE_ENTRY_DATA(ENTRY), DATA_SIZE); \
} while (0)
typedef Py_uhash_t (*_Numba_hashtable_hash_func) (const void *key);
typedef int (*_Numba_hashtable_compare_func) (const void *key, const _Numba_hashtable_entry_t *he);
typedef void* (*_Numba_hashtable_copy_data_func)(void *data);
typedef void (*_Numba_hashtable_free_data_func)(void *data);
typedef size_t (*_Numba_hashtable_get_data_size_func)(void *data);
typedef struct {
/* allocate a memory block */
void* (*malloc) (size_t size);
/* release a memory block */
void (*free) (void *ptr);
} _Numba_hashtable_allocator_t;
/* The hash table object: an array of buckets, each holding a singly
   linked list of entries. Each entry embeds `data_size` bytes of data
   inline after its header. */
typedef struct {
    size_t num_buckets;                 /* length of `buckets` (power of 2) */
    size_t entries; /* Total number of entries in the table. */
    _Py_slist_t *buckets;               /* bucket array of entry lists */
    size_t data_size;                   /* bytes of inline data per entry */
    _Numba_hashtable_hash_func hash_func;        /* key -> hash */
    _Numba_hashtable_compare_func compare_func;  /* key equality test */
    _Numba_hashtable_copy_data_func copy_data_func;          /* optional */
    _Numba_hashtable_free_data_func free_data_func;          /* optional */
    _Numba_hashtable_get_data_size_func get_data_size_func;  /* optional */
    _Numba_hashtable_allocator_t alloc;          /* malloc/free pair */
} _Numba_hashtable_t;
/* hash and compare functions for integers and pointers */
extern "C" PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_ptr(const void *key);
extern "C" PyAPI_FUNC(Py_uhash_t) _Numba_hashtable_hash_int(const void *key);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_compare_direct(const void *key, const _Numba_hashtable_entry_t *entry);
extern "C" PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new(
size_t data_size,
_Numba_hashtable_hash_func hash_func,
_Numba_hashtable_compare_func compare_func);
extern "C" PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_new_full(
size_t data_size,
size_t init_size,
_Numba_hashtable_hash_func hash_func,
_Numba_hashtable_compare_func compare_func,
_Numba_hashtable_copy_data_func copy_data_func,
_Numba_hashtable_free_data_func free_data_func,
_Numba_hashtable_get_data_size_func get_data_size_func,
_Numba_hashtable_allocator_t *allocator);
extern "C" PyAPI_FUNC(_Numba_hashtable_t *) _Numba_hashtable_copy(_Numba_hashtable_t *src);
extern "C" PyAPI_FUNC(void) _Numba_hashtable_clear(_Numba_hashtable_t *ht);
extern "C" PyAPI_FUNC(void) _Numba_hashtable_destroy(_Numba_hashtable_t *ht);
typedef int (*_Numba_hashtable_foreach_func) (_Numba_hashtable_entry_t *entry, void *arg);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_foreach(
_Numba_hashtable_t *ht,
_Numba_hashtable_foreach_func func, void *arg);
extern "C" PyAPI_FUNC(size_t) _Numba_hashtable_size(_Numba_hashtable_t *ht);
extern "C" PyAPI_FUNC(_Numba_hashtable_entry_t*) _Numba_hashtable_get_entry(
_Numba_hashtable_t *ht,
const void *key);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_set(
_Numba_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_get(
_Numba_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
extern "C" PyAPI_FUNC(int) _Numba_hashtable_pop(
_Numba_hashtable_t *ht,
const void *key,
void *data,
size_t data_size);
extern "C" PyAPI_FUNC(void) _Numba_hashtable_delete(
_Numba_hashtable_t *ht,
const void *key);
#define _Numba_HASHTABLE_SET(TABLE, KEY, DATA) \
_Numba_hashtable_set(TABLE, KEY, &(DATA), sizeof(DATA))
#define _Numba_HASHTABLE_GET(TABLE, KEY, DATA) \
_Numba_hashtable_get(TABLE, KEY, &(DATA), sizeof(DATA))
#endif /* Py_LIMITED_API */
#endif
/*
* Helper functions used by Numba at runtime.
* This C file is meant to be included after defining the
* NUMBA_EXPORT_FUNC() and NUMBA_EXPORT_DATA() macros.
*/
#include "_pymodule.h"
#include <stddef.h>
#include <stdio.h>
#include <math.h>
#include <complex.h>
#ifdef _MSC_VER
#define int64_t signed __int64
#define uint64_t unsigned __int64
#define uint32_t unsigned __int32
#define _complex_float_t _Fcomplex
#define _complex_float_ctor(r, i) _FCbuild(r, i)
#else
#include <stdint.h>
#define _complex_float_t complex float
#define _complex_float_ctor(r, i) (r + I * i)
#endif
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/ndarrayobject.h>
#include <numpy/arrayscalars.h>
#include "_arraystruct.h"
#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 11)
/*
* For struct _frame
*/
#include "internal/pycore_frame.h"
#endif
/*
* Other helpers.
*/
/* Fix fmod() and fmodf() for windows x64 VC 9.0 (VS 2008)
   https://support.microsoft.com/en-us/kb/982107
   The runtime registers (via numba_set_fnclex below) a helper that clears
   the x87 FPU exception state before each fmod call. */
static void (*fnclex)(void) = NULL;

NUMBA_EXPORT_FUNC(double)
numba_fixed_fmod(double x, double y){
    /* Guard: only call the workaround if it has been registered. The
       original dereferenced fnclex unconditionally and would crash if the
       wrapper was invoked before numba_set_fnclex(). */
    if (fnclex != NULL)
        fnclex(); /* no inline asm in x64 =( */
    return fmod(x, y);
}

NUMBA_EXPORT_FUNC(float)
numba_fixed_fmodf(float x, float y) {
    if (fnclex != NULL)
        fnclex(); /* no inline asm in x64 =( */
    return fmodf(x, y);
}

/* Register the fnclex helper used by the two wrappers above. */
NUMBA_EXPORT_FUNC(void)
numba_set_fnclex(void *fn){
    /* Explicit cast: an implicit void* -> function pointer conversion is
       ill-formed in C++, and this file uses C++ linkage. */
    fnclex = (void (*)(void))fn;
}
/* provide 64-bit division function to 32-bit platforms */
/* NOTE: division/remainder by zero (and INT64_MIN / -1) are undefined
   behavior in C; callers are expected to have guarded against them. */
NUMBA_EXPORT_FUNC(int64_t)
numba_sdiv(int64_t a, int64_t b) {
    return a / b;
}

NUMBA_EXPORT_FUNC(uint64_t)
numba_udiv(uint64_t a, uint64_t b) {
    return a / b;
}

/* provide 64-bit remainder function to 32-bit platforms */
NUMBA_EXPORT_FUNC(int64_t)
numba_srem(int64_t a, int64_t b) {
    return a % b;
}

NUMBA_EXPORT_FUNC(uint64_t)
numba_urem(uint64_t a, uint64_t b) {
    return a % b;
}
/* provide frexp and ldexp; these wrappers deal with special cases
 * (zero, nan, infinity) directly, to sidestep platform differences.
 */
NUMBA_EXPORT_FUNC(double)
numba_frexp(double x, int *exp)
{
    /* Zero and non-finite inputs pass through unchanged with *exp = 0. */
    if (!Py_IS_FINITE(x) || !x)
        *exp = 0;
    else
        x = frexp(x, exp);
    return x;
}
/* Float variant of numba_frexp(): zero and non-finite inputs pass
   through unchanged with *exp = 0, otherwise defer to frexpf().
   Condition written with !Py_IS_FINITE() for consistency with the double
   version above; it is equivalent to Py_IS_NAN(x) || Py_IS_INFINITY(x). */
NUMBA_EXPORT_FUNC(float)
numba_frexpf(float x, int *exp)
{
    if (!Py_IS_FINITE(x) || !x)
        *exp = 0;
    else
        x = frexpf(x, exp);
    return x;
}
/* ldexp wrappers: zero, non-finite x, or a zero exponent return x
   unchanged, avoiding platform differences in the library call. */
NUMBA_EXPORT_FUNC(double)
numba_ldexp(double x, int exp)
{
    if (Py_IS_FINITE(x) && x && exp)
        x = ldexp(x, exp);
    return x;
}

NUMBA_EXPORT_FUNC(float)
numba_ldexpf(float x, int exp)
{
    if (Py_IS_FINITE(x) && x && exp)
        x = ldexpf(x, exp);
    return x;
}
/* Thin exported wrappers around the C99 base-2 exponential/logarithm
   functions, so compiled code can resolve them by a stable name. */
NUMBA_EXPORT_FUNC(double)
numba_exp2(double x)
{
    return exp2(x);
}

NUMBA_EXPORT_FUNC(float)
numba_exp2f(float x)
{
    return exp2f(x);
}

NUMBA_EXPORT_FUNC(double)
numba_log2(double x)
{
    return log2(x);
}

NUMBA_EXPORT_FUNC(float)
numba_log2f(float x)
{
    return log2f(x);
}
/* provide complex power */
/* Compute *a ** *b into *out using CPython's complex power. */
NUMBA_EXPORT_FUNC(void)
numba_cpow(Py_complex *a, Py_complex *b, Py_complex *out) {
    errno = 0;
    *out = _Py_c_pow(*a, *b);
    if (errno == EDOM) {
        /* _Py_c_pow() doesn't bother returning the right value
           in this case, as Python raises ZeroDivisionError */
        out->real = out->imag = Py_NAN;
    }
}

/* Single-precision variant: widen to double, call numba_cpow(), then
   narrow the result back to complex float. */
NUMBA_EXPORT_FUNC(void)
numba_cpowf(_complex_float_t *a, _complex_float_t *b, _complex_float_t *out) {
    Py_complex _a, _b, _out;
    _a.real = crealf(*a);
    _a.imag = cimagf(*a);
    _b.real = crealf(*b);
    _b.imag = cimagf(*b);
    numba_cpow(&_a, &_b, &_out);
    *out = _complex_float_ctor((float) _out.real, (float) _out.imag);
}
/* C99 math functions: redirect to system implementations */
NUMBA_EXPORT_FUNC(double)
numba_gamma(double x)
{
    return tgamma(x);
}

NUMBA_EXPORT_FUNC(float)
numba_gammaf(float x)
{
    return tgammaf(x);
}

NUMBA_EXPORT_FUNC(double)
numba_lgamma(double x)
{
    return lgamma(x);
}

NUMBA_EXPORT_FUNC(float)
numba_lgammaf(float x)
{
    return lgammaf(x);
}

NUMBA_EXPORT_FUNC(double)
numba_erf(double x)
{
    return erf(x);
}

NUMBA_EXPORT_FUNC(float)
numba_erff(float x)
{
    return erff(x);
}

NUMBA_EXPORT_FUNC(double)
numba_erfc(double x)
{
    return erfc(x);
}

NUMBA_EXPORT_FUNC(float)
numba_erfcf(float x)
{
    return erfcf(x);
}

NUMBA_EXPORT_FUNC(float)
numba_nextafterf(float a, float b)
{
    return nextafterf(a, b);
}

NUMBA_EXPORT_FUNC(double)
numba_nextafter(double a, double b)
{
    return nextafter(a, b);
}
/* Unpack any Python complex-like object into a Py_complex structure */
/* Returns 1 on success, 0 on failure (with a Python error set by the
   failing C-API call). Accepts: Python complex / numpy complex128,
   numpy complex64 (cast to complex128), or anything convertible by
   PyNumber_Float (imaginary part set to 0). */
NUMBA_EXPORT_FUNC(int)
numba_complex_adaptor(PyObject* obj, Py_complex *out) {
    PyObject* fobj;
    PyArray_Descr *dtype;
    double val[2];

    // Convert from python complex or numpy complex128
    if (PyComplex_Check(obj)) {
        out->real = PyComplex_RealAsDouble(obj);
        out->imag = PyComplex_ImagAsDouble(obj);
    }
    // Convert from numpy complex64
    else if (PyArray_IsScalar(obj, ComplexFloating)) {
        dtype = PyArray_DescrFromScalar(obj);
        if (dtype == NULL) {
            return 0;
        }
        /* Cast into val as a C double pair (real, imag). */
        if (PyArray_CastScalarDirect(obj, dtype, &val[0], NPY_CDOUBLE) < 0) {
            Py_DECREF(dtype);
            return 0;
        }
        out->real = val[0];
        out->imag = val[1];
        Py_DECREF(dtype);
    } else {
        /* Fall back to a real number conversion.
           NOTE(review): PyFloat_AsDouble can fail returning -1 with an
           error set; this path still reports success — confirm callers
           check PyErr_Occurred(). */
        fobj = PyNumber_Float(obj);
        if (!fobj) return 0;
        out->real = PyFloat_AsDouble(fobj);
        out->imag = 0.;
        Py_DECREF(fobj);
    }
    return 1;
}
/* Minimum PyBufferObject structure to hack inside it */
/* NOTE(review): this struct appears to be legacy (Python 2 buffer object
   layout) and is not referenced by the visible code — confirm before
   removing. */
typedef struct {
    PyObject_HEAD
    PyObject *b_base;
    void *b_ptr;
    Py_ssize_t b_size;
    Py_ssize_t b_offset;
} PyBufferObject_Hack;

/*
Get data address of record data buffer
*/
/* On success fills *pbuf (caller must release it) and returns its data
   pointer; returns NULL with a Python error set on failure. */
NUMBA_EXPORT_FUNC(void *)
numba_extract_record_data(PyObject *recordobj, Py_buffer *pbuf) {
    PyObject *attrdata;
    void *ptr;

    attrdata = PyObject_GetAttrString(recordobj, "data");
    if (!attrdata) return NULL;

    if (-1 == PyObject_GetBuffer(attrdata, pbuf, 0)){
        Py_DECREF(attrdata);
        return NULL;
    } else {
        /* The buffer view keeps the data alive after attrdata is released. */
        ptr = pbuf->buf;
    }
    Py_DECREF(attrdata);
    return ptr;
}
/*
 * Return a record instance with dtype as the record type, and backed
 * by a copy of the memory area pointed to by (pdata, size).
 */
/* Returns a new reference, or NULL with a Python error set. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_recreate_record(void *pdata, int size, PyObject *dtype) {
    PyObject *numpy = NULL;
    PyObject *numpy_record = NULL;
    PyObject *aryobj = NULL;
    PyObject *dtypearg = NULL;
    PyObject *record = NULL;
    PyArray_Descr *descr = NULL;

    if (dtype == NULL) {
        PyErr_Format(PyExc_RuntimeError,
                     "In 'numba_recreate_record', 'dtype' is NULL");
        return NULL;
    }

    numpy = PyImport_ImportModuleNoBlock("numpy");
    if (!numpy) goto CLEANUP;

    numpy_record = PyObject_GetAttrString(numpy, "record");
    if (!numpy_record) goto CLEANUP;

    /* Build the (np.record, dtype) pair and convert it to a descriptor. */
    dtypearg = PyTuple_Pack(2, numpy_record, dtype);
    if (!dtypearg || !PyArray_DescrConverter(dtypearg, &descr))
        goto CLEANUP;

    /* This steals a reference to descr, so we don't have to DECREF it */
    aryobj = PyArray_FromString(pdata, size, descr, 1, NULL);
    if (!aryobj) goto CLEANUP;

    record = PySequence_GetItem(aryobj, 0);

CLEANUP:
    Py_XDECREF(numpy);
    Py_XDECREF(numpy_record);
    Py_XDECREF(aryobj);
    Py_XDECREF(dtypearg);

    return record;
}
/* Fill `arystruct` from a numpy ndarray: data pointer, item count,
   itemsize, parent (borrowed reference), then ndim shape values followed
   by ndim stride values in shape_and_strides (caller must have sized it
   for 2*ndim). Returns 0 on success, -1 if obj is not an ndarray. */
NUMBA_EXPORT_FUNC(int)
numba_adapt_ndarray(PyObject *obj, arystruct_t* arystruct) {
    PyArrayObject *ndary;
    int i, ndim;
    npy_intp *p;

    if (!PyArray_Check(obj)) {
        return -1;
    }

    ndary = (PyArrayObject*)obj;
    ndim = PyArray_NDIM(ndary);

    arystruct->data = PyArray_DATA(ndary);
    arystruct->nitems = PyArray_SIZE(ndary);
    arystruct->itemsize = PyArray_ITEMSIZE(ndary);
    arystruct->parent = obj;
    p = arystruct->shape_and_strides;
    for (i = 0; i < ndim; i++, p++) {
        *p = PyArray_DIM(ndary, i);
    }
    for (i = 0; i < ndim; i++, p++) {
        *p = PyArray_STRIDE(ndary, i);
    }

    arystruct->meminfo = NULL;
    return 0;
}
/* Acquire a buffer view of `obj` with shape and strides information.
   Returns 0 on success, -1 with a Python error set on failure. */
NUMBA_EXPORT_FUNC(int)
numba_get_buffer(PyObject *obj, Py_buffer *buf)
{
    /* Ask for shape and strides, but no suboffsets */
    return PyObject_GetBuffer(obj, buf, PyBUF_RECORDS_RO);
}

/* Fill `arystruct` from an acquired buffer view: ndim shape values
   followed by ndim stride values in shape_and_strides, nitems computed
   as the product of the shape. The buffer's owner becomes the parent. */
NUMBA_EXPORT_FUNC(void)
numba_adapt_buffer(Py_buffer *buf, arystruct_t *arystruct)
{
    int i;
    npy_intp *p;

    arystruct->data = buf->buf;
    arystruct->itemsize = buf->itemsize;
    arystruct->parent = buf->obj;
    arystruct->nitems = 1;
    p = arystruct->shape_and_strides;
    for (i = 0; i < buf->ndim; i++, p++) {
        *p = buf->shape[i];
        arystruct->nitems *= buf->shape[i];
    }
    for (i = 0; i < buf->ndim; i++, p++) {
        *p = buf->strides[i];
    }
    arystruct->meminfo = NULL;
}

/* Release a buffer view acquired by numba_get_buffer(). */
NUMBA_EXPORT_FUNC(void)
numba_release_buffer(Py_buffer *buf)
{
    PyBuffer_Release(buf);
}
/* Create a new, well-behaved (aligned, writeable) ndarray wrapping
   existing `data` (not copied, not owned by the array).
   NOTE: `itemsize` is currently unused — the element size is implied by
   `type_num`; the parameter is kept for ABI stability. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_ndarray_new(int nd,
                  npy_intp *dims,   /* shape */
                  npy_intp *strides,
                  void* data,
                  int type_num,
                  int itemsize)
{
    PyObject *ndary;
    int flags = NPY_ARRAY_BEHAVED;
    ndary = PyArray_New((PyTypeObject*)&PyArray_Type, nd, dims, type_num,
                        strides, data, 0, flags, NULL);
    return ndary;
}
/*
 * Handle reshaping of zero-sized array.
 * See numba_attempt_nocopy_reshape() below.
 */
/* Parameters mirror numba_attempt_nocopy_reshape(); most are unused here
   since a zero-sized array has no meaningful strides. */
static int
nocopy_empty_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides,
                     npy_intp newnd, const npy_intp *newdims,
                     npy_intp *newstrides, npy_intp itemsize,
                     int is_f_order)
{
    int i;
    /* Just make the strides vaguely reasonable
     * (they can have any value in theory).
     */
    for (i = 0; i < newnd; i++)
        newstrides[i] = itemsize;
    return 1;  /* reshape successful */
}
/*
 * Straight from Numpy's _attempt_nocopy_reshape()
 * (np/core/src/multiarray/shape.c).
 * Attempt to reshape an array without copying data
 *
 * This function should correctly handle all reshapes, including
 * axes of length 1. Zero strides should work but are untested.
 *
 * If a copy is needed, returns 0
 * If no copy is needed, returns 1 and fills `npy_intp *newstrides`
 *     with appropriate strides
 */
NUMBA_EXPORT_FUNC(int)
numba_attempt_nocopy_reshape(npy_intp nd, const npy_intp *dims, const npy_intp *strides,
                             npy_intp newnd, const npy_intp *newdims,
                             npy_intp *newstrides, npy_intp itemsize,
                             int is_f_order)
{
    int oldnd;
    npy_intp olddims[NPY_MAXDIMS];
    npy_intp oldstrides[NPY_MAXDIMS];
    npy_intp np, op, last_stride;
    int oi, oj, ok, ni, nj, nk;

    oldnd = 0;
    /*
     * Remove axes with dimension 1 from the old array. They have no effect
     * but would need special cases since their strides do not matter.
     */
    for (oi = 0; oi < nd; oi++) {
        if (dims[oi]!= 1) {
            olddims[oldnd] = dims[oi];
            oldstrides[oldnd] = strides[oi];
            oldnd++;
        }
    }

    /* Reshape is only possible when both shapes have the same total
       number of elements. */
    np = 1;
    for (ni = 0; ni < newnd; ni++) {
        np *= newdims[ni];
    }
    op = 1;
    for (oi = 0; oi < oldnd; oi++) {
        op *= olddims[oi];
    }
    if (np != op) {
        /* different total sizes; no hope */
        return 0;
    }

    if (np == 0) {
        /* the Numpy code does not handle 0-sized arrays */
        return nocopy_empty_reshape(nd, dims, strides,
                                    newnd, newdims, newstrides,
                                    itemsize, is_f_order);
    }

    /* oi to oj and ni to nj give the axis ranges currently worked with */
    oi = 0;
    oj = 1;
    ni = 0;
    nj = 1;
    while (ni < newnd && oi < oldnd) {
        /* Grow the [oi, oj) and [ni, nj) windows until they cover the
           same number of elements. */
        np = newdims[ni];
        op = olddims[oi];

        while (np != op) {
            if (np < op) {
                /* Misses trailing 1s, these are handled later */
                np *= newdims[nj++];
            } else {
                op *= olddims[oj++];
            }
        }

        /* Check whether the original axes can be combined */
        for (ok = oi; ok < oj - 1; ok++) {
            if (is_f_order) {
                if (oldstrides[ok+1] != olddims[ok]*oldstrides[ok]) {
                    /* not contiguous enough */
                    return 0;
                }
            }
            else {
                /* C order */
                if (oldstrides[ok] != olddims[ok+1]*oldstrides[ok+1]) {
                    /* not contiguous enough */
                    return 0;
                }
            }
        }

        /* Calculate new strides for all axes currently worked with */
        if (is_f_order) {
            newstrides[ni] = oldstrides[oi];
            for (nk = ni + 1; nk < nj; nk++) {
                newstrides[nk] = newstrides[nk - 1]*newdims[nk - 1];
            }
        }
        else {
            /* C order */
            newstrides[nj - 1] = oldstrides[oj - 1];
            for (nk = nj - 1; nk > ni; nk--) {
                newstrides[nk - 1] = newstrides[nk]*newdims[nk];
            }
        }
        ni = nj++;
        oi = oj++;
    }

    /*
     * Set strides corresponding to trailing 1s of the new shape.
     */
    if (ni >= 1) {
        last_stride = newstrides[ni - 1];
    }
    else {
        last_stride = itemsize;
    }
    if (is_f_order) {
        last_stride *= newdims[ni - 1];
    }
    for (nk = ni; nk < newnd; nk++) {
        newstrides[nk] = last_stride;
    }

    return 1;
}
/*
 * Cython utilities.
 */

/* Fetch the address of the given function, as exposed by
   a cython module */
/* Returns NULL with a Python error set on failure. */
static void *
import_cython_function(const char *module_name, const char *function_name)
{
    PyObject *module, *capi, *cobj;
    void *res = NULL;
    const char *capsule_name;

    module = PyImport_ImportModule(module_name);
    if (module == NULL)
        return NULL;
    capi = PyObject_GetAttrString(module, "__pyx_capi__");
    Py_DECREF(module);
    if (capi == NULL)
        return NULL;
    cobj = PyMapping_GetItemString(capi, (char *)function_name);
    Py_DECREF(capi);
    if (cobj == NULL) {
        /* Replace the mapping's KeyError with a more descriptive error. */
        PyErr_Clear();
        PyErr_Format(PyExc_ValueError,
                     "No function '%s' found in __pyx_capi__ of '%s'",
                     function_name, module_name);
        return NULL;
    }

    /* 2.7+ => Cython exports a PyCapsule */
    capsule_name = PyCapsule_GetName(cobj);
    if (capsule_name != NULL) {
        res = PyCapsule_GetPointer(cobj, capsule_name);
    }
    Py_DECREF(cobj);
    return res;
}
/* Python-callable wrapper: (module_name, function_name) -> int address
   of the function exported in the module's __pyx_capi__ dict.
   Returns NULL with a Python error set on failure. */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_import_cython_function(PyObject *self, PyObject *args)
{
    const char *module_name;
    const char *function_name;
    void *p = NULL;
    PyObject *res;

    if (!PyArg_ParseTuple(args, "ss", &module_name, &function_name)) {
        return NULL;
    }

    p = import_cython_function(module_name, function_name);
    if (p == NULL) {
        return NULL;
    }

    res = PyLong_FromVoidPtr(p);
    if (res == NULL) {
        PyErr_SetString(PyExc_RuntimeError,
                        "Could not convert function address to int");
        return NULL;
    }
    return res;
}
/* We use separate functions for datetime64 and timedelta64, to ensure
 * proper type checking.
 */

/* Extract the raw int64 payload from a numpy.datetime64 scalar.
   Returns -1 with a TypeError set on a wrong-typed argument; note -1 is
   also a valid payload, so callers must check for a pending exception. */
NUMBA_EXPORT_FUNC(npy_int64)
numba_extract_np_datetime(PyObject *td)
{
    if (!PyArray_IsScalar(td, Datetime)) {
        PyErr_SetString(PyExc_TypeError,
                        "expected a numpy.datetime64 object");
        return -1;
    }
    /* Read through the Datetime accessor; the original used
       PyArrayScalar_VAL(td, Timedelta), which only worked because both
       scalar structs happen to lay out `obval` identically. */
    return PyArrayScalar_VAL(td, Datetime);
}
/* Extract the raw int64 payload from a numpy.timedelta64 scalar.
   Returns -1 with a TypeError set on a wrong-typed argument; note -1 is
   also a valid payload, so callers must check for a pending exception. */
NUMBA_EXPORT_FUNC(npy_int64)
numba_extract_np_timedelta(PyObject *td)
{
    if (!PyArray_IsScalar(td, Timedelta)) {
        PyErr_SetString(PyExc_TypeError,
                        "expected a numpy.timedelta64 object");
        return -1;
    }
    return PyArrayScalar_VAL(td, Timedelta);
}
/* Build a numpy.datetime64 scalar from a raw value and a unit code
   (NPY_DATETIMEUNIT). Returns a new reference, or NULL on failure. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_create_np_datetime(npy_int64 value, int unit_code)
{
    PyDatetimeScalarObject *obj = (PyDatetimeScalarObject *)
        PyArrayScalar_New(Datetime);
    if (obj != NULL) {
        obj->obval = value;
        obj->obmeta.base = unit_code;
        obj->obmeta.num = 1;
    }
    return (PyObject *) obj;
}

/* Build a numpy.timedelta64 scalar from a raw value and a unit code
   (NPY_DATETIMEUNIT). Returns a new reference, or NULL on failure. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_create_np_timedelta(npy_int64 value, int unit_code)
{
    PyTimedeltaScalarObject *obj = (PyTimedeltaScalarObject *)
        PyArrayScalar_New(Timedelta);
    if (obj != NULL) {
        obj->obval = value;
        obj->obmeta.base = unit_code;
        obj->obmeta.num = 1;
    }
    return (PyObject *) obj;
}
/* float -> unsigned int conversion helpers. */
NUMBA_EXPORT_FUNC(uint64_t)
numba_fptoui(double x) {
    /* First cast to signed int of the full width to make sure sign extension
       happens (this can make a difference on some platforms...). */
    return (uint64_t) (int64_t) x;
}

NUMBA_EXPORT_FUNC(uint64_t)
numba_fptouif(float x) {
    return (uint64_t) (int64_t) x;
}

/* GIL helpers callable from compiled code: acquire/release via an opaque
   state slot provided by the caller. */
NUMBA_EXPORT_FUNC(void)
numba_gil_ensure(PyGILState_STATE *state) {
    *state = PyGILState_Ensure();
}

NUMBA_EXPORT_FUNC(void)
numba_gil_release(PyGILState_STATE *state) {
    PyGILState_Release(*state);
}

/* Return the type of `obj` as a borrowed reference. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_py_type(PyObject *obj) {
    return (PyObject *) Py_TYPE(obj);
}
/*
 * Functions for tagging an arbitrary Python object with an arbitrary pointer.
 * These functions make strong lifetime assumptions, see below.
 */

/* Process-wide mapping of id(obj) -> pointer, created lazily and never
   released (lives for the interpreter's lifetime). */
static PyObject *private_data_dict = NULL;

static PyObject *
_get_private_data_dict(void)
{
    if (private_data_dict == NULL)
        private_data_dict = PyDict_New();
    return private_data_dict;
}
NUMBA_EXPORT_FUNC(void)
numba_set_pyobject_private_data(PyObject *obj, void *ptr)
{
PyObject *dct = _get_private_data_dict();
/* This assumes the reference to setobj is kept alive until the
call to numba_reset_set_private_data()! */
PyObject *key = PyLong_FromVoidPtr((void *) obj);
PyObject *value = PyLong_FromVoidPtr(ptr);
if (!dct || !value || !key)
goto error;
if (PyDict_SetItem(dct, key, value))
goto error;
Py_DECREF(key);
Py_DECREF(value);
return;
error:
Py_FatalError("unable to set private data");
}
/* Retrieve the pointer previously attached to *obj* by
 * numba_set_pyobject_private_data(), or NULL if none was set.
 * Aborts the process on internal failure. */
NUMBA_EXPORT_FUNC(void *)
numba_get_pyobject_private_data(PyObject *obj)
{
    PyObject *dct = _get_private_data_dict();
    PyObject *value, *key = PyLong_FromVoidPtr((void *) obj);
    void *ptr;
    if (!dct || !key)
        goto error;

    /* PyDict_GetItem returns a borrowed reference and suppresses errors. */
    value = PyDict_GetItem(dct, key);
    Py_DECREF(key);
    if (!value)
        return NULL;    /* no entry for this object */
    else {
        ptr = PyLong_AsVoidPtr(value);
        /* NULL is also a legal stored pointer; only treat it as an error
           when an exception was actually raised by the conversion. */
        if (ptr == NULL && PyErr_Occurred())
            goto error;
        return ptr;
    }
error:
    Py_FatalError("unable to get private data");
    return NULL;    /* unreachable, silences compiler warnings */
}
/* Remove the private-data entry for *obj*, if any.  A missing entry is
 * tolerated (the KeyError is cleared); only allocation failure aborts. */
NUMBA_EXPORT_FUNC(void)
numba_reset_pyobject_private_data(PyObject *obj)
{
    PyObject *dct = _get_private_data_dict();
    PyObject *key = PyLong_FromVoidPtr((void *) obj);
    if (!key)
        goto error;
    /* Deleting a non-existent key raises KeyError; swallow it. */
    if (PyDict_DelItem(dct, key))
        PyErr_Clear();
    Py_DECREF(key);
    return;
error:
    Py_FatalError("unable to reset private data");
}
/* Unpack a Python slice object into C start/stop/step values, applying
 * Python's defaults for members that are None.  Returns 0 on success,
 * -1 with an exception set on error (wrong type or overflowing member). */
NUMBA_EXPORT_FUNC(int)
numba_unpack_slice(PyObject *obj,
                   Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step)
{
    PySliceObject *slice = (PySliceObject *) obj;
    if (!PySlice_Check(obj)) {
        PyErr_Format(PyExc_TypeError,
                     "Expected a slice object, got '%s'",
                     Py_TYPE(slice)->tp_name);
        return -1;
    }
/* Fetch one slice member into the same-named output pointer, using
 * DEFAULT when the member is None.  Returns -1 from the enclosing
 * function on conversion error. */
#define FETCH_MEMBER(NAME, DEFAULT) \
    if (slice->NAME != Py_None) { \
        Py_ssize_t v = PyNumber_AsSsize_t(slice->NAME, \
                                          PyExc_OverflowError); \
        if (v == -1 && PyErr_Occurred()) \
            return -1; \
        *NAME = v; \
    } \
    else { \
        *NAME = DEFAULT; \
    }
    /* NOTE: step must be fetched first — the defaults for stop and start
     * depend on the sign of *step, matching Python slice semantics. */
    FETCH_MEMBER(step, 1)
    FETCH_MEMBER(stop, (*step > 0) ? PY_SSIZE_T_MAX : PY_SSIZE_T_MIN)
    FETCH_MEMBER(start, (*step > 0) ? 0 : PY_SSIZE_T_MAX)
    return 0;

#undef FETCH_MEMBER
}
/* Abort the process from compiled code.  The GIL is acquired first because
 * Py_FatalError inspects interpreter state while printing diagnostics. */
NUMBA_EXPORT_FUNC(int)
numba_fatal_error(void)
{
    PyGILState_Ensure();
    Py_FatalError("in Numba-compiled function");
    return 0; /* unreachable */
}
/* Insert a frame into the traceback for (funcname, filename, lineno). */
/* This function is CPython's _PyTraceback_Add, renamed, see:
* https://github.com/python/cpython/blob/d545869d084e70d4838310e79b52a25a72a1ca56/Python/traceback.c#L246
* and modified for Python 2.x based on
* https://github.com/python/cpython/blob/2e1a34025cde19bddf12a2eac8fedb6afcca8339/Modules/_ctypes/callbacks.c#L151-L174
*/
/* Insert a synthetic frame (funcname, filename, lineno) into the traceback
 * of the currently active exception.  Copied from CPython's
 * _PyTraceback_Add; on failure the new error is chained onto the saved one.
 */
static void traceback_add(const char *funcname, const char *filename, int lineno)
{
    PyObject *globals = NULL;
    PyCodeObject *code = NULL;
    PyFrameObject *frame = NULL;
    PyObject *exc, *val, *tb;

    /* Save and clear the current exception. Python functions must not be
       called with an exception set. Calling Python functions happens when
       the codec of the filesystem encoding is implemented in pure Python. */
    PyErr_Fetch(&exc, &val, &tb);

    globals = PyDict_New();
    if (!globals)
        goto error;
    code = PyCode_NewEmpty(filename, funcname, lineno);
    if (!code) {
        /* BUGFIX: globals was leaked on this path; CPython's
           _PyTraceback_Add releases it here. */
        Py_DECREF(globals);
        goto error;
    }
    frame = PyFrame_New(PyThreadState_Get(), code, globals, NULL);
    Py_DECREF(globals);
    Py_DECREF(code);
    if (!frame)
        goto error;

#if (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION == 11) /* 3.11 */
    /* unsafe cast to our copy of _frame to access the f_lineno field */
    typedef struct _frame py_frame;
    py_frame* hacked_frame = (py_frame*)frame;
    hacked_frame->f_lineno = lineno;
#elif (PY_MAJOR_VERSION == 3) && (PY_MINOR_VERSION < 11) /* <3.11 */
    frame->f_lineno = lineno;
#else
    #error "Check if struct _frame has been changed in the new version"
#endif

    /* Restore the saved exception and record the synthetic frame in its
       traceback. */
    PyErr_Restore(exc, val, tb);
    PyTraceBack_Here(frame);
    Py_DECREF(frame);
    return;

error:
    _PyErr_ChainExceptions(exc, val, tb);
}
/*
* Add traceback information to *loc* to the active exception.
* loc can be NULL, which causes this function to become a no-op.
*/
static
void traceback_add_loc(PyObject *loc) {
    /* Add a traceback frame described by *loc*, a
     * (function_name, filename, lineno) tuple.  No-op when loc is NULL,
     * None, or not a tuple. */
    const char *function_name_str = NULL, *filename_str = NULL;
    PyObject *function_name = NULL, *filename = NULL, *lineno = NULL;
    Py_ssize_t pos;
    /* instance is instantiated/internal exception is raised, if loc is present
     * add a frame for it into the traceback */
    if(loc && loc != Py_None && PyTuple_Check(loc))
    {
        /* PyTuple_GET_ITEM returns borrowed references; no decref needed.
         * NOTE(review): PyString_AsString is presumably a Py3 compat macro
         * from _pymodule.h — confirm; no error checking is done on the
         * extracted strings. */
        pos = 0;
        function_name = PyTuple_GET_ITEM(loc, pos);
        function_name_str = PyString_AsString(function_name);
        pos = 1;
        filename = PyTuple_GET_ITEM(loc, pos);
        filename_str = PyString_AsString(filename);
        pos = 2;
        lineno = PyTuple_GET_ITEM(loc, pos);
        traceback_add(function_name_str, filename_str, \
                      (int)PyLong_AsLong(lineno));
    }
}
/**
 * Re-raise the current active exception.
 * Called internally by process_raise() when *exc* is None.
 */
static
int reraise_exc_is_none(void) {
    /* Reraise the in-flight exception (the one being handled).
     * Returns 1 when an exception was restored, 0 (with RuntimeError set)
     * when there is no active exception. */
    PyObject *tb, *type, *value;
#if (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 11)
    /* NOTE(review): PyErr_GetExcInfo returns NEW references, so the
     * unconditional Py_XINCREF below appears to over-increment on 3.11+
     * (and the early-return path would leak them) — confirm against the
     * CPython C-API docs. */
    PyErr_GetExcInfo(&type, &value, &tb);
#elif (PY_MAJOR_VERSION >= 3) && (PY_MINOR_VERSION >= 8)
    /* Pre-3.11: read borrowed references straight off the thread state's
     * exception stack. */
    PyThreadState *tstate = PyThreadState_GET();
    _PyErr_StackItem *tstate_exc = tstate->exc_info;
    type = tstate_exc->exc_type;
    value = tstate_exc->exc_value;
    tb = tstate_exc->exc_traceback;
#endif
    if (type == Py_None) {
        PyErr_SetString(PyExc_RuntimeError,
                        "No active exception to reraise");
        return 0;
    }

    /* incref needed because PyErr_Restore DOES NOT */
    Py_XINCREF(type);
    Py_XINCREF(value);
    Py_XINCREF(tb);

    PyErr_Restore(type, value, tb);
    return 1;
}
/*
* Set exception given the Exception type and the constructor argument.
* Equivalent to ``raise exc(value)``.
* PyExceptionClass_Check(exc) must be True.
* value can be NULL.
*/
static
int process_exception_class(PyObject *exc, PyObject *value) {
    /* Validate that instantiating *exc* with *value* yields a BaseException
     * subclass instance, then raise via PyErr_SetObject (letting CPython
     * normalize lazily).  Returns 1 on success, 0 with an exception set
     * otherwise. */
    PyObject *type;
    /* It is a class, type used here just as a tmp var.
     * NOTE(review): PyObject_CallObject requires value to be NULL or a
     * tuple — presumably guaranteed by the callers; confirm. */
    type = PyObject_CallObject(exc, value);
    if (type == NULL){
        return 0;
    }
    if (!PyExceptionInstance_Check(type)) {
        PyErr_SetString(PyExc_TypeError,
                        "exceptions must derive from BaseException");
        Py_DECREF(type);
        return 0;
    }
    /* all ok, set type to the exc; the trial instance is discarded and the
     * exception is stored unnormalized as (class, args). */
    Py_DECREF(type);
    type = exc;
    PyErr_SetObject(type, value);
    return 1;
}
/*
* Internal routine to process exceptions.
* exc cannot be NULL. It can be a None, Exception type, or Exception instance.
* value can be NULL for absent, or any PyObject valid for the exception.
*/
static
int process_raise(PyObject *exc, PyObject *value) {
    /* Dispatch on the kind of *exc*:
     *   None                -> re-raise the active exception (returns 1/0),
     *   exception class     -> raise exc(value) (returns 1/0),
     *   exception instance  -> raise it directly (returns 0, exception set),
     *   anything else       -> TypeError (returns 0).
     * In every branch a Python exception ends up set. */
    /* exc is None */
    if (exc == Py_None) {
        return reraise_exc_is_none();
    }
    /* exc should be an exception class */
    else if (PyExceptionClass_Check(exc)) {
        return process_exception_class(exc, value);
    }
    /* exc is an instance of an Exception */
    else if (PyExceptionInstance_Check(exc)) {
        PyObject *type = PyExceptionInstance_Class(exc);
        PyErr_SetObject(type, exc);
        return 0;
    }
    else {
        /* Not something you can raise.  You get an exception
           anyway, just not what you specified :-) */
        PyErr_SetString(PyExc_TypeError,
                        "exceptions must derive from BaseException");
        return 0;
    }
}
/* Logic for raising an arbitrary object. Adapted from CPython's ceval.c.
This *consumes* a reference count to its argument. */
NUMBA_EXPORT_FUNC(int)
numba_do_raise(PyObject *exc_packed)
{
    /* Raise *exc_packed*, which is either a (class/inst/tuple, arguments,
     * location) tuple coming from nopython mode, or a bare exception
     * (reraise / objmode).  Always leaves a Python exception set; the
     * return value is process_raise()'s status. */
    int status;
    PyObject *exc = NULL, *value = NULL, *loc = NULL;

    /* We support the following forms of raise:
       raise
       raise <instance>
       raise <type> */

    /* could be a tuple from npm (some exc like thing, args, location) */
    if (PyTuple_CheckExact(exc_packed)) {
        /* Unpack a (class/inst/tuple, arguments, location) tuple. */
        if (!PyArg_ParseTuple(exc_packed, "OOO", &exc, &value, &loc)) {
            /* loc is still NULL here, so this is a no-op kept for safety */
            traceback_add_loc(loc);
            /* BUGFIX: this function consumes a reference to exc_packed
               (see comment above); the early-failure path previously
               leaked it. */
            Py_DECREF(exc_packed);
            return 0;
        }
    } else {
        /* could be a reraise or an exception from objmode */
        exc = exc_packed;
        /* branch exit with value = NULL and loc = NULL */
    }
    /* value is either NULL or borrowed from exc_packed */
    status = process_raise(exc, value);
    traceback_add_loc(loc);
    Py_DECREF(exc_packed);
    return status;
}
#ifdef PYCC_COMPILING
/* AOT avoid the use of `numba.core.serialize` */
/* Deserialize *n* bytes at *data* with plain pickle.loads.
 * NOTE: the *hashed* argument is unused in this AOT variant; it exists
 * only to keep the signature identical to the JIT build below. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_unpickle(const char *data, int n, const char *hashed)
{
    PyObject *buf, *obj;
    /* Cached forever on purpose: module-lifetime static. */
    static PyObject *loads;

    /* Caching the pickle.loads function shaves a couple µs here. */
    if (loads == NULL) {
        PyObject *picklemod;
        picklemod = PyImport_ImportModule("pickle");
        if (picklemod == NULL)
            return NULL;
        loads = PyObject_GetAttrString(picklemod, "loads");
        Py_DECREF(picklemod);
        if (loads == NULL)
            return NULL;
    }

    buf = PyBytes_FromStringAndSize(data, n);
    if (buf == NULL)
        return NULL;
    obj = PyObject_CallFunctionObjArgs(loads, buf, NULL);
    Py_DECREF(buf);
    return obj;
}
#else
/* Deserialize via numba.core.serialize._numba_unpickle, which caches
 * results keyed on the data address and verifies the payload hash.
 * Returns a new reference, or NULL with an exception set. */
NUMBA_EXPORT_FUNC(PyObject *)
numba_unpickle(const char *data, int n, const char *hashed)
{
    PyObject *buf=NULL, *obj=NULL, *addr=NULL, *hashedbuf=NULL;
    static PyObject *loads=NULL;

    /* Caching the _numba_unpickle function shaves a couple µs here. */
    if (loads == NULL) {
        PyObject *picklemod;
        picklemod = PyImport_ImportModule("numba.core.serialize");
        if (picklemod == NULL)
            return NULL;
        loads = PyObject_GetAttrString(picklemod, "_numba_unpickle");
        Py_DECREF(picklemod);
        if (loads == NULL)
            return NULL;
    }

    buf = PyBytes_FromStringAndSize(data, n);
    if (buf == NULL)
        return NULL;
    /* SHA1 produces 160 bit or 20 bytes */
    hashedbuf = PyBytes_FromStringAndSize(hashed, 20);
    if (hashedbuf == NULL)
        goto error;
    addr = PyLong_FromVoidPtr((void*)data);
    if (addr == NULL)
        goto error;
    obj = PyObject_CallFunctionObjArgs(loads, addr, buf, hashedbuf, NULL);
error:
    /* obj stays NULL when a goto skipped the call; the error path and the
       success path share the cleanup below. */
    Py_XDECREF(addr);
    Py_XDECREF(hashedbuf);
    Py_DECREF(buf);
    return obj;
}
#endif
NUMBA_EXPORT_FUNC(PyObject *)
numba_runtime_build_excinfo_struct(PyObject* struct_gv, PyObject* exc_args)
{
    /* Delegate to numba.core.serialize.runtime_build_excinfo_struct,
       caching the callable in a module-lifetime static (shaves a couple
       µs per call). */
    static PyObject *helper = NULL;

    if (helper == NULL) {
        PyObject *serialize_mod = PyImport_ImportModule("numba.core.serialize");
        if (serialize_mod == NULL)
            return NULL;
        helper = PyObject_GetAttrString(serialize_mod,
                                        "runtime_build_excinfo_struct");
        Py_DECREF(serialize_mod);
        if (helper == NULL)
            return NULL;
    }
    /* The helper returns None on failure (i.e. can't serialize one of the
       args).  Is there a better way to handle this? raise an exception
       here? */
    return PyObject_CallFunctionObjArgs(helper, struct_gv, exc_args, NULL);
}
/*
* Unicode helpers
*/
/* Developer note:
*
* The hash value of unicode objects is obtained via:
* ((PyASCIIObject *)(obj))->hash;
* The use comes from this definition:
* https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L119-L120
* and it's used extensively throughout the `cpython/Object/unicodeobject.c`
* source, not least in `unicode_hash` itself:
* https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Objects/unicodeobject.c#L11662-L11679
*
* The Unicode string struct layouts are described here:
* https://github.com/python/cpython/blob/6d43f6f081023b680d9db4542d19b9e382149f0a/Include/cpython/unicodeobject.h#L82-L161
* essentially, all the unicode string layouts start with a `PyASCIIObject` at
* offset 0 (as of commit 6d43f6f081023b680d9db4542d19b9e382149f0a, somewhere
* in the 3.8 development cycle).
*
* For safety against future CPython internal changes, the code checks that the
* _base members of the unicode structs are what is expected in 3.7, and that
* their offset is 0. It then walks the struct to the hash location to make sure
* the offset is indeed the same as PyASCIIObject->hash.
* Note: The large condition in the if should evaluate to a compile time
* constant.
*/
#define MEMBER_SIZE(structure, member) sizeof(((structure *)0)->member)
/* Extract the raw character data pointer of a unicode object along with
 * its length, kind, ASCII-ness and cached hash.  Returns NULL if the
 * string cannot be readied or if the struct-layout sanity checks fail. */
NUMBA_EXPORT_FUNC(void *)
numba_extract_unicode(PyObject *obj, Py_ssize_t *length, int *kind,
                      unsigned int *ascii, Py_ssize_t *hash) {
    /* PyUnicode_READY returns 0 on success, hence the negation. */
    if (!PyUnicode_READY(obj)) {
        *length = PyUnicode_GET_LENGTH(obj);
        *kind = PyUnicode_KIND(obj);
        /* could also use PyUnicode_IS_ASCII but it is not publicly advertised in https://docs.python.org/3/c-api/unicode.html */
        *ascii = (unsigned int)(PyUnicode_MAX_CHAR_VALUE(obj) == (0x7f));
        /* this is here as a crude check for safe casting of all unicode string
         * structs to a PyASCIIObject; the whole condition is a compile-time
         * constant (see the developer note above). */
        if (MEMBER_SIZE(PyCompactUnicodeObject, _base) == sizeof(PyASCIIObject) &&
            MEMBER_SIZE(PyUnicodeObject, _base) == sizeof(PyCompactUnicodeObject) &&
            offsetof(PyCompactUnicodeObject, _base) == 0 &&
            offsetof(PyUnicodeObject, _base) == 0 &&
            offsetof(PyCompactUnicodeObject, _base.hash) == offsetof(PyASCIIObject, hash) &&
            offsetof(PyUnicodeObject, _base._base.hash) == offsetof(PyASCIIObject, hash)
           ) {
            /* Grab the hash from the type object cache, do not compute it. */
            *hash = ((PyASCIIObject *)(obj))->hash;
        }
        else {
            /* cast is not safe, fail */
            return NULL;
        }
        return PyUnicode_DATA(obj);
    } else {
        return NULL;
    }
}
/* this is late included as it #defines e.g. SHIFT that should not impact
* the above */
#include "_unicodetype_db.h"
/* This function is a modified copy of the private function gettyperecord from
* CPython's Objects/unicodectype.c
*
* See:https://github.com/python/cpython/blob/1d4b6ba19466aba0eb91c4ba01ba509acf18c723/Objects/unicodectype.c#L45-L59
*/
/* Fill in the character-class record for code point *code* from Numba's
 * copy of CPython's unicode type database (_unicodetype_db.h). */
NUMBA_EXPORT_FUNC(void)
numba_gettyperecord(Py_UCS4 code, int *upper, int *lower, int *title,
                    unsigned char *decimal, unsigned char *digit,
                    unsigned short *flags)
{
    /* Record 0 is the default entry used for out-of-range code points. */
    int idx = 0;
    const numba_PyUnicode_TypeRecord *rec;

    if (code < 0x110000)
    {
        /* Two-level trie lookup, identical to CPython's gettyperecord. */
        idx = index1[(code >> SHIFT)];
        idx = index2[(idx << SHIFT) + (code & ((1 << SHIFT) - 1))];
    }

    rec = &numba_PyUnicode_TypeRecords[idx];
    *upper = rec->upper;
    *lower = rec->lower;
    *title = rec->title;
    *decimal = rec->decimal;
    *digit = rec->digit;
    *flags = rec->flags;
}
/* This function provides a consistent access point for the
* _PyUnicode_ExtendedCase array defined in CPython's Objects/unicodectype.c
* and now also as numba_PyUnicode_ExtendedCase in Numba's _unicodetype_db.h
*/
/* Return entry *code* of the extended-case-mapping table.
 * NOTE: no bounds checking — callers must pass a valid index. */
NUMBA_EXPORT_FUNC(Py_UCS4)
numba_get_PyUnicode_ExtendedCase(int code)
{
    return numba_PyUnicode_ExtendedCase[code];
}
/* from _unicodetype_db.h */
#undef SHIFT
/*
* defined break point for gdb
*/
/* Deliberately empty function: gdb users set a breakpoint on this symbol
 * and Numba-compiled code calls it at well-known points. */
NUMBA_EXPORT_FUNC(void)
numba_gdb_breakpoint(void) {
    /* does nothing */
}
/*
* Define bridge for all math functions
*/
#define MATH_UNARY(F, R, A) \
NUMBA_EXPORT_FUNC(R) numba_##F(A a) { return F(a); }
#define MATH_BINARY(F, R, A, B) \
NUMBA_EXPORT_FUNC(R) numba_##F(A a, B b) { return F(a, b); }
#include "mathnames.h"
#undef MATH_UNARY
#undef MATH_BINARY
/*
* BLAS and LAPACK wrappers
*/
#include "_lapack.c"
/*
* PRNG support
*/
#include "_random.c"
/*
Expose all functions as pointers in a dedicated C extension.
*/
#include "cext/cext.h"
/* Import _pymodule.h first, for a recent _POSIX_C_SOURCE */
#include "_pymodule.h"
#include <math.h>
#ifdef _MSC_VER
#define false 0
#define true 1
#define bool int
#else
#include <stdbool.h>
#endif
/*
Include C-extension here
*/
#include "cext/cext.h"
/* Numba C helpers */
#include "_helperlib.c"
/* Build the `_helperlib.c_helpers` dict mapping helper names to the raw
 * addresses of the exported numba_* functions, so that JIT-compiled code
 * can bind them.  Returns a new reference, or NULL with an exception set. */
static PyObject *
build_c_helpers_dict(void)
{
    PyObject *dct = PyDict_New();
    if (dct == NULL)
        goto error;

/* Insert one name -> address entry; on any failure, jump to error. */
#define _declpointer(name, value) do {                 \
    PyObject *o = PyLong_FromVoidPtr(value);           \
    if (o == NULL) goto error;                         \
    if (PyDict_SetItemString(dct, name, o)) {          \
        Py_DECREF(o);                                  \
        goto error;                                    \
    }                                                  \
    Py_DECREF(o);                                      \
} while (0)

#define declmethod(func) _declpointer(#func, &numba_##func)
#define declpointer(ptr) _declpointer(#ptr, &numba_##ptr)

    /* math / numeric helpers */
    declmethod(fixed_fmod);
    declmethod(fixed_fmodf);
    declmethod(set_fnclex);
    declmethod(sdiv);
    declmethod(srem);
    declmethod(udiv);
    declmethod(urem);
    declmethod(frexp);
    declmethod(frexpf);
    declmethod(ldexp);
    declmethod(ldexpf);
    declmethod(exp2);
    declmethod(exp2f);
    declmethod(log2);
    declmethod(log2f);
    declmethod(cpow);
    declmethod(cpowf);
    declmethod(erf);
    declmethod(erff);
    declmethod(erfc);
    declmethod(erfcf);
    declmethod(gamma);
    declmethod(gammaf);
    declmethod(lgamma);
    declmethod(lgammaf);
    declmethod(nextafter);
    declmethod(nextafterf);
    /* object / buffer adaptors and misc runtime helpers */
    declmethod(complex_adaptor);
    declmethod(adapt_ndarray);
    declmethod(ndarray_new);
    declmethod(extract_record_data);
    declmethod(get_buffer);
    declmethod(adapt_buffer);
    declmethod(release_buffer);
    declmethod(extract_np_datetime);
    declmethod(create_np_datetime);
    declmethod(extract_np_timedelta);
    declmethod(create_np_timedelta);
    declmethod(recreate_record);
    declmethod(fptoui);
    declmethod(fptouif);
    declmethod(gil_ensure);
    declmethod(gil_release);
    declmethod(fatal_error);
    declmethod(py_type);
    declmethod(unpack_slice);
    declmethod(do_raise);
    declmethod(unpickle);
    declmethod(runtime_build_excinfo_struct);
    declmethod(attempt_nocopy_reshape);
    declmethod(get_pyobject_private_data);
    declmethod(set_pyobject_private_data);
    declmethod(reset_pyobject_private_data);

    /* BLAS / LAPACK */
    declmethod(xxgemm);
    declmethod(xxgemv);
    declmethod(xxdot);
    declmethod(xxgetrf);
    declmethod(ez_xxgetri);
    declmethod(xxpotrf);
    declmethod(ez_rgeev);
    declmethod(ez_cgeev);
    declmethod(ez_xxxevd);
    declmethod(ez_gesdd);
    declmethod(ez_geqrf);
    declmethod(ez_xxgqr);
    declmethod(ez_gelsd);
    declmethod(xgesv);
    declmethod(xxnrm2);

    /* PRNG support */
    declmethod(get_py_random_state);
    declmethod(get_np_random_state);
    declmethod(get_internal_random_state);
    declmethod(rnd_shuffle);
    declmethod(rnd_init);
    declmethod(poisson_ptrs);

    /* Unicode string support */
    declmethod(extract_unicode);
    declmethod(gettyperecord);
    declmethod(get_PyUnicode_ExtendedCase);

    /* for gdb breakpoint */
    declmethod(gdb_breakpoint);

    /* for dictionary support */
    declmethod(test_dict);
    declmethod(dict_new_sized);
    declmethod(dict_set_method_table);
    declmethod(dict_free);
    declmethod(dict_length);
    declmethod(dict_lookup);
    declmethod(dict_insert);
    declmethod(dict_insert_ez);
    declmethod(dict_delitem);
    declmethod(dict_popitem);
    declmethod(dict_iter_sizeof);
    declmethod(dict_iter);
    declmethod(dict_iter_next);
    declmethod(dict_dump);

    /* for list support */
    declmethod(test_list);
    declmethod(list_new);
    declmethod(list_set_method_table);
    declmethod(list_free);
    declmethod(list_base_ptr);
    declmethod(list_size_address);
    declmethod(list_length);
    declmethod(list_allocated);
    declmethod(list_is_mutable);
    declmethod(list_set_is_mutable);
    declmethod(list_setitem);
    declmethod(list_getitem);
    declmethod(list_append);
    declmethod(list_delitem);
    declmethod(list_delete_slice);
    declmethod(list_iter_sizeof);
    declmethod(list_iter);
    declmethod(list_iter_next);

/* Re-expand the math bridge list to register every numba_<math> wrapper. */
#define MATH_UNARY(F, R, A) declmethod(F);
#define MATH_BINARY(F, R, A, B) declmethod(F);
    #include "mathnames.h"
#undef MATH_UNARY
#undef MATH_BINARY

#undef declmethod
    return dct;
error:
    Py_XDECREF(dct);
    return NULL;
}
/*
* Helper to deal with flushing stdout
*/
/* Exported so tests/ctypes users can force C-level stdout to flush. */
PyAPI_FUNC(void) _numba_flush_stdout(void) ;

void
_numba_flush_stdout(void) {
    fflush(stdout);
}
/* Python-callable methods of the _helperlib extension module.
 * The all-NULL entry is the required sentinel terminator. */
static PyMethodDef ext_methods[] = {
    { "rnd_get_state", (PyCFunction) _numba_rnd_get_state, METH_O, NULL },
    { "rnd_get_py_state_ptr", (PyCFunction) _numba_rnd_get_py_state_ptr, METH_NOARGS, NULL },
    { "rnd_get_np_state_ptr", (PyCFunction) _numba_rnd_get_np_state_ptr, METH_NOARGS, NULL },
    { "rnd_seed", (PyCFunction) _numba_rnd_seed, METH_VARARGS, NULL },
    { "rnd_set_state", (PyCFunction) _numba_rnd_set_state, METH_VARARGS, NULL },
    { "rnd_shuffle", (PyCFunction) _numba_rnd_shuffle, METH_O, NULL },
    { "_import_cython_function", (PyCFunction) _numba_import_cython_function, METH_VARARGS, NULL },
    { NULL },
};
/*
* These functions are exported by the module's DLL, to exercise ctypes / cffi
* without relying on libc availability (see https://bugs.python.org/issue23606)
*/
PyAPI_FUNC(double) _numba_test_sin(double x);
PyAPI_FUNC(double) _numba_test_cos(double x);
PyAPI_FUNC(double) _numba_test_exp(double x);
PyAPI_FUNC(void) _numba_test_vsquare(int n, double *x, double *out);
PyAPI_FUNC(double) _numba_test_funcptr(double (*func)(double));
PyAPI_FUNC(bool) _numba_test_boolean(void);
/* ctypes/cffi test target: plain sine wrapper (see prototypes above). */
double _numba_test_sin(double x)
{
    return sin(x);
}
/* ctypes/cffi test target: plain cosine wrapper. */
double _numba_test_cos(double x)
{
    return cos(x);
}
/* ctypes/cffi test target: plain exponential wrapper. */
double _numba_test_exp(double x)
{
    return exp(x);
}
/* ctypes/cffi test target: element-wise square of x[0..n) into out. */
void _numba_test_vsquare(int n, double *x, double *out)
{
    int i;
    for (i = 0; i < n; i++)
        out[i] = pow(x[i], 2.0);
}
/* ctypes/cffi test target: element-wise cube of x[0..n) into out.
 * NOTE(review): unlike its siblings, this one has no PyAPI_FUNC prototype
 * in the declaration block above — confirm whether it should be exported. */
void _numba_test_vcube(int n, double *x, double *out)
{
    int i;
    for (i = 0; i < n; i++)
        out[i] = pow(x[i], 3.0);
}
/* ctypes/cffi test target: invoke a caller-supplied function pointer
 * with the fixed argument 1.5 and return its result. */
double _numba_test_funcptr(double (*func)(double))
{
    return func(1.5);
}
/* ctypes/cffi test target: always returns true.
 * FIX: use (void) so the definition matches the PyAPI_FUNC prototype
 * above; an empty parameter list in C declares an unprototyped function. */
bool _numba_test_boolean(void)
{
    return true;
}
/* Module initialization for numba._helperlib: registers the helper-address
 * dict plus a few size/constant attributes used by the Python side. */
MOD_INIT(_helperlib) {
    PyObject *m;
    MOD_DEF(m, "_helperlib", "No docs", ext_methods)
    if (m == NULL)
        return MOD_ERROR_VAL;

    /* NumPy C-API init; presumably the MOD_INIT/import_array macros handle
     * the error return — confirm against _pymodule.h. */
    import_array();

    /* PyModule_AddObject steals the reference on success.
     * NOTE(review): a NULL from build_c_helpers_dict() is passed through
     * unchecked here — confirm this is intentional. */
    PyModule_AddObject(m, "c_helpers", build_c_helpers_dict());
    PyModule_AddIntConstant(m, "long_min", LONG_MIN);
    PyModule_AddIntConstant(m, "long_max", LONG_MAX);
    PyModule_AddIntConstant(m, "py_buffer_size", sizeof(Py_buffer));
    PyModule_AddIntConstant(m, "py_gil_state_size", sizeof(PyGILState_STATE));
    PyModule_AddIntConstant(m, "py_unicode_1byte_kind", PyUnicode_1BYTE_KIND);
    PyModule_AddIntConstant(m, "py_unicode_2byte_kind", PyUnicode_2BYTE_KIND);
    PyModule_AddIntConstant(m, "py_unicode_4byte_kind", PyUnicode_4BYTE_KIND);
    PyModule_AddIntConstant(m, "py_unicode_wchar_kind", PyUnicode_WCHAR_KIND);

    /* One-time global PRNG state initialization (defined in _random.c). */
    numba_rnd_ensure_global_init();

    return MOD_SUCCESS_VAL(m);
}
/*
* This file contains wrappers of BLAS and LAPACK functions
*/
/*
* BLAS calling helpers. The helpers can be called without the GIL held.
* The caller is responsible for checking arguments (especially dimensions).
*/
/* Fast getters caching the value of a function's address after
the first call to import_cblas_function(). */
#define EMIT_GET_CBLAS_FUNC(name) \
static void *cblas_ ## name = NULL; \
static void *get_cblas_ ## name(void) { \
if (cblas_ ## name == NULL) { \
PyGILState_STATE st = PyGILState_Ensure(); \
const char *mod = "scipy.linalg.cython_blas"; \
cblas_ ## name = import_cython_function(mod, # name); \
PyGILState_Release(st); \
} \
return cblas_ ## name; \
}
EMIT_GET_CBLAS_FUNC(dgemm)
EMIT_GET_CBLAS_FUNC(sgemm)
EMIT_GET_CBLAS_FUNC(cgemm)
EMIT_GET_CBLAS_FUNC(zgemm)
EMIT_GET_CBLAS_FUNC(dgemv)
EMIT_GET_CBLAS_FUNC(sgemv)
EMIT_GET_CBLAS_FUNC(cgemv)
EMIT_GET_CBLAS_FUNC(zgemv)
EMIT_GET_CBLAS_FUNC(ddot)
EMIT_GET_CBLAS_FUNC(sdot)
EMIT_GET_CBLAS_FUNC(cdotu)
EMIT_GET_CBLAS_FUNC(zdotu)
EMIT_GET_CBLAS_FUNC(cdotc)
EMIT_GET_CBLAS_FUNC(zdotc)
EMIT_GET_CBLAS_FUNC(snrm2)
EMIT_GET_CBLAS_FUNC(dnrm2)
EMIT_GET_CBLAS_FUNC(scnrm2)
EMIT_GET_CBLAS_FUNC(dznrm2)
#undef EMIT_GET_CBLAS_FUNC
/*
* NOTE: On return value convention.
* For LAPACK wrapper development the following conventions are followed:
* Publicly exposed wrapper functions must return:-
* STATUS_ERROR : For an unrecoverable error e.g. caught by xerbla, this is so
* a Py_FatalError can be raised.
* STATUS_SUCCESS: For successful execution
* +n : Where n is an integer for a routine specific error
* (typically derived from an `info` argument).
*
* The caller is responsible for checking and handling the error status.
*/
/* return STATUS_SUCCESS if everything went ok */
#define STATUS_SUCCESS (0)
/* return STATUS_ERROR if an unrecoverable error is encountered */
#define STATUS_ERROR (-1)
/*
* A union of all the types accepted by BLAS/LAPACK for use in cases where
* stack based allocation is needed (typically for work space query args length
* 1).
*/
typedef union all_dtypes_
{
float s;
double d;
npy_complex64 c;
npy_complex128 z;
} all_dtypes;
/*
* A checked PyMem_RawMalloc, ensures that the var is either NULL
* and an exception is raised, or that the allocation was successful.
* Returns zero on success for status checking.
*/
/*
 * A checked PyMem_RawMalloc, ensures that the var is either NULL
 * and an exception is raised, or that the allocation was successful.
 * Returns zero on success for status checking.
 */
static int checked_PyMem_RawMalloc(void** var, size_t bytes)
{
    *var = NULL;
    *var = PyMem_RawMalloc(bytes);
    if (!(*var))
    {
        {
            PyGILState_STATE st = PyGILState_Ensure();
            /* BUGFIX: the message previously used a backslash line
             * continuation inside the string literal, which embedded the
             * next line's leading whitespace in the user-visible text;
             * adjacent string literals concatenate cleanly instead. */
            PyErr_SetString(PyExc_MemoryError,
                            "Insufficient memory for buffer allocation"
                            " required by LAPACK.");
            PyGILState_Release(st);
        }
        return 1;
    }
    return 0;
}
/*
* Checks that the char kind is valid (one of [s,d,c,z]) for use in blas/lapack.
* Returns zero on success for status checking.
*/
/* Validate that *kind* is one of the four BLAS/LAPACK dtype codes
 * [s, d, c, z].  Returns 0 when valid; otherwise sets ValueError (taking
 * the GIL just for that) and returns 1. */
static int check_kind(char kind)
{
    if (kind == 's' || kind == 'd' || kind == 'c' || kind == 'z')
        return 0;

    {
        PyGILState_STATE st = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid data type (kind) found");
        PyGILState_Release(st);
    }
    return 1;
}
/*
* Guard macro for ensuring a valid data "kind" is being used.
* Place at the top of all routines with switches on "kind" that accept
* one of [s,d,c,z].
*/
#define ENSURE_VALID_KIND(__KIND) \
if (check_kind( __KIND )) \
{ \
return STATUS_ERROR; \
} \
/*
* Checks that the char kind is valid for the real domain (one of [s,d])
* for use in blas/lapack.
* Returns zero on success for status checking.
*/
/* Validate that *kind* is a real-domain dtype code, i.e. one of [s, d].
 * Returns 0 when valid; otherwise sets ValueError under the GIL and
 * returns 1. */
static int check_real_kind(char kind)
{
    if (kind == 's' || kind == 'd')
        return 0;

    {
        PyGILState_STATE st = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid data type (kind) found");
        PyGILState_Release(st);
    }
    return 1;
}
/*
* Guard macro for ensuring a valid data "kind" is being used for the
* real domain routines.
* Place at the top of all routines with switches on "kind" that accept
* one of [s,d].
*/
#define ENSURE_VALID_REAL_KIND(__KIND) \
if (check_real_kind( __KIND )) \
{ \
return STATUS_ERROR; \
} \
/*
* Checks that the char kind is valid for the complex domain (one of [c,z])
* for use in blas/lapack.
* Returns zero on success for status checking.
*/
/* Validate that *kind* is a complex-domain dtype code, i.e. one of [c, z].
 * Returns 0 when valid; otherwise sets ValueError under the GIL and
 * returns 1. */
static int check_complex_kind(char kind)
{
    if (kind == 'c' || kind == 'z')
        return 0;

    {
        PyGILState_STATE st = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid data type (kind) found");
        PyGILState_Release(st);
    }
    return 1;
}
/*
 * Guard macro for ensuring a valid data "kind" is being used for the
 * complex domain routines.
 * Place at the top of all routines with switches on "kind" that accept
 * one of [c,z].
 */
#define ENSURE_VALID_COMPLEX_KIND(__KIND) \
if (check_complex_kind( __KIND )) \
{ \
return STATUS_ERROR; \
} \
/*
* Checks that a function is found (i.e. not null)
* Returns zero on success for status checking.
*/
/* Verify that a resolved BLAS/LAPACK function pointer is non-NULL.
 * Returns STATUS_SUCCESS when usable; otherwise sets RuntimeError under
 * the GIL and returns STATUS_ERROR. */
static int check_func(void *func)
{
    if (func != NULL)
        return STATUS_SUCCESS;

    {
        PyGILState_STATE st = PyGILState_Ensure();
        PyErr_SetString(PyExc_RuntimeError,
                        "Specified LAPACK function could not be found.");
        PyGILState_Release(st);
    }
    return STATUS_ERROR;
}
/*
* Guard macro for ensuring a valid function is found.
*/
#define ENSURE_VALID_FUNC(__FUNC) \
if (check_func(__FUNC)) \
{ \
return STATUS_ERROR; \
} \
/*
* Define what a Fortran "int" is, some LAPACKs have 64 bit integer support
* numba presently opts for a 32 bit C int.
* This definition allows scope for later configuration time magic to adjust
* the size of int at all the call sites.
*/
#define F_INT int
typedef float (*sdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT *incy);
typedef double (*ddot_t)(F_INT *n, void *dx, F_INT *incx, void *dy, F_INT
*incy);
typedef npy_complex64 (*cdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy,
F_INT *incy);
typedef npy_complex128 (*zdot_t)(F_INT *n, void *dx, F_INT *incx, void *dy,
F_INT *incy);
typedef void (*xxgemv_t)(char *trans, F_INT *m, F_INT *n,
void *alpha, void *a, F_INT *lda,
void *x, F_INT *incx, void *beta,
void *y, F_INT *incy);
typedef void (*xxgemm_t)(char *transa, char *transb,
F_INT *m, F_INT *n, F_INT *k,
void *alpha, void *a, F_INT *lda,
void *b, F_INT *ldb, void *beta,
void *c, F_INT *ldc);
typedef float (*sxnrm2_t) (F_INT *n, void *x, F_INT *incx);
typedef double (*dxnrm2_t) (F_INT *n, void *x, F_INT *incx);
/* Vector * vector: result = dx * dy */
/* Dot product of two n-vectors; `conjugate` selects the conjugated variant
 * for the complex kinds.  The result is written through *result* whose
 * pointee type is determined by `kind`. */
NUMBA_EXPORT_FUNC(int)
numba_xxdot(char kind, char conjugate, Py_ssize_t n, void *dx, void *dy,
            void *result)
{
    void *fn = NULL;
    F_INT dim;
    F_INT unit_stride = 1;

    ENSURE_VALID_KIND(kind)

    /* Phase 1: resolve the kind-specific BLAS routine. */
    switch (kind)
    {
        case 's':
            fn = get_cblas_sdot();
            break;
        case 'd':
            fn = get_cblas_ddot();
            break;
        case 'c':
            fn = conjugate ? get_cblas_cdotc() : get_cblas_cdotu();
            break;
        case 'z':
            fn = conjugate ? get_cblas_zdotc() : get_cblas_zdotu();
            break;
    }
    ENSURE_VALID_FUNC(fn)

    dim = (F_INT) n;

    /* Phase 2: call it through the matching function-pointer type and
       store into the matching result type. */
    switch (kind)
    {
        case 's':
            *(float *) result =
                (*(sdot_t) fn)(&dim, dx, &unit_stride, dy, &unit_stride);
            break;
        case 'd':
            *(double *) result =
                (*(ddot_t) fn)(&dim, dx, &unit_stride, dy, &unit_stride);
            break;
        case 'c':
            *(npy_complex64 *) result =
                (*(cdot_t) fn)(&dim, dx, &unit_stride, dy, &unit_stride);
            break;
        case 'z':
            *(npy_complex128 *) result =
                (*(zdot_t) fn)(&dim, dx, &unit_stride, dy, &unit_stride);
            break;
    }
    return 0;
}
/* Matrix * vector: y = alpha * a * x + beta * y */
/* GEMV bridge: y = alpha * op(a) * x + beta * y, with op chosen by
 * `trans` and the dtype chosen by `kind`. */
NUMBA_EXPORT_FUNC(int)
numba_xxgemv(char kind, char trans, Py_ssize_t m, Py_ssize_t n,
             void *alpha, void *a, Py_ssize_t lda,
             void *x, void *beta, void *y)
{
    void *fn = NULL;
    F_INT rows, cols, leading_dim;
    F_INT unit_stride = 1;

    ENSURE_VALID_KIND(kind)

    switch (kind)
    {
        case 's':
            fn = get_cblas_sgemv();
            break;
        case 'd':
            fn = get_cblas_dgemv();
            break;
        case 'c':
            fn = get_cblas_cgemv();
            break;
        case 'z':
            fn = get_cblas_zgemv();
            break;
    }
    ENSURE_VALID_FUNC(fn)

    /* Narrow the Py_ssize_t dimensions to the Fortran integer width. */
    rows = (F_INT) m;
    cols = (F_INT) n;
    leading_dim = (F_INT) lda;

    (*(xxgemv_t) fn)(&trans, &rows, &cols, alpha, a, &leading_dim,
                     x, &unit_stride, beta, y, &unit_stride);
    return 0;
}
/* Matrix * matrix: c = alpha * a * b + beta * c */
/* GEMM bridge: c = alpha * op(a) * op(b) + beta * c, with op chosen by
 * transa/transb and the dtype chosen by `kind`. */
NUMBA_EXPORT_FUNC(int)
numba_xxgemm(char kind, char transa, char transb,
             Py_ssize_t m, Py_ssize_t n, Py_ssize_t k,
             void *alpha, void *a, Py_ssize_t lda,
             void *b, Py_ssize_t ldb, void *beta,
             void *c, Py_ssize_t ldc)
{
    void *fn = NULL;
    F_INT dim_m, dim_n, dim_k;
    F_INT ld_a, ld_b, ld_c;

    ENSURE_VALID_KIND(kind)

    switch (kind)
    {
        case 's':
            fn = get_cblas_sgemm();
            break;
        case 'd':
            fn = get_cblas_dgemm();
            break;
        case 'c':
            fn = get_cblas_cgemm();
            break;
        case 'z':
            fn = get_cblas_zgemm();
            break;
    }
    ENSURE_VALID_FUNC(fn)

    /* Narrow the Py_ssize_t dimensions to the Fortran integer width. */
    dim_m = (F_INT) m;
    dim_n = (F_INT) n;
    dim_k = (F_INT) k;
    ld_a = (F_INT) lda;
    ld_b = (F_INT) ldb;
    ld_c = (F_INT) ldc;

    (*(xxgemm_t) fn)(&transa, &transb, &dim_m, &dim_n, &dim_k,
                     alpha, a, &ld_a, b, &ld_b, beta, c, &ld_c);
    return 0;
}
/* L2-norms */
/* L2-norm bridge: writes the Euclidean norm of the n-vector x (with
 * stride incx) into *result — a float for kinds s/c, a double for d/z.
 * Always returns 0 (STATUS_SUCCESS) once the routine is resolved. */
NUMBA_EXPORT_FUNC(F_INT)
numba_xxnrm2(char kind, Py_ssize_t n, void * x, Py_ssize_t incx, void * result)
{
    void *fn = NULL;
    F_INT dim, stride;

    ENSURE_VALID_KIND(kind)

    /* Resolve the dtype-specific routine; the complex variants return a
       real-typed norm, hence scnrm2/dznrm2. */
    switch (kind)
    {
        case 's':
            fn = get_cblas_snrm2();
            break;
        case 'd':
            fn = get_cblas_dnrm2();
            break;
        case 'c':
            fn = get_cblas_scnrm2();
            break;
        case 'z':
            fn = get_cblas_dznrm2();
            break;
    }
    ENSURE_VALID_FUNC(fn)

    dim = (F_INT) n;
    stride = (F_INT) incx;

    switch (kind)
    {
        case 's':
        case 'c':
            /* single-precision result for both s and c inputs */
            *(float *) result = (*(sxnrm2_t) fn)(&dim, x, &stride);
            break;
        case 'd':
        case 'z':
            /* double-precision result for both d and z inputs */
            *(double *) result = (*(dxnrm2_t) fn)(&dim, x, &stride);
            break;
    }
    return 0;
}
/*
* LAPACK calling helpers. The helpers can be called without the GIL held.
* The caller is responsible for checking arguments (especially dimensions).
*/
/* Fast getters caching the value of a function's address after
the first call to import_clapack_function(). */
#define EMIT_GET_CLAPACK_FUNC(name) \
static void *clapack_ ## name = NULL; \
static void *get_clapack_ ## name(void) { \
if (clapack_ ## name == NULL) { \
PyGILState_STATE st = PyGILState_Ensure(); \
const char *mod = "scipy.linalg.cython_lapack"; \
clapack_ ## name = import_cython_function(mod, # name); \
PyGILState_Release(st); \
} \
return clapack_ ## name; \
}
/* Instantiate one lazy getter per LAPACK routine; each resolves the
 * corresponding symbol from scipy.linalg.cython_lapack on first use. */
/* Computes an LU factorization of a general M-by-N matrix A
 * using partial pivoting with row interchanges.
 */
EMIT_GET_CLAPACK_FUNC(sgetrf)
EMIT_GET_CLAPACK_FUNC(dgetrf)
EMIT_GET_CLAPACK_FUNC(cgetrf)
EMIT_GET_CLAPACK_FUNC(zgetrf)
/* Computes the inverse of a matrix using the LU factorization
 * computed by xGETRF.
 */
EMIT_GET_CLAPACK_FUNC(sgetri)
EMIT_GET_CLAPACK_FUNC(dgetri)
EMIT_GET_CLAPACK_FUNC(cgetri)
EMIT_GET_CLAPACK_FUNC(zgetri)
/* Compute Cholesky factorizations */
EMIT_GET_CLAPACK_FUNC(spotrf)
EMIT_GET_CLAPACK_FUNC(dpotrf)
EMIT_GET_CLAPACK_FUNC(cpotrf)
EMIT_GET_CLAPACK_FUNC(zpotrf)
/* Computes for an N-by-N real nonsymmetric matrix A, the
 * eigenvalues and, optionally, the left and/or right eigenvectors.
 */
EMIT_GET_CLAPACK_FUNC(sgeev)
EMIT_GET_CLAPACK_FUNC(dgeev)
EMIT_GET_CLAPACK_FUNC(cgeev)
EMIT_GET_CLAPACK_FUNC(zgeev)
/* Computes for an N-by-N symmetric (ssyevd/dsyevd) or Hermitian
 * (cheevd/zheevd) matrix A, the eigenvalues and, optionally, the
 * eigenvectors (divide-and-conquer driver).
 */
EMIT_GET_CLAPACK_FUNC(ssyevd)
EMIT_GET_CLAPACK_FUNC(dsyevd)
EMIT_GET_CLAPACK_FUNC(cheevd)
EMIT_GET_CLAPACK_FUNC(zheevd)
/* Computes SVD via the divide-and-conquer driver xgesdd */
EMIT_GET_CLAPACK_FUNC(sgesdd)
EMIT_GET_CLAPACK_FUNC(dgesdd)
EMIT_GET_CLAPACK_FUNC(cgesdd)
EMIT_GET_CLAPACK_FUNC(zgesdd)
/* Computes QR decompositions */
EMIT_GET_CLAPACK_FUNC(sgeqrf)
EMIT_GET_CLAPACK_FUNC(dgeqrf)
EMIT_GET_CLAPACK_FUNC(cgeqrf)
EMIT_GET_CLAPACK_FUNC(zgeqrf)
/* Computes columns of Q from elementary reflectors produced by xgeqrf() (QR).
 */
EMIT_GET_CLAPACK_FUNC(sorgqr)
EMIT_GET_CLAPACK_FUNC(dorgqr)
EMIT_GET_CLAPACK_FUNC(cungqr)
EMIT_GET_CLAPACK_FUNC(zungqr)
/* Computes the minimum norm solution to linear least squares problems */
EMIT_GET_CLAPACK_FUNC(sgelsd)
EMIT_GET_CLAPACK_FUNC(dgelsd)
EMIT_GET_CLAPACK_FUNC(cgelsd)
EMIT_GET_CLAPACK_FUNC(zgelsd)
/* Computes the solution to a system of linear equations */
EMIT_GET_CLAPACK_FUNC(sgesv)
EMIT_GET_CLAPACK_FUNC(dgesv)
EMIT_GET_CLAPACK_FUNC(cgesv)
EMIT_GET_CLAPACK_FUNC(zgesv)
#undef EMIT_GET_CLAPACK_FUNC
/* Function-pointer types matching the Fortran calling convention of the
 * LAPACK routines resolved above (all arguments passed by pointer).
 * Prefixes: r = real variant, c = complex variant, x = kind-agnostic. */
/* LU factorization (xgetrf) */
typedef void (*xxgetrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, F_INT *ipiv,
                          F_INT *info);
/* matrix inverse from LU factors (xgetri) */
typedef void (*xxgetri_t)(F_INT *n, void *a, F_INT *lda, F_INT *ipiv, void
                          *work, F_INT *lwork, F_INT *info);
/* Cholesky factorization (xpotrf) */
typedef void (*xxpotrf_t)(char *uplo, F_INT *n, void *a, F_INT *lda, F_INT
                          *info);
/* general eigenproblem, real input (sgeev/dgeev): eigenvalues split wr/wi */
typedef void (*rgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT *lda,
                        void *wr, void *wi, void *vl, F_INT *ldvl, void *vr,
                        F_INT *ldvr, void *work, F_INT *lwork, F_INT *info);
/* general eigenproblem, complex input (cgeev/zgeev): extra rwork buffer */
typedef void (*cgeev_t)(char *jobvl, char *jobvr, F_INT *n, void *a, F_INT
                        *lda, void *w, void *vl, F_INT *ldvl, void *vr,
                        F_INT *ldvr, void *work, F_INT *lwork, void *rwork,
                        F_INT *info);
/* divide-and-conquer SVD, real input (sgesdd/dgesdd) */
typedef void (*rgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda,
                         void *s, void *u, F_INT *ldu, void *vt, F_INT *ldvt,
                         void *work, F_INT *lwork, F_INT *iwork, F_INT *info);
/* divide-and-conquer SVD, complex input (cgesdd/zgesdd) */
typedef void (*cgesdd_t)(char *jobz, F_INT *m, F_INT *n, void *a, F_INT *lda,
                         void *s, void * u, F_INT *ldu, void * vt, F_INT *ldvt,
                         void *work, F_INT *lwork, void *rwork, F_INT *iwork,
                         F_INT *info);
/* symmetric eigenproblem, divide-and-conquer (ssyevd/dsyevd) */
typedef void (*xsyevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda,
                         void *w, void *work, F_INT *lwork, F_INT *iwork,
                         F_INT *liwork, F_INT *info);
/* Hermitian eigenproblem, divide-and-conquer (cheevd/zheevd) */
typedef void (*xheevd_t)(char *jobz, char *uplo, F_INT *n, void *a, F_INT *lda,
                         void *w, void *work, F_INT *lwork, void *rwork,
                         F_INT *lrwork, F_INT *iwork, F_INT *liwork,
                         F_INT *info);
/* QR factorization (xgeqrf) */
typedef void (*xgeqrf_t)(F_INT *m, F_INT *n, void *a, F_INT *lda, void *tau,
                         void *work, F_INT *lwork, F_INT *info);
/* build Q from elementary reflectors (xorgqr/xungqr) */
typedef void (*xxxgqr_t)(F_INT *m, F_INT *n, F_INT *k, void *a, F_INT *lda,
                         void *tau, void *work, F_INT *lwork, F_INT *info);
/* least-squares via SVD, real input (sgelsd/dgelsd) */
typedef void (*rgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda,
                         void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank,
                         void *work, F_INT *lwork, F_INT *iwork, F_INT *info);
/* least-squares via SVD, complex input (cgelsd/zgelsd) */
typedef void (*cgelsd_t)(F_INT *m, F_INT *n, F_INT *nrhs, void *a, F_INT *lda,
                         void *b, F_INT *ldb, void *s, void *rcond, F_INT *rank,
                         void *work, F_INT *lwork, void *rwork, F_INT *iwork,
                         F_INT *info);
/* linear system solve (xgesv) */
typedef void (*xgesv_t)(F_INT *n, F_INT *nrhs, void *a, F_INT *lda, F_INT *ipiv,
                        void *b, F_INT *ldb, F_INT *info);
/*
 * kind_size()
 * Map a type-kind character to the byte width of that element type.
 *
 * Input:
 * kind - the kind, one of:
 *        (s, d, c, z) = (float, double, complex, double complex).
 *
 * Returns:
 * The size in bytes of the corresponding type, or 0 if the kind is not
 * recognised.
 */
static size_t kind_size(char kind)
{
    if (kind == 's')
        return sizeof(float);
    if (kind == 'd')
        return sizeof(double);
    if (kind == 'c')
        return sizeof(npy_complex64);
    if (kind == 'z')
        return sizeof(npy_complex128);
    return 0;
}
/*
 * underlying_float_kind()
 * Map a kind character to the kind of the float type underlying it.
 *
 * Input:
 * kind - the kind, one of:
 *        (s, d, c, z) = (float, double, complex, double complex).
 *
 * Returns:
 * 's' for single-precision based kinds (s, c), 'd' for double-precision
 * based kinds (d, z). For any other input a Python ValueError is set
 * (under the GIL) and -1 is returned.
 */
static char underlying_float_kind(char kind)
{
    if (kind == 's' || kind == 'c')
        return 's';
    if (kind == 'd' || kind == 'z')
        return 'd';
    /* unknown kind: report via the Python error indicator */
    {
        PyGILState_STATE gilstate = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid kind in underlying_float_kind()");
        PyGILState_Release(gilstate);
    }
    return -1;
}
/*
 * cast_from_X()
 * Cast a value of kind (s, d, c, z) = (float, double, complex,
 * double complex) to a Fortran integer.
 *
 * Parameters:
 * kind  the kind of val
 * val   a pointer to the value to cast
 *
 * Returns:
 * A Fortran int from a cast of val (in the complex case, the real part is
 * taken). On an unknown kind a Python ValueError is set (under the GIL)
 * and -1 is returned.
 *
 * Struct access via non c99 (python only) cmplx types, used for compatibility.
 */
static F_INT
cast_from_X(char kind, void *val)
{
    if (kind == 's')
        return (F_INT)(*(float *)val);
    if (kind == 'd')
        return (F_INT)(*(double *)val);
    if (kind == 'c')
        return (F_INT)(((npy_complex64 *)val)->real);
    if (kind == 'z')
        return (F_INT)(((npy_complex128 *)val)->real);
    /* unknown kind: report via the Python error indicator */
    {
        PyGILState_STATE gilstate = PyGILState_Ensure();
        PyErr_SetString(PyExc_ValueError,
                        "invalid kind in cast");
        PyGILState_Release(gilstate);
    }
    return -1;
}
/* If a LAPACK call reported an invalid argument (info < 0, where -info is
 * the 1-based index of the offending argument), set a Python RuntimeError
 * (under the GIL) and return STATUS_ERROR from the enclosing function. */
#define CATCH_LAPACK_INVALID_ARG(__routine, info)                         \
    do {                                                                  \
        if (info < 0) {                                                   \
            PyGILState_STATE st = PyGILState_Ensure();                    \
            PyErr_Format(PyExc_RuntimeError,                              \
                         "LAPACK Error: Routine " #__routine ". On input %d\n",\
                         -(int) info);                                    \
            PyGILState_Release(st);                                       \
            return STATUS_ERROR;                                          \
        }                                                                 \
    } while(0)
/* Compute LU decomposition of A
 * NOTE: ipiv is an array of Fortran integers allocated by the caller,
 * which is therefore expected to use the right dtype.
 */
NUMBA_EXPORT_FUNC(int)
numba_xxgetrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t lda,
              F_INT *ipiv)
{
    void *fn = NULL;
    F_INT rows, cols, ld, info;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_sgetrf();
    else if (kind == 'd')
        fn = get_clapack_dgetrf();
    else if (kind == 'c')
        fn = get_clapack_cgetrf();
    else if (kind == 'z')
        fn = get_clapack_zgetrf();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    ld = (F_INT) lda;
    (*(xxgetrf_t) fn)(&rows, &cols, a, &ld, ipiv, &info);
    CATCH_LAPACK_INVALID_ARG("xxgetrf", info);
    return (int)info;
}
/* Compute the inverse of a matrix given its LU decomposition
 * Args are as per LAPACK.
 */
static int
numba_raw_xxgetri(char kind, F_INT n, void *a, F_INT lda,
                  F_INT *ipiv, void *work, F_INT *lwork, F_INT *info)
{
    void *fn = NULL;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_sgetri();
    else if (kind == 'd')
        fn = get_clapack_dgetri();
    else if (kind == 'c')
        fn = get_clapack_cgetri();
    else if (kind == 'z')
        fn = get_clapack_zgetri();
    ENSURE_VALID_FUNC(fn)
    (*(xxgetri_t) fn)(&n, a, &lda, ipiv, work, lwork, info);
    return 0;
}
/* Compute the inverse of a matrix from the factorization provided by
 * xxgetrf. (see numba_xxgetrf() about ipiv)
 * Args are as per LAPACK.
 *
 * Follows the standard two-phase LAPACK workspace protocol: a first call
 * with lwork == -1 makes the routine write the optimal workspace size into
 * work[0]; the workspace is then heap-allocated and the routine is called
 * again to perform the actual computation.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_xxgetri(char kind, Py_ssize_t n, void *a, Py_ssize_t lda,
                 F_INT *ipiv)
{
    F_INT _n, _lda;
    F_INT lwork = -1;          /* -1 => workspace-size query */
    F_INT info = 0;
    size_t base_size = -1;     /* bytes per matrix element */
    void * work = NULL;
    all_dtypes stack_slot;     /* scratch wide enough for any element kind */
    ENSURE_VALID_KIND(kind)
    _n = (F_INT)n;
    _lda = (F_INT)lda;
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Phase 1: workspace query; optimal lwork lands in work[0]. */
    numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info);
    CATCH_LAPACK_INVALID_ARG("xxgetri", info);
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    /* Phase 2: the actual inversion with the properly sized workspace. */
    numba_raw_xxgetri(kind, _n, a, _lda, ipiv, work, &lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("xxgetri", info);
    return (int)info;
}
/* Compute the Cholesky factorization of a matrix. */
NUMBA_EXPORT_FUNC(int)
numba_xxpotrf(char kind, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda)
{
    void *fn = NULL;
    F_INT order, ld, info;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_spotrf();
    else if (kind == 'd')
        fn = get_clapack_dpotrf();
    else if (kind == 'c')
        fn = get_clapack_cpotrf();
    else if (kind == 'z')
        fn = get_clapack_zpotrf();
    ENSURE_VALID_FUNC(fn)
    order = (F_INT) n;
    ld = (F_INT) lda;
    (*(xxpotrf_t) fn)(&uplo, &order, a, &ld, &info);
    CATCH_LAPACK_INVALID_ARG("xxpotrf", info);
    return (int)info;
}
/* Raw dispatch to the real-arithmetic general eigensolvers sgeev/dgeev.
 * Args are as per LAPACK.
 */
static int
numba_raw_rgeev(char kind, char jobvl, char jobvr,
                Py_ssize_t n, void *a, Py_ssize_t lda, void *wr, void *wi,
                void *vl, Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr,
                void *work, Py_ssize_t lwork, F_INT *info)
{
    void *fn = NULL;
    F_INT nn, ld, ldl, ldr, wl;
    ENSURE_VALID_REAL_KIND(kind)
    /* dispatch on (real) element kind */
    if (kind == 's')
        fn = get_clapack_sgeev();
    else if (kind == 'd')
        fn = get_clapack_dgeev();
    ENSURE_VALID_FUNC(fn)
    nn = (F_INT) n;
    ld = (F_INT) lda;
    ldl = (F_INT) ldvl;
    ldr = (F_INT) ldvr;
    wl = (F_INT) lwork;
    (*(rgeev_t) fn)(&jobvl, &jobvr, &nn, a, &ld, wr, wi, vl, &ldl, vr,
                    &ldr, work, &wl, info);
    return 0;
}
/* Real space eigen systems info from dgeev/sgeev
 * as numba_raw_rgeev but the allocation and error handling is done for the user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) asks
 * the routine to write the optimal workspace size into work[0]; the
 * workspace is then heap-allocated and the real computation runs.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_rgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a,
               Py_ssize_t lda, void *wr, void *wi, void *vl, Py_ssize_t ldvl,
               void *vr, Py_ssize_t ldvr)
{
    F_INT info = 0;
    F_INT lwork = -1;              /* -1 => workspace-size query */
    F_INT _n, _lda, _ldvl, _ldvr;
    size_t base_size = -1;         /* bytes per matrix element */
    void * work = NULL;
    all_dtypes stack_slot;         /* scratch wide enough for any kind */
    ENSURE_VALID_REAL_KIND(kind)
    _n = (F_INT) n;
    _lda = (F_INT) lda;
    _ldvl = (F_INT) ldvl;
    _ldvr = (F_INT) ldvr;
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Phase 1: workspace query. */
    numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl,
                    vr, _ldvr, work, lwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info);
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    /* Phase 2: actual eigendecomposition. */
    numba_raw_rgeev(kind, jobvl, jobvr, _n, a, _lda, wr, wi, vl, _ldvl,
                    vr, _ldvr, work, lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgeev", info);
    return (int)info;
}
/* Raw dispatch to the complex-arithmetic general eigensolvers cgeev/zgeev.
 * Args are as per LAPACK.
 */
static int
numba_raw_cgeev(char kind, char jobvl, char jobvr,
                Py_ssize_t n, void *a, Py_ssize_t lda, void *w, void *vl,
                Py_ssize_t ldvl, void *vr, Py_ssize_t ldvr, void *work,
                Py_ssize_t lwork, void *rwork, F_INT *info)
{
    void *fn = NULL;
    F_INT nn, ld, ldl, ldr, wl;
    ENSURE_VALID_COMPLEX_KIND(kind)
    nn = (F_INT) n;
    ld = (F_INT) lda;
    ldl = (F_INT) ldvl;
    ldr = (F_INT) ldvr;
    wl = (F_INT) lwork;
    /* dispatch on (complex) element kind */
    if (kind == 'c')
        fn = get_clapack_cgeev();
    else if (kind == 'z')
        fn = get_clapack_zgeev();
    ENSURE_VALID_FUNC(fn)
    (*(cgeev_t) fn)(&jobvl, &jobvr, &nn, a, &ld, w, vl, &ldl, vr,
                    &ldr, work, &wl, rwork, info);
    return 0;
}
/* Complex space eigen systems info from cgeev/zgeev
 * as numba_raw_cgeev but the allocation and error handling is done for the user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0]; work and rwork are then
 * heap-allocated and the real computation runs.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_cgeev(char kind, char jobvl, char jobvr, Py_ssize_t n, void *a,
               Py_ssize_t lda, void *w, void *vl, Py_ssize_t ldvl, void *vr,
               Py_ssize_t ldvr)
{
    F_INT info = 0;
    F_INT lwork = -1;              /* -1 => workspace-size query */
    F_INT _n, _lda, _ldvl, _ldvr;
    size_t base_size = -1;         /* bytes per (complex) matrix element */
    all_dtypes stack_slot, wk;     /* stack scratch for work/rwork queries */
    void * work = NULL;
    void * rwork = (void *)&wk;
    ENSURE_VALID_COMPLEX_KIND(kind)
    _n = (F_INT) n;
    _lda = (F_INT) lda;
    _ldvl = (F_INT) ldvl;
    _ldvr = (F_INT) ldvr;
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Phase 1: workspace query (Py_ssize_t args are narrowed inside the
     * raw wrapper, so passing n/lda/... directly here is fine). */
    numba_raw_cgeev(kind, jobvl, jobvr, n, a, lda, w, vl, ldvl,
                    vr, ldvr, work, lwork, rwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info);
    lwork = cast_from_X(kind, work);
    /* xgeev needs 2*n real scratch entries; base_size is the complex
     * element size here, which is at least as large as required. */
    if (checked_PyMem_RawMalloc((void**)&rwork, 2*n*base_size))
    {
        return STATUS_ERROR;
    }
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        PyMem_RawFree(rwork);
        return STATUS_ERROR;
    }
    /* Phase 2: actual eigendecomposition. */
    numba_raw_cgeev(kind, jobvl, jobvr, _n, a, _lda, w, vl, _ldvl,
                    vr, _ldvr, work, lwork, rwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgeev", info);
    return (int)info;
}
/* Raw dispatch to the real symmetric divide-and-conquer eigensolvers
 * ssyevd/dsyevd. Args are as per LAPACK.
 */
static int
numba_raw_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a,
                 Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork,
                 F_INT *iwork, Py_ssize_t liwork, F_INT *info)
{
    void *fn = NULL;
    F_INT nn, ld, wl, iwl;
    ENSURE_VALID_REAL_KIND(kind)
    /* dispatch on (real) element kind */
    if (kind == 's')
        fn = get_clapack_ssyevd();
    else if (kind == 'd')
        fn = get_clapack_dsyevd();
    ENSURE_VALID_FUNC(fn)
    nn = (F_INT) n;
    ld = (F_INT) lda;
    wl = (F_INT) lwork;
    iwl = (F_INT) liwork;
    (*(xsyevd_t) fn)(&jobz, &uplo, &nn, a, &ld, w, work, &wl, iwork, &iwl,
                     info);
    return 0;
}
/* Real space eigen systems info from dsyevd/ssyevd
 * as numba_raw_rsyevd but the allocation and error handling is done for the user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == liwork == -1)
 * writes the optimal sizes into work[0]/iwork[0]; the buffers are then
 * heap-allocated and the real computation runs.
 */
static int
numba_ez_rsyevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w)
{
    F_INT info = 0;
    F_INT lwork = -1, liwork=-1;   /* -1 => workspace-size query */
    F_INT _n, _lda;
    size_t base_size = -1;         /* bytes per matrix element */
    void *work = NULL;
    F_INT *iwork = NULL;
    all_dtypes stack_slot;
    /* BUGFIX: the workspace query writes an F_INT into iwork[0]; a plain
     * `int` slot is too narrow on ILP64 LAPACK builds (stack overwrite). */
    F_INT stack_int = -1;
    ENSURE_VALID_KIND(kind)
    _n = (F_INT) n;
    _lda = (F_INT) lda;
    base_size = kind_size(kind);
    work = &stack_slot;
    iwork = &stack_int;
    /* Phase 1: workspace query. */
    numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info);
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    liwork = *iwork;
    /* BUGFIX: iwork holds F_INT entries, so allocate sizeof(F_INT) per
     * element; using base_size (the matrix element size) undersizes the
     * buffer for kind 's' on ILP64 builds (heap overflow). */
    if (checked_PyMem_RawMalloc((void**)&iwork, sizeof(F_INT) * liwork))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    /* Phase 2: actual eigendecomposition. */
    numba_raw_rsyevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, iwork, liwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rsyevd", info);
    return (int)info;
}
/* Raw dispatch to the Hermitian divide-and-conquer eigensolvers
 * cheevd/zheevd. Args are as per LAPACK.
 */
static int
numba_raw_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a,
                 Py_ssize_t lda, void *w, void *work, Py_ssize_t lwork,
                 void *rwork, Py_ssize_t lrwork, F_INT *iwork,
                 Py_ssize_t liwork, F_INT *info)
{
    void *fn = NULL;
    F_INT nn, ld, wl, rwl, iwl;
    ENSURE_VALID_COMPLEX_KIND(kind)
    /* dispatch on (complex) element kind */
    if (kind == 'c')
        fn = get_clapack_cheevd();
    else if (kind == 'z')
        fn = get_clapack_zheevd();
    ENSURE_VALID_FUNC(fn)
    nn = (F_INT) n;
    ld = (F_INT) lda;
    wl = (F_INT) lwork;
    rwl = (F_INT) lrwork;
    iwl = (F_INT) liwork;
    (*(xheevd_t) fn)(&jobz, &uplo, &nn, a, &ld, w, work, &wl, rwork, &rwl,
                     iwork, &iwl, info);
    return 0;
}
/* complex space eigen systems info from cheevd/zheevd
 * as numba_raw_cheevd but the allocation and error handling is done for the user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (all workspace
 * lengths == -1) writes the optimal sizes into work[0]/rwork[0]/iwork[0];
 * the buffers are then heap-allocated and the real computation runs.
 */
static int
numba_ez_cheevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w)
{
    F_INT info = 0;
    F_INT lwork = -1, lrwork = -1, liwork=-1;  /* -1 => workspace query */
    F_INT _n, _lda;
    size_t base_size = -1, underlying_float_size = -1;
    void *work = NULL, *rwork = NULL;
    F_INT *iwork = NULL;
    all_dtypes stack_slot1, stack_slot2;
    char uf_kind;                  /* real kind underlying the complex kind */
    /* BUGFIX: the workspace query writes an F_INT into iwork[0]; a plain
     * `int` slot is too narrow on ILP64 LAPACK builds (stack overwrite). */
    F_INT stack_int = -1;
    ENSURE_VALID_COMPLEX_KIND(kind)
    _n = (F_INT) n;
    _lda = (F_INT) lda;
    base_size = kind_size(kind);
    uf_kind = underlying_float_kind(kind);
    underlying_float_size = kind_size(uf_kind);
    work = &stack_slot1;
    rwork = &stack_slot2;
    iwork = &stack_int;
    /* Phase 1: workspace query. */
    numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info);
    lwork = cast_from_X(uf_kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    lrwork = cast_from_X(uf_kind, rwork);
    if (checked_PyMem_RawMalloc(&rwork, underlying_float_size * lrwork))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    liwork = *iwork;
    /* BUGFIX: iwork holds F_INT entries, so allocate sizeof(F_INT) per
     * element rather than base_size (the complex element size). */
    if (checked_PyMem_RawMalloc((void**)&iwork, sizeof(F_INT) * liwork))
    {
        PyMem_RawFree(work);
        PyMem_RawFree(rwork);
        return STATUS_ERROR;
    }
    /* Phase 2: actual eigendecomposition. */
    numba_raw_cheevd(kind, jobz, uplo, _n, a, _lda, w, work, lwork, rwork, lrwork, iwork, liwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cheevd", info);
    return (int)info;
}
/* Hermitian eigenvalue systems info from *syevd and *heevd.
 * This routine hides the type and general complexity involved with making the
 * calls. The work space computation and error handling etc is hidden.
 * Args are as per LAPACK.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_xxxevd(char kind, char jobz, char uplo, Py_ssize_t n, void *a, Py_ssize_t lda, void *w)
{
    ENSURE_VALID_KIND(kind)
    /* real kinds go to the symmetric driver, complex to the Hermitian one */
    if (kind == 's' || kind == 'd')
        return numba_ez_rsyevd(kind, jobz, uplo, n, a, lda, w);
    if (kind == 'c' || kind == 'z')
        return numba_ez_cheevd(kind, jobz, uplo, n, a, lda, w);
    return STATUS_ERROR; /* unreachable */
}
/* Raw dispatch to the real divide-and-conquer SVD drivers sgesdd/dgesdd.
 * Args are as per LAPACK.
 */
static int
numba_raw_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
                 Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
                 Py_ssize_t ldvt, void *work, Py_ssize_t lwork,
                 F_INT *iwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, ld, ld_u, ld_vt, wl;
    ENSURE_VALID_REAL_KIND(kind)
    rows = (F_INT) m;
    cols = (F_INT) n;
    ld = (F_INT) lda;
    ld_u = (F_INT) ldu;
    ld_vt = (F_INT) ldvt;
    wl = (F_INT) lwork;
    /* dispatch on (real) element kind */
    if (kind == 's')
        fn = get_clapack_sgesdd();
    else if (kind == 'd')
        fn = get_clapack_dgesdd();
    ENSURE_VALID_FUNC(fn)
    (*(rgesdd_t) fn)(&jobz, &rows, &cols, a, &ld, s, u, &ld_u, vt, &ld_vt,
                     work, &wl, iwork, info);
    return 0;
}
/* Real space svd info from dgesdd/sgesdd.
 * As numba_raw_rgesdd but the allocation and error handling is done for the
 * user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0]; work and iwork are then
 * heap-allocated and the real computation runs.
 */
static int
numba_ez_rgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
                Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
                Py_ssize_t ldvt)
{
    F_INT info = 0;
    Py_ssize_t minmn = -1;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    all_dtypes stack_slot, wk;     /* stack scratch for the query phase */
    size_t base_size = -1;         /* bytes per matrix element */
    F_INT *iwork = (F_INT *)&wk;
    void *work = NULL;
    ENSURE_VALID_REAL_KIND(kind)
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Compute optimal work size (lwork) */
    numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt, ldvt, work,
                     lwork, iwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    /* CONSISTENCY FIX: use the named STATUS_ERROR like every sibling
     * routine instead of a bare -1 (same value, clearer intent). */
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
    {
        return STATUS_ERROR;
    }
    minmn = m > n ? n : m;
    /* xgesdd requires 8*min(m,n) integer workspace entries */
    if (checked_PyMem_RawMalloc((void**) &iwork, 8 * minmn * sizeof(F_INT)))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    numba_raw_rgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork,
                     iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgesdd", info);
    return (int)info;
}
/* Raw dispatch to the complex divide-and-conquer SVD drivers cgesdd/zgesdd.
 * Args are as per LAPACK.
 */
static int
numba_raw_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
                 Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
                 Py_ssize_t ldvt, void *work, Py_ssize_t lwork, void *rwork,
                 F_INT *iwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, ld, ld_u, ld_vt, wl;
    ENSURE_VALID_COMPLEX_KIND(kind)
    rows = (F_INT) m;
    cols = (F_INT) n;
    ld = (F_INT) lda;
    ld_u = (F_INT) ldu;
    ld_vt = (F_INT) ldvt;
    wl = (F_INT) lwork;
    /* dispatch on (complex) element kind */
    if (kind == 'c')
        fn = get_clapack_cgesdd();
    else if (kind == 'z')
        fn = get_clapack_zgesdd();
    ENSURE_VALID_FUNC(fn)
    (*(cgesdd_t) fn)(&jobz, &rows, &cols, a, &ld, s, u, &ld_u, vt, &ld_vt,
                     work, &wl, rwork, iwork, info);
    return 0;
}
/* complex space svd info from cgesdd/zgesdd.
 * As numba_raw_cgesdd but the allocation and error handling is done for the
 * user.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal complex workspace size into work[0]; rwork/iwork sizes are
 * computed from the xgesdd documentation formulas.
 */
static int
numba_ez_cgesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
                Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
                Py_ssize_t ldvt)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    Py_ssize_t lrwork = -1;
    Py_ssize_t minmn = -1;
    Py_ssize_t tmp1, tmp2;
    Py_ssize_t maxmn = -1;
    size_t real_base_size = -1;    /* bytes per underlying real element */
    size_t complex_base_size = -1; /* bytes per complex matrix element */
    all_dtypes stack_slot, wk1, wk2;  /* stack scratch for the query phase */
    void *work = NULL;
    void *rwork = (void *)&wk1;
    F_INT *iwork = (F_INT *)&wk2;
    ENSURE_VALID_COMPLEX_KIND(kind)
    switch (kind)
    {
        case 'c':
            real_base_size = sizeof(float);
            complex_base_size = sizeof(npy_complex64);
            break;
        case 'z':
            real_base_size = sizeof(double);
            complex_base_size = sizeof(npy_complex128);
            break;
        default:
        {
            PyGILState_STATE st = PyGILState_Ensure();
            /* BUGFIX: the message previously named numba_ez_rgesdd (the
             * real-kind routine); report the correct routine. */
            PyErr_SetString(PyExc_ValueError,\
                            "Invalid kind in numba_ez_cgesdd");
            PyGILState_Release(st);
        }
        return STATUS_ERROR;
    }
    work = &stack_slot;
    /* Compute optimal work size (lwork) */
    numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork,
                     rwork, iwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, complex_base_size * lwork))
        return STATUS_ERROR;
    minmn = m > n ? n : m;
    /* rwork size per the xgesdd docs: depends on whether singular vectors
     * are requested (jobz). */
    if (jobz == 'n')
    {
        lrwork = 7 * minmn;
    }
    else
    {
        maxmn = m > n ? m : n;
        tmp1 = 5 * minmn + 7;
        tmp2 = 2 * maxmn + 2 * minmn + 1;
        lrwork = minmn * (tmp1 > tmp2 ? tmp1: tmp2);
    }
    if (checked_PyMem_RawMalloc(&rwork,
                                real_base_size * (lrwork > 1 ? lrwork : 1)))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    /* xgesdd requires 8*min(m,n) integer workspace entries */
    if (checked_PyMem_RawMalloc((void **) &iwork,
                                8 * minmn * sizeof(F_INT)))
    {
        PyMem_RawFree(work);
        PyMem_RawFree(rwork);
        return STATUS_ERROR;
    }
    numba_raw_cgesdd(kind, jobz, m, n, a, lda, s, u ,ldu, vt, ldvt, work, lwork,
                     rwork, iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgesdd", info);
    return (int)info;
}
/* SVD systems info from *gesdd.
 * This routine hides the type and general complexity involved with making the
 * calls to *gesdd. The work space computation and error handling etc is hidden.
 * Args are as per LAPACK.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_gesdd(char kind, char jobz, Py_ssize_t m, Py_ssize_t n, void *a,
               Py_ssize_t lda, void *s, void *u, Py_ssize_t ldu, void *vt,
               Py_ssize_t ldvt)
{
    ENSURE_VALID_KIND(kind)
    /* route real kinds to the real driver, complex kinds to the complex one */
    if (kind == 's' || kind == 'd')
        return numba_ez_rgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt,
                               ldvt);
    if (kind == 'c' || kind == 'z')
        return numba_ez_cgesdd(kind, jobz, m, n, a, lda, s, u, ldu, vt,
                               ldvt);
    return STATUS_ERROR; /* unreachable */
}
/*
 * Raw dispatch to the QR factorization routines xgeqrf.
 * Return -1 on internal error, 0 on success, > 0 on failure.
 */
static int
numba_raw_xgeqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t
                 lda, void *tau, void *work, Py_ssize_t lwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, ld, wl;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_sgeqrf();
    else if (kind == 'd')
        fn = get_clapack_dgeqrf();
    else if (kind == 'c')
        fn = get_clapack_cgeqrf();
    else if (kind == 'z')
        fn = get_clapack_zgeqrf();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    ld = (F_INT) lda;
    wl = (F_INT) lwork;
    (*(xgeqrf_t) fn)(&rows, &cols, a, &ld, tau, work, &wl, info);
    return 0;
}
/*
 * Compute the QR factorization of a matrix.
 * This routine hides the type and general complexity involved with making the
 * xgeqrf calls. The work space computation and error handling etc is hidden.
 * Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0].
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_geqrf(char kind, Py_ssize_t m, Py_ssize_t n, void *a, Py_ssize_t
               lda, void *tau)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    size_t base_size = -1;         /* bytes per matrix element */
    all_dtypes stack_slot;
    void *work = NULL;
    /* CONSISTENCY FIX: validate the kind up front like every other ez_
     * routine; previously an invalid kind reached kind_size() and yielded
     * a zero-sized workspace instead of a clean error. */
    ENSURE_VALID_KIND(kind)
    base_size = kind_size(kind);
    work = &stack_slot;
    /* Compute optimal work size (lwork) */
    numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;
    numba_raw_xgeqrf(kind, m, n, a, lda, tau, work, lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xgeqrf", info);
    return 0; /* info cannot be >0 */
}
/*
 * Raw dispatch to x{or,un}gqr: build the orthogonal/unitary Q matrix (in QR)
 * from elementary reflectors.
 */
static int
numba_raw_xxxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a,
                 Py_ssize_t lda, void *tau, void * work, Py_ssize_t lwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, nrefl, ld, wl;
    ENSURE_VALID_KIND(kind)
    /* real kinds use xorgqr, complex kinds use xungqr */
    if (kind == 's')
        fn = get_clapack_sorgqr();
    else if (kind == 'd')
        fn = get_clapack_dorgqr();
    else if (kind == 'c')
        fn = get_clapack_cungqr();
    else if (kind == 'z')
        fn = get_clapack_zungqr();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    nrefl = (F_INT) k;
    ld = (F_INT) lda;
    wl = (F_INT) lwork;
    (*(xxxgqr_t) fn)(&rows, &cols, &nrefl, a, &ld, tau, work, &wl, info);
    return 0;
}
/*
 * Compute the orthogonal Q matrix (in QR) from elementary reflectors.
 * This routine hides the type and general complexity involved with making the
 * x{or,un}qrf calls. The work space computation and error handling etc is
 * hidden. Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0].
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_xxgqr(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t k, void *a,
               Py_ssize_t lda, void *tau)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    size_t base_size = -1;         /* bytes per matrix element */
    all_dtypes stack_slot;
    void *work = NULL;
    /* CONSISTENCY FIX: validate the kind up front like every other ez_
     * routine rather than relying solely on the raw wrapper's check. */
    ENSURE_VALID_KIND(kind)
    work = &stack_slot;
    /* Compute optimal work size (lwork) */
    numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info);
    base_size = kind_size(kind);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;
    numba_raw_xxxgqr(kind, m, n, k, a, lda, tau, work, lwork, &info);
    PyMem_RawFree(work);
    CATCH_LAPACK_INVALID_ARG("numba_raw_xxxgqr", info);
    return 0; /* info cannot be >0 */
}
/*
 * Raw dispatch to sgelsd/dgelsd: minimum-norm solution to a real linear
 * least squares problem.
 */
static int
numba_raw_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                 void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                 void * rcond, Py_ssize_t * rank, void * work,
                 Py_ssize_t lwork, F_INT *iwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, nrhs_, ld_a, ld_b, rank_out, wl;
    ENSURE_VALID_REAL_KIND(kind)
    /* dispatch on (real) element kind */
    if (kind == 's')
        fn = get_clapack_sgelsd();
    else if (kind == 'd')
        fn = get_clapack_dgelsd();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    nrhs_ = (F_INT) nrhs;
    ld_a = (F_INT) lda;
    ld_b = (F_INT) ldb;
    wl = (F_INT) lwork;
    (*(rgelsd_t) fn)(&rows, &cols, &nrhs_, a, &ld_a, b, &ld_b, S, rcond,
                     &rank_out, work, &wl, iwork, info);
    /* propagate the effective rank back in the caller's width */
    *rank = (Py_ssize_t) rank_out;
    return 0;
}
/*
 * Compute the minimum-norm solution to a real linear least squares problem.
 * This routine hides the type and general complexity involved with making the
 * {s,d}gelsd calls. The work space computation and error handling etc is
 * hidden. Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal workspace size into work[0] and the required iwork length
 * into iwork[0]; the buffers are then heap-allocated and the real
 * computation runs.
 */
static int
numba_ez_rgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                double rcond, Py_ssize_t * rank)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    size_t base_size = -1;         /* bytes per matrix element */
    all_dtypes stack_slot;
    void *work = NULL, *rcond_cast = NULL;
    F_INT *iwork = NULL;
    F_INT iwork_tmp;               /* receives required iwork length */
    float tmpf;                    /* float-width copy of rcond for kind 's' */
    ENSURE_VALID_REAL_KIND(kind)
    base_size = kind_size(kind);
    work = &stack_slot;
    rcond_cast = work; /* stop checks on null ptr complaining */
    /* Compute optimal work size (lwork) */
    numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, &iwork_tmp, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;
    /* Allocate iwork array */
    if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    /* cast rcond to the right type */
    switch (kind)
    {
        case 's':
            tmpf = (float)rcond;
            rcond_cast = (void * )&tmpf;
            break;
        case 'd':
            rcond_cast = (void * )&rcond;
            break;
    }
    numba_raw_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_rgelsd", info);
    return (int)info;
}
/*
 * Raw dispatch to cgelsd/zgelsd: minimum-norm solution to a complex linear
 * least squares problem.
 */
static int
numba_raw_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                 void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                 void *rcond, Py_ssize_t * rank, void * work,
                 Py_ssize_t lwork, void * rwork, F_INT *iwork, F_INT *info)
{
    void *fn = NULL;
    F_INT rows, cols, nrhs_, ld_a, ld_b, rank_out, wl;
    ENSURE_VALID_COMPLEX_KIND(kind)
    /* dispatch on (complex) element kind */
    if (kind == 'c')
        fn = get_clapack_cgelsd();
    else if (kind == 'z')
        fn = get_clapack_zgelsd();
    ENSURE_VALID_FUNC(fn)
    rows = (F_INT) m;
    cols = (F_INT) n;
    nrhs_ = (F_INT) nrhs;
    ld_a = (F_INT) lda;
    ld_b = (F_INT) ldb;
    wl = (F_INT) lwork;
    (*(cgelsd_t) fn)(&rows, &cols, &nrhs_, a, &ld_a, b, &ld_b, S, rcond,
                     &rank_out, work, &wl, rwork, iwork, info);
    /* propagate the effective rank back in the caller's width */
    *rank = (Py_ssize_t) rank_out;
    return 0;
}
/*
 * Compute the minimum-norm solution to a complex linear least squares problem.
 * This routine hides the type and general complexity involved with making the
 * {c,z}gelsd calls. The work space computation and error handling etc is
 * hidden. Args are as per LAPACK.
 *
 * Two-phase LAPACK workspace protocol: the first call (lwork == -1) writes
 * the optimal complex workspace size into work[0], the real workspace size
 * into rwork[0] and the iwork length into iwork[0]; the buffers are then
 * heap-allocated and the real computation runs.
 */
static int
numba_ez_cgelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
                void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
                double rcond, Py_ssize_t * rank)
{
    F_INT info = 0;
    Py_ssize_t lwork = -1;         /* -1 => workspace-size query */
    size_t base_size = -1;         /* bytes per complex matrix element */
    all_dtypes stack_slot1, stack_slot2;
    size_t real_base_size = 0;     /* bytes per underlying real element */
    void *work = NULL, *rwork = NULL, *rcond_cast = NULL;
    Py_ssize_t lrwork;
    F_INT *iwork = NULL;
    F_INT iwork_tmp;               /* receives required iwork length */
    char real_kind = '-';          /* real kind underlying the complex kind */
    float tmpf;                    /* float-width copy of rcond for kind 'c' */
    ENSURE_VALID_COMPLEX_KIND(kind)
    base_size = kind_size(kind);
    work = &stack_slot1;
    rwork = &stack_slot2;
    rcond_cast = work; /* stop checks on null ptr complaining */
    /* Compute optimal work size */
    numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, rwork, &iwork_tmp, &info);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info);
    /* Allocate work array */
    lwork = cast_from_X(kind, work);
    if (checked_PyMem_RawMalloc(&work, base_size * lwork))
        return STATUS_ERROR;
    /* Allocate iwork array */
    if (checked_PyMem_RawMalloc((void **)&iwork, sizeof(F_INT) * iwork_tmp))
    {
        PyMem_RawFree(work);
        return STATUS_ERROR;
    }
    /* cast rcond to the matching real precision */
    switch (kind)
    {
        case 'c':
            real_kind = 's';
            tmpf = (float)rcond;
            rcond_cast = (void * )&tmpf;
            break;
        case 'z':
            real_kind = 'd';
            rcond_cast = (void * )&rcond;
            break;
    }
    real_base_size = kind_size(real_kind);
    /* the required rwork length was returned (as a real) in rwork[0] */
    lrwork = cast_from_X(real_kind, rwork);
    if (checked_PyMem_RawMalloc((void **)&rwork, real_base_size * lrwork))
    {
        PyMem_RawFree(work);
        PyMem_RawFree(iwork);
        return STATUS_ERROR;
    }
    numba_raw_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond_cast, rank,
                     work, lwork, rwork, iwork, &info);
    PyMem_RawFree(work);
    PyMem_RawFree(rwork);
    PyMem_RawFree(iwork);
    CATCH_LAPACK_INVALID_ARG("numba_raw_cgelsd", info);
    return (int)info;
}
/*
 * Compute the minimum-norm solution to a linear least squares problems.
 * This routine hides the type and general complexity involved with making the
 * calls to *gelsd. The work space computation and error handling etc is hidden.
 * Args are as per LAPACK.
 */
NUMBA_EXPORT_FUNC(int)
numba_ez_gelsd(char kind, Py_ssize_t m, Py_ssize_t n, Py_ssize_t nrhs,
               void *a, Py_ssize_t lda, void *b, Py_ssize_t ldb, void *S,
               double rcond, Py_ssize_t * rank)
{
    ENSURE_VALID_KIND(kind)
    /* route real kinds to the real driver, complex kinds to the complex one */
    if (kind == 's' || kind == 'd')
        return numba_ez_rgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond,
                               rank);
    if (kind == 'c' || kind == 'z')
        return numba_ez_cgelsd(kind, m, n, nrhs, a, lda, b, ldb, S, rcond,
                               rank);
    return STATUS_ERROR; /* unreachable */
}
/*
 * Solve a system of linear equations A * X = B via xgesv (LU with partial
 * pivoting).
 */
NUMBA_EXPORT_FUNC(int)
numba_xgesv(char kind, Py_ssize_t n, Py_ssize_t nrhs, void *a, Py_ssize_t lda,
            F_INT *ipiv, void *b, Py_ssize_t ldb)
{
    void *fn = NULL;
    F_INT order, nrhs_, ld_a, ld_b, info;
    ENSURE_VALID_KIND(kind)
    /* dispatch on element kind */
    if (kind == 's')
        fn = get_clapack_sgesv();
    else if (kind == 'd')
        fn = get_clapack_dgesv();
    else if (kind == 'c')
        fn = get_clapack_cgesv();
    else if (kind == 'z')
        fn = get_clapack_zgesv();
    ENSURE_VALID_FUNC(fn)
    order = (F_INT) n;
    nrhs_ = (F_INT) nrhs;
    ld_a = (F_INT) lda;
    ld_b = (F_INT) ldb;
    (*(xgesv_t) fn)(&order, &nrhs_, a, &ld_a, ipiv, b, &ld_b, &info);
    CATCH_LAPACK_INVALID_ARG("xgesv", info);
    return (int)info;
}
/* undef defines and macros: keep these helper macros private to this
 * translation unit so concatenated sources cannot pick them up.
 * (EMIT_GET_CLAPACK_FUNC was already undefined after its last use; the
 * second #undef here is harmless.) */
#undef STATUS_SUCCESS
#undef STATUS_ERROR
#undef ENSURE_VALID_KIND
#undef ENSURE_VALID_REAL_KIND
#undef ENSURE_VALID_COMPLEX_KIND
#undef ENSURE_VALID_FUNC
#undef F_INT
#undef EMIT_GET_CLAPACK_FUNC
#undef CATCH_LAPACK_INVALID_ARG
#ifndef NUMBA_COMMON_H_
#define NUMBA_COMMON_H_
/* __has_attribute() is a clang / gcc-5 macro */
#ifndef __has_attribute
# define __has_attribute(x) 0
#endif
/* This attribute marks symbols that can be shared across C objects
 * but are not exposed outside of a shared library or executable.
 * Note this is default behaviour for global symbols under Windows.
 */
#if defined(_MSC_VER)
/* MSVC: hidden is the default; exported symbols need dllexport */
#define VISIBILITY_HIDDEN
#define VISIBILITY_GLOBAL __declspec(dllexport)
#elif (__has_attribute(visibility) || (defined(__GNUC__) && __GNUC__ >= 4))
/* GCC/Clang ELF visibility attributes */
#define VISIBILITY_HIDDEN __attribute__ ((visibility("hidden")))
#define VISIBILITY_GLOBAL __attribute__ ((visibility("default")))
#else
/* Unknown toolchain: fall back to default (global) visibility everywhere */
#define VISIBILITY_HIDDEN
#define VISIBILITY_GLOBAL
#endif
/*
 * Numba's version of the PyArray_DescrCheck macro from NumPy, use it as a
 * direct replacement of NumPy's PyArray_DescrCheck to ensure binary
 * compatibility.
 *
 * Details of why this is needed:
 * NumPy 1.18 changed the definition of the PyArray_DescrCheck macro here:
 * https://github.com/numpy/numpy/commit/6108b5d1e138d07e3c9f2a4e3b1933749ad0e698
 * the result of this being that building against NumPy <1.18 would prevent
 * Numba running against NumPy >= 1.20 as noted here:
 * https://github.com/numba/numba/issues/6041#issuecomment-665132199
 *
 * This macro definition is copied from:
 * https://github.com/numpy/numpy/commit/6108b5d1e138d07e3c9f2a4e3b1933749ad0e698#diff-ad2213da23136c5fc5883d9eb2d88666R26
 *
 * NOTE: This is the NumPy 1.18 and above version of the macro.
 */
#define NUMBA_PyArray_DescrCheck(op) PyObject_TypeCheck(op, &PyArrayDescr_Type)
#endif  /* NUMBA_COMMON_H_ */
#ifndef NUMBA_PY_MODULE_H_
#define NUMBA_PY_MODULE_H_
/* Make "s#" et al. use Py_ssize_t lengths; must precede Python.h */
#define PY_SSIZE_T_CLEAN
#include "Python.h"
#include "structmember.h"
#include "frameobject.h"
/* Module-creation helpers (Python 3 multi-phase-free style) */
#define MOD_ERROR_VAL NULL
#define MOD_SUCCESS_VAL(val) val
#define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void)
#define MOD_DEF(ob, name, doc, methods) { \
    static struct PyModuleDef moduledef = { \
        PyModuleDef_HEAD_INIT, name, doc, -1, methods, NULL, NULL, NULL, NULL }; \
    ob = PyModule_Create(&moduledef); }
#define MOD_INIT_EXEC(name) PyInit_##name();
/* Python 2 era spellings aliased to their Python 3 equivalents, so legacy
 * extension code compiles unchanged. */
#define PyString_AsString PyUnicode_AsUTF8
#define PyString_Check PyUnicode_Check
#define PyString_FromFormat PyUnicode_FromFormat
#define PyString_FromString PyUnicode_FromString
#define PyString_InternFromString PyUnicode_InternFromString
#define PyInt_Type PyLong_Type
#define PyInt_Check PyLong_Check
#define PyInt_CheckExact PyLong_CheckExact
/* Expose the address of a C symbol as an int attribute on module *m* */
#define SetAttrStringFromVoidPointer(m, name) do { \
    PyObject *tmp = PyLong_FromVoidPtr((void *) &name); \
    PyObject_SetAttrString(m, #name, tmp); \
    Py_DECREF(tmp); } while (0)
#endif  /* NUMBA_PY_MODULE_H_ */
/*
 * PRNG support.
 */
#ifdef _MSC_VER
/* No pthreads on MSVC; the atfork reseeding hook is compiled out */
#define HAVE_PTHREAD_ATFORK 0
#else
#define HAVE_PTHREAD_ATFORK 1
#include <pthread.h>
#endif
/* Magic Mersenne Twister constants */
#define MT_N 624
#define MT_M 397
#define MT_MATRIX_A 0x9908b0dfU
#define MT_UPPER_MASK 0x80000000U
#define MT_LOWER_MASK 0x7fffffffU
/*
 * Note this structure is accessed in numba.targets.randomimpl,
 * any changes here should be reflected there too.
 */
typedef struct {
    /* Next position to consume in mt[]; MT_N means "exhausted, reshuffle"
     * (see get_next_int32()) */
    int index;
    /* unsigned int is sufficient on modern machines as we only need 32 bits */
    unsigned int mt[MT_N];
    /* has_gauss flags whether the cached value in gauss is valid.
     * NOTE(review): the gaussian consumers are outside this chunk —
     * presumably a Box-Muller style spare-deviate cache; confirm. */
    int has_gauss;
    double gauss;
    /* Zero until seeded; checked by the state getters for lazy init */
    int is_initialized;
} rnd_state_t;
/* Some code portions below from CPython's _randommodule.c, some others
   from Numpy's and Jean-Sebastien Roy's randomkit.c. */
/* Regenerate all MT_N words of the state vector in place (one full
 * MT19937 "twist"). Three loops handle the wrap-around of the
 * mt[i+1] / mt[i+MT_M] accesses without a per-step modulo; the
 * (-(int)(y & 1) & MT_MATRIX_A) term conditionally XORs the twist
 * matrix when the lowest bit of y is set. */
NUMBA_EXPORT_FUNC(void)
numba_rnd_shuffle(rnd_state_t *state)
{
    int i;
    unsigned int y;
    for (i = 0; i < MT_N - MT_M; i++) {
        y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK);
        state->mt[i] = state->mt[i+MT_M] ^ (y >> 1) ^
                       (-(int) (y & 1) & MT_MATRIX_A);
    }
    for (; i < MT_N - 1; i++) {
        y = (state->mt[i] & MT_UPPER_MASK) | (state->mt[i+1] & MT_LOWER_MASK);
        state->mt[i] = state->mt[i+(MT_M-MT_N)] ^ (y >> 1) ^
                       (-(int) (y & 1) & MT_MATRIX_A);
    }
    /* Last word wraps around to mt[0] */
    y = (state->mt[MT_N - 1] & MT_UPPER_MASK) | (state->mt[0] & MT_LOWER_MASK);
    state->mt[MT_N - 1] = state->mt[MT_M - 1] ^ (y >> 1) ^
                          (-(int) (y & 1) & MT_MATRIX_A);
}
/* Seed the Mersenne Twister state vector from a single 32-bit integer.
 * Fills mt[] with the Knuth-style multiplicative recurrence used by the
 * MT reference implementation, then marks the state as exhausted
 * (index == MT_N forces a full shuffle on the next draw) and seeded. */
NUMBA_EXPORT_FUNC(void)
numba_rnd_init(rnd_state_t *state, unsigned int seed)
{
    unsigned int i = 0;
    unsigned int value = seed & 0xffffffffU;
    while (i < MT_N) {
        state->mt[i] = value;
        value = (1812433253U * (value ^ (value >> 30)) + i + 1) & 0xffffffffU;
        i++;
    }
    state->index = MT_N;
    state->gauss = 0.0;
    state->has_gauss = 0;
    state->is_initialized = 1;
}
/* Perturb mt[] with a key array */
/* Standard MT19937 init_by_array(): seed with the fixed constant
 * 19650218, then run two non-linear mixing passes over mt[] using the
 * caller's key material, and finally force the MSB of mt[0] so the
 * state can never be all zeros. Statement order is significant. */
static void
rnd_init_by_array(rnd_state_t *state, unsigned int init_key[], size_t key_length)
{
    size_t i, j, k;
    unsigned int *mt = state->mt;
    numba_rnd_init(state, 19650218U);
    i = 1; j = 0;
    /* First pass runs max(MT_N, key_length) steps so every key byte and
     * every mt[] word is touched at least once */
    k = (MT_N > key_length ? MT_N : key_length);
    for (; k; k--) {
        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1664525U))
                + init_key[j] + (unsigned int) j; /* non linear */
        mt[i] &= 0xffffffffU;
        i++; j++;
        if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i = 1; }
        if (j >= key_length) j = 0;
    }
    /* Second pass decorrelates adjacent words */
    for (k = MT_N - 1; k; k--) {
        mt[i] = (mt[i] ^ ((mt[i-1] ^ (mt[i-1] >> 30)) * 1566083941U))
                - (unsigned int) i; /* non linear */
        mt[i] &= 0xffffffffU;
        i++;
        if (i >= MT_N) { mt[0] = mt[MT_N - 1]; i=1; }
    }
    mt[0] = 0x80000000U; /* MSB is 1; ensuring non-zero initial array */
    state->index = MT_N;
    state->has_gauss = 0;
    state->gauss = 0.0;
    state->is_initialized = 1;
}
/*
 * Management of thread-local random state.
 */
/* Set once by numba_rnd_ensure_global_init(); guards atfork registration */
static int rnd_globally_initialized;
#ifdef _MSC_VER
#define THREAD_LOCAL(ty) __declspec(thread) ty
#else
/* Non-standard C99 extension that's understood by gcc and clang */
#define THREAD_LOCAL(ty) __thread ty
#endif
/* Three independent per-thread generator states (py / np / internal),
 * each lazily seeded on first access via the getters below. */
static THREAD_LOCAL(rnd_state_t) numba_py_random_state;
static THREAD_LOCAL(rnd_state_t) numba_np_random_state;
static THREAD_LOCAL(rnd_state_t) numba_internal_random_state;
/* Seed the state with random bytes */
/* Consumes *buf* (the Py_buffer is released on every path) and mixes its
 * bytes into *state* as an array of little-endian 32-bit keys.
 * Returns 0 on success, -1 on allocation failure.
 * NOTE(review): up to 3 trailing bytes are discarded, and a buffer
 * shorter than 4 bytes gives nkeys == 0, in which case
 * rnd_init_by_array() would read init_key[0] out of bounds — confirm
 * all callers pass >= 4 bytes (rnd_implicit_init() passes MT_N * 4). */
static int
rnd_seed_with_bytes(rnd_state_t *state, Py_buffer *buf)
{
    unsigned int *keys;
    unsigned char *bytes;
    size_t i, nkeys;
    nkeys = buf->len / sizeof(unsigned int);
    keys = (unsigned int *) PyMem_Malloc(nkeys * sizeof(unsigned int));
    if (keys == NULL) {
        PyBuffer_Release(buf);
        return -1;
    }
    bytes = (unsigned char *) buf->buf;
    /* Convert input bytes to int32 keys, without violating alignment
     * constraints.
     */
    for (i = 0; i < nkeys; i++, bytes += 4) {
        keys[i] =
            ((unsigned int)bytes[3] << 24) +
            ((unsigned int)bytes[2] << 16) +
            ((unsigned int)bytes[1] << 8) +
            ((unsigned int)bytes[0] << 0);
    }
    PyBuffer_Release(buf);
    rnd_init_by_array(state, keys, nkeys);
    PyMem_Free(keys);
    return 0;
}
#if HAVE_PTHREAD_ATFORK
/* After a fork(), the child should reseed its random states.
 * Since only the main thread survives in the child, it's enough to mark
 * the current thread-local states as uninitialized.
 */
static void
rnd_atfork_child(void)
{
    /* Next access through a state getter re-seeds via rnd_implicit_init() */
    numba_py_random_state.is_initialized = 0;
    numba_np_random_state.is_initialized = 0;
    numba_internal_random_state.is_initialized = 0;
}
#endif
/* Global one-time initialization. Must be called as early as possible so
 * the fork handler is registered before any worker threads or forks
 * appear; subsequent calls are no-ops. */
NUMBA_EXPORT_FUNC(void)
numba_rnd_ensure_global_init(void)
{
    if (rnd_globally_initialized)
        return;
#if HAVE_PTHREAD_ATFORK
    /* Invalidate the thread-local states in forked children */
    pthread_atfork(NULL, NULL, rnd_atfork_child);
#endif
    numba_py_random_state.is_initialized = 0;
    numba_np_random_state.is_initialized = 0;
    numba_internal_random_state.is_initialized = 0;
    rnd_globally_initialized = 1;
}
/* First-time (lazy) initialization of a thread-local random state.
 *
 * Seeds *state* with MT_N * 4 bytes of OS entropy obtained through
 * os.urandom() via the embedded interpreter — still the easiest way to
 * get good-quality cross-platform random bytes.
 *
 * Any failure is fatal: callers of the state getters cannot report
 * errors, so the pending Python error (if any) is printed and the
 * process aborted.
 */
static void
rnd_implicit_init(rnd_state_t *state)
{
    PyObject *module, *bufobj;
    Py_buffer buf;
    PyGILState_STATE gilstate = PyGILState_Ensure();
    /* Fix: PyImport_ImportModuleNoBlock has been a deprecated alias of
     * PyImport_ImportModule since Python 3.3 and is slated for removal;
     * use the canonical, behaviorally identical name. */
    module = PyImport_ImportModule("os");
    if (module == NULL)
        goto error;
    /* Read as many bytes as necessary to get the full entropy
     * exploitable by the MT generator.
     */
    bufobj = PyObject_CallMethod(module, "urandom", "i",
                                 (int) (MT_N * sizeof(unsigned int)));
    Py_DECREF(module);
    if (bufobj == NULL)
        goto error;
    if (PyObject_GetBuffer(bufobj, &buf, PyBUF_SIMPLE))
        goto error;
    Py_DECREF(bufobj);
    if (rnd_seed_with_bytes(state, &buf))
        goto error;
    /* state->is_initialized is set now */
    PyGILState_Release(gilstate);
    return;
error:
    /* In normal conditions, os.urandom() and PyMem_Malloc() shouldn't fail,
     * and we don't want the caller to deal with errors, so just bail out.
     * NOTE(review): Py_FatalError(NULL) relies on the libc tolerating a
     * NULL message string — consider passing a short message. */
    if (PyErr_Occurred())
        PyErr_Print();
    Py_FatalError(NULL);
}
/* Functions returning the thread-local random state pointer.
 * The LLVM JIT doesn't support thread-local variables so we rely
 * on the C compiler instead.
 */
/* Return this thread's Python-flavoured state, seeding it lazily. */
NUMBA_EXPORT_FUNC(rnd_state_t *)
numba_get_py_random_state(void)
{
    rnd_state_t *st = &numba_py_random_state;
    if (!st->is_initialized)
        rnd_implicit_init(st);
    return st;
}
/* Return this thread's NumPy-flavoured state, seeding it lazily. */
NUMBA_EXPORT_FUNC(rnd_state_t *)
numba_get_np_random_state(void)
{
    rnd_state_t *st = &numba_np_random_state;
    if (!st->is_initialized)
        rnd_implicit_init(st);
    return st;
}
/* Return this thread's Numba-internal state, seeding it lazily. */
NUMBA_EXPORT_FUNC(rnd_state_t *)
numba_get_internal_random_state(void)
{
    rnd_state_t *st = &numba_internal_random_state;
    if (!st->is_initialized)
        rnd_implicit_init(st);
    return st;
}
/*
 * Python-exposed helpers for state management and testing.
 */
/* PyArg_ParseTuple "O&" converter: interpret *obj* as the integer
 * address of a rnd_state_t. Returns non-zero on success per the Python
 * converter convention (NULL pointer with no pending error is accepted
 * only when PyLong_AsVoidPtr succeeded). */
static int
rnd_state_converter(PyObject *obj, rnd_state_t **state)
{
    *state = (rnd_state_t *) PyLong_AsVoidPtr(obj);
    return (*state != NULL || !PyErr_Occurred());
}
/* Return the address of this thread's Python-flavoured state as an int
 * (suitable for feeding back through rnd_state_converter). */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_get_py_state_ptr(PyObject *self)
{
    return PyLong_FromVoidPtr(numba_get_py_random_state());
}
/* Return the address of this thread's NumPy-flavoured state as an int. */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_get_np_state_ptr(PyObject *self)
{
    return PyLong_FromVoidPtr(numba_get_np_random_state());
}
/* Python wrapper: advance the given state by one whole MT twist round
 * (mainly useful for testing). */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_shuffle(PyObject *self, PyObject *arg)
{
    rnd_state_t *state;
    if (!rnd_state_converter(arg, &state))
        return NULL;
    numba_rnd_shuffle(state);
    Py_RETURN_NONE;
}
/* Python wrapper: install an (index, [624 ints]) tuple into the given
 * state and reset the gaussian cache.
 * NOTE(review): *index* is stored without range validation; a value
 * outside [0, MT_N] would make get_next_int32() read past mt[] —
 * presumably callers only pass values from a previous get_state;
 * confirm. */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_set_state(PyObject *self, PyObject *args)
{
    int i, index;
    rnd_state_t *state;
    PyObject *tuplearg, *intlist;
    if (!PyArg_ParseTuple(args, "O&O!:rnd_set_state",
                          rnd_state_converter, &state,
                          &PyTuple_Type, &tuplearg))
        return NULL;
    if (!PyArg_ParseTuple(tuplearg, "iO!", &index, &PyList_Type, &intlist))
        return NULL;
    if (PyList_GET_SIZE(intlist) != MT_N) {
        PyErr_SetString(PyExc_ValueError, "list object has wrong size");
        return NULL;
    }
    state->index = index;
    for (i = 0; i < MT_N; i++) {
        PyObject *v = PyList_GET_ITEM(intlist, i);
        unsigned long x = PyLong_AsUnsignedLong(v);
        /* -1 is both a valid result and the error marker; disambiguate */
        if (x == (unsigned long) -1 && PyErr_Occurred())
            return NULL;
        state->mt[i] = (unsigned int) x;
    }
    state->has_gauss = 0;
    state->gauss = 0.0;
    state->is_initialized = 1;
    Py_RETURN_NONE;
}
/* Python wrapper: return (index, [624 ints]) for the given state.
 * The "N" format code in Py_BuildValue steals the intlist reference,
 * so no explicit DECREF is needed on success. */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_get_state(PyObject *self, PyObject *arg)
{
    PyObject *intlist;
    int i;
    rnd_state_t *state;
    if (!rnd_state_converter(arg, &state))
        return NULL;
    intlist = PyList_New(MT_N);
    if (intlist == NULL)
        return NULL;
    for (i = 0; i < MT_N; i++) {
        PyObject *v = PyLong_FromUnsignedLong(state->mt[i]);
        if (v == NULL) {
            Py_DECREF(intlist);
            return NULL;
        }
        PyList_SET_ITEM(intlist, i, v);
    }
    return Py_BuildValue("iN", state->index, intlist);
}
/* Python wrapper: seed a state either from a single uint32 or, if that
 * parse fails, from a bytes-like object (the parse error is cleared and
 * the alternative signature tried). */
NUMBA_EXPORT_FUNC(PyObject *)
_numba_rnd_seed(PyObject *self, PyObject *args)
{
    unsigned int seed;
    rnd_state_t *state;
    if (!PyArg_ParseTuple(args, "O&I:rnd_seed",
                          rnd_state_converter, &state, &seed)) {
        /* rnd_seed_*(bytes-like object) */
        Py_buffer buf;
        PyErr_Clear();
        if (!PyArg_ParseTuple(args, "O&s*:rnd_seed",
                              rnd_state_converter, &state, &buf))
            return NULL;
        /* rnd_seed_with_bytes() releases *buf* on every path */
        if (rnd_seed_with_bytes(state, &buf))
            return NULL;
        else
            Py_RETURN_NONE;
    }
    else {
        /* rnd_seed_*(int32) */
        numba_rnd_init(state, seed);
        Py_RETURN_NONE;
    }
}
/*
 * Random distribution helpers.
 * Most code straight from Numpy's distributions.c.
 */
#ifndef M_PI
/* M_PI is a POSIX extension, not mandated by the C standard; provide it
 * for compilers/headers that omit it. */
#define M_PI 3.14159265358979323846264338328
#endif
/* Draw the next raw 32-bit output from the Mersenne Twister, refreshing
 * the state vector when it is exhausted (index == MT_N), then applying
 * the standard MT19937 tempering transform to improve equidistribution. */
NUMBA_EXPORT_FUNC(unsigned int)
get_next_int32(rnd_state_t *state)
{
    unsigned int y;
    if (state->index == MT_N) {
        numba_rnd_shuffle(state);
        state->index = 0;
    }
    y = state->mt[state->index++];
    /* Tempering */
    y ^= (y >> 11);
    y ^= (y << 7) & 0x9d2c5680U;
    y ^= (y << 15) & 0xefc60000U;
    y ^= (y >> 18);
    return y;
}
/* Draw a uniformly distributed double in [0, 1) with 53 bits of
 * precision, combining two 32-bit draws (27 high bits + 26 high bits). */
NUMBA_EXPORT_FUNC(double)
get_next_double(rnd_state_t *state)
{
    double hi = get_next_int32(state) >> 5;   /* top 27 bits */
    double lo = get_next_int32(state) >> 6;   /* top 26 bits */
    return (hi * 67108864.0 + lo) / 9007199254740992.0;   /* / 2**53 */
}
/* Natural logarithm of the gamma function.
 * Uses a Stirling-type asymptotic series (coefficients a[]); for
 * x <= 7 the argument is first shifted up by an integer n and the
 * result corrected through log Gamma(x) = log Gamma(x+1) - log(x).
 * Straight from NumPy's distributions.c. */
NUMBA_EXPORT_FUNC(double)
loggam(double x)
{
    double x0, x2, xp, gl, gl0;
    long k, n;
    static double a[10] = {8.333333333333333e-02,-2.777777777777778e-03,
                           7.936507936507937e-04,-5.952380952380952e-04,
                           8.417508417508418e-04,-1.917526917526918e-03,
                           6.410256410256410e-03,-2.955065359477124e-02,
                           1.796443723688307e-01,-1.39243221690590e+00};
    x0 = x;
    n = 0;
    /* Exact zeros of log-gamma */
    if ((x == 1.0) || (x == 2.0))
    {
        return 0.0;
    }
    else if (x <= 7.0)
    {
        /* Shift argument into the series' accurate range */
        n = (long)(7 - x);
        x0 = x + n;
    }
    x2 = 1.0/(x0*x0);
    xp = 2*M_PI;
    /* Evaluate the series by Horner's scheme */
    gl0 = a[9];
    for (k=8; k>=0; k--)
    {
        gl0 *= x2;
        gl0 += a[k];
    }
    gl = gl0/x0 + 0.5*log(xp) + (x0-0.5)*log(x0) - x0;
    if (x <= 7.0)
    {
        /* Undo the argument shift via the recurrence */
        for (k=1; k<=n; k++)
        {
            gl -= log(x0-1.0);
            x0 -= 1.0;
        }
    }
    return gl;
}
/* Draw a Poisson variate using the PTRS transformed-rejection sampler. */
NUMBA_EXPORT_FUNC(int64_t)
numba_poisson_ptrs(rnd_state_t *state, double lam)
{
    /* This method is invoked only if the parameter lambda of this
     * distribution is big enough ( >= 10 ). The algorithm used is
     * described in "Hörmann, W. 1992. 'The Transformed Rejection
     * Method for Generating Poisson Random Variables'.
     * The implementation comes straight from Numpy.
     */
    int64_t k;
    double U, V, slam, loglam, a, b, invalpha, vr, us;
    slam = sqrt(lam);
    loglam = log(lam);
    /* Constants below are the PTRS hat-function parameters from the paper */
    b = 0.931 + 2.53*slam;
    a = -0.059 + 0.02483*b;
    invalpha = 1.1239 + 1.1328/(b-3.4);
    vr = 0.9277 - 3.6224/(b-2);
    /* Rejection loop: propose k, then accept via the fast squeeze or the
     * full log-density comparison */
    while (1)
    {
        U = get_next_double(state) - 0.5;
        V = get_next_double(state);
        us = 0.5 - fabs(U);
        k = (int64_t) floor((2*a/us + b)*U + lam + 0.43);
        /* Fast acceptance region */
        if ((us >= 0.07) && (V <= vr))
        {
            return k;
        }
        /* Quick rejection */
        if ((k < 0) ||
            ((us < 0.013) && (V > us)))
        {
            continue;
        }
        /* Exact acceptance test against the Poisson log-pmf */
        if ((log(V) + log(invalpha) - log(a/(us*us)+b)) <=
            (-lam + (double) k*loglam - loggam((double) k+1)))
        {
            return k;
        }
    }
}
#include "_pymodule.h"
#include <string.h>
#include <time.h>
#include <assert.h>
#include "_numba_common.h"
#include "_typeof.h"
#include "_hashtable.h"
#include "_devicearray.h"
#include "pyerrors.h"
#define NPY_NO_DEPRECATED_API NPY_1_7_API_VERSION
#include <numpy/ndarrayobject.h>
/* Cached typecodes for basic scalar types */
static int tc_int8;
static int tc_int16;
static int tc_int32;
static int tc_int64;
static int tc_uint8;
static int tc_uint16;
static int tc_uint32;
static int tc_uint64;
static int tc_float32;
static int tc_float64;
static int tc_complex64;
static int tc_complex128;
/* Dense array of the 12 scalar typecodes above, indexable by the result
 * of dtype_num_to_typecode() */
static int BASIC_TYPECODES[12];
static int tc_intp;
/* The type object for the numba .dispatcher.OmittedArg class
 * that wraps omitted arguments.
 */
static PyObject *omittedarg_type;
/* dict: scalar dtype -> typecode (see get_cached_typecode) */
static PyObject *typecache;
/* dict: (ndim, layout, dtype) tuple -> typecode (see ndarray_key) */
static PyObject *ndarray_typecache;
/* dict interning structured dtypes so fingerprinting by pointer is stable */
static PyObject *structured_dtypes;
/* Interned strings; presumably initialized by module setup outside this
 * chunk — confirm. */
static PyObject *str_typeof_pyval = NULL;
static PyObject *str_value = NULL;
static PyObject *str_numba_type = NULL;
/* CUDA device array API */
void **DeviceArray_API;
/*
 * Type fingerprint computation.
 */
typedef struct {
    /* A buffer the fingerprint will be written to */
    char *buf;
    /* Number of bytes written so far */
    size_t n;
    /* Current capacity of buf, in bytes */
    size_t allocated;
    /* A preallocated buffer, sufficient to fit the fingerprint for most types */
    char static_buf[40];
} string_writer_t;
/* Set up *w* as empty, writing into its embedded static buffer. */
static void
string_writer_init(string_writer_t *w)
{
    w->n = 0;
    w->buf = w->static_buf;
    w->allocated = sizeof(w->static_buf) / sizeof(unsigned char);
}
/* Release any heap buffer owned by *w*; static storage needs no free. */
static void
string_writer_clear(string_writer_t *w)
{
    if (w->buf == w->static_buf)
        return;
    free(w->buf);
}
/* Shallow-move the contents of *src* into *dest*.
 * If *src* is still using its static buffer, the bytes are copied into
 * dest's own static buffer; otherwise *dest* takes over src's heap
 * pointer. NOTE(review): after a heap transfer both writers point at the
 * same buffer — the caller must not clear *src* afterwards (see the
 * ownership comment in typecode_using_fingerprint). */
static void
string_writer_move(string_writer_t *dest, const string_writer_t *src)
{
    dest->n = src->n;
    dest->allocated = src->allocated;
    if (src->buf == src->static_buf) {
        dest->buf = dest->static_buf;
        memcpy(dest->buf, src->buf, src->n);
    }
    else {
        dest->buf = src->buf;
    }
}
/* Ensure at least *bytes* can be appended to the string writer's buffer.
 * Grows geometrically (4x + 1) to amortize reallocations.
 * Returns 0 on success, -1 with MemoryError set on allocation failure.
 *
 * Fixes over the previous version:
 *  - when switching from the static buffer to a heap buffer, the bytes
 *    already written (w->n) were NOT copied, corrupting any fingerprint
 *    longer than the static buffer; they are now preserved.
 *  - a failed realloc() overwrote w->buf with NULL, leaking the old
 *    allocation; the result is now staged in a temporary so the old
 *    buffer stays owned (and freeable) by the writer on failure.
 */
static int
string_writer_ensure(string_writer_t *w, size_t bytes)
{
    size_t newsize;
    char *newbuf;
    bytes += w->n;
    if (bytes <= w->allocated)
        return 0;
    newsize = (w->allocated << 2) + 1;
    if (newsize < bytes)
        newsize = bytes;
    if (w->buf == w->static_buf) {
        newbuf = (char *) malloc(newsize);
        if (newbuf != NULL)
            /* Preserve the bytes already written into the static buffer */
            memcpy(newbuf, w->static_buf, w->n);
    }
    else {
        newbuf = (char *) realloc(w->buf, newsize);
    }
    if (newbuf == NULL) {
        PyErr_NoMemory();
        return -1;
    }
    w->buf = newbuf;
    w->allocated = newsize;
    return 0;
}
/* Append a single byte; returns 0 on success, -1 on allocation failure. */
static int
string_writer_put_char(string_writer_t *w, unsigned char c)
{
    if (string_writer_ensure(w, 1) != 0)
        return -1;
    w->buf[w->n] = c;
    w->n += 1;
    return 0;
}
/* Append a 32-bit value in little-endian byte order. */
static int
string_writer_put_int32(string_writer_t *w, unsigned int v)
{
    int shift;
    if (string_writer_ensure(w, 4))
        return -1;
    for (shift = 0; shift < 32; shift += 8)
        w->buf[w->n++] = (v >> shift) & 0xff;
    return 0;
}
/* Append a pointer-sized integer in little-endian byte order.
 * Used to mix object identities (interned dtypes, Python type objects)
 * into a fingerprint. The width is fixed at compile time by
 * NPY_SIZEOF_PY_INTPTR_T (4 or 8). */
static int
string_writer_put_intp(string_writer_t *w, npy_intp v)
{
    if (string_writer_ensure(w, NPY_SIZEOF_PY_INTPTR_T))
        return -1;
    w->buf[w->n] = v & 0xff;
    w->buf[w->n + 1] = (v >> 8) & 0xff;
    w->buf[w->n + 2] = (v >> 16) & 0xff;
    w->buf[w->n + 3] = (v >> 24) & 0xff;
#if NPY_SIZEOF_PY_INTPTR_T == 8
    w->buf[w->n + 4] = (v >> 32) & 0xff;
    w->buf[w->n + 5] = (v >> 40) & 0xff;
    w->buf[w->n + 6] = (v >> 48) & 0xff;
    w->buf[w->n + 7] = (v >> 56) & 0xff;
#endif
    w->n += NPY_SIZEOF_PY_INTPTR_T;
    return 0;
}
/* Append a NUL-terminated string, including its terminator; a NULL
 * pointer is encoded as a single 0 byte. */
static int
string_writer_put_string(string_writer_t *w, const char *s)
{
    size_t nbytes;
    if (s == NULL)
        return string_writer_put_char(w, 0);
    nbytes = strlen(s) + 1;
    if (string_writer_ensure(w, nbytes))
        return -1;
    memcpy(w->buf + w->n, s, nbytes);
    w->n += nbytes;
    return 0;
}
/* One-byte opcodes tagging each component of a type fingerprint. */
enum opcode {
    OP_START_TUPLE = '(',
    OP_END_TUPLE = ')',
    OP_INT = 'i',
    OP_FLOAT = 'f',
    OP_COMPLEX = 'c',
    OP_BOOL = '?',
    OP_OMITTED = '!',
    OP_BYTEARRAY = 'a',
    OP_BYTES = 'b',
    OP_NONE = 'n',
    OP_LIST = '[',
    OP_SET = '{',
    OP_BUFFER = 'B',
    OP_NP_SCALAR = 'S',
    OP_NP_ARRAY = 'A',
    OP_NP_DTYPE = 'D'
};
/* Call func(w, arg) and propagate a non-zero failure as -1. */
#define TRY(func, w, arg) \
    do { \
        if (func(w, arg)) return -1; \
    } while (0)
/* Raise NotImplementedError for values no fingerprint rule matches and
 * return -1; typecode_using_fingerprint() catches exactly this exception
 * to fall back on the pure-Python typeof() path. */
static int
fingerprint_unrecognized(void)
{
    PyErr_SetString(PyExc_NotImplementedError,
                    "cannot compute type fingerprint for value");
    return -1;
}
/* Append the fingerprint of a NumPy dtype to *w*.
 * Simple scalar dtypes are encoded as their one-byte type number;
 * structured (void) dtypes are interned and encoded by pointer identity;
 * datetime/timedelta dtypes additionally encode their unit metadata.
 * Returns 0 on success, -1 with an exception set otherwise.
 */
static int
compute_dtype_fingerprint(string_writer_t *w, PyArray_Descr *descr)
{
    int typenum = descr->type_num;
    if (typenum < NPY_OBJECT)
        return string_writer_put_char(w, (char) typenum);
    if (typenum == NPY_VOID) {
        /* Structured dtype: serialize the dtype pointer. Unfortunately,
         * some structured dtypes can be ephemeral, so we have to
         * intern them to avoid pointer reuse and fingerprint collisions.
         * (e.g. np.recarray(dtype=some_dtype) creates a new dtype
         * equal to some_dtype)
         */
        PyObject *interned = PyDict_GetItem(structured_dtypes,
                                            (PyObject *) descr);
        if (interned == NULL) {
            interned = (PyObject *) descr;
            if (PyDict_SetItem(structured_dtypes, interned, interned))
                return -1;
        }
        TRY(string_writer_put_char, w, (char) typenum);
        return string_writer_put_intp(w, (npy_intp) interned);
    }
#if NPY_API_VERSION >= 0x00000007
    if (PyTypeNum_ISDATETIME(typenum)) {
        PyArray_DatetimeMetaData *md;
        md = &(((PyArray_DatetimeDTypeMetaData *)descr->c_metadata)->meta);
        TRY(string_writer_put_char, w, (char) typenum);
        TRY(string_writer_put_char, w, (char) md->base);
        /* Bug fix: md->num was previously narrowed through (char) before
         * being widened into the 32-bit slot, truncating unit multipliers
         * above 127 and letting distinct datetime dtypes share a
         * fingerprint. Write the full value instead. */
        return string_writer_put_int32(w, (int) md->num);
    }
#endif
    return fingerprint_unrecognized();
}
/* Append the fingerprint of an arbitrary Python value to *w*.
 * Returns 0 on success, -1 with an exception set on failure
 * (NotImplementedError for unsupported types, via
 * fingerprint_unrecognized()). Recurses into tuple items, omitted-arg
 * default values, the first list item and a single set item. */
static int
compute_fingerprint(string_writer_t *w, PyObject *val)
{
    /*
     * Implementation note: for performance, we start with common
     * types that can be tested with fast checks.
     */
    if (val == Py_None)
        return string_writer_put_char(w, OP_NONE);
    if (PyBool_Check(val))
        return string_writer_put_char(w, OP_BOOL);
    /* Note we avoid matching int subclasses such as IntEnum */
    if (PyInt_CheckExact(val) || PyLong_CheckExact(val))
        return string_writer_put_char(w, OP_INT);
    if (PyFloat_Check(val))
        return string_writer_put_char(w, OP_FLOAT);
    if (PyComplex_CheckExact(val))
        return string_writer_put_char(w, OP_COMPLEX);
    if (PyTuple_Check(val)) {
        if(PyTuple_CheckExact(val)) {
            /* Plain tuple: '(' item-fingerprints ')' */
            Py_ssize_t i, n;
            n = PyTuple_GET_SIZE(val);
            TRY(string_writer_put_char, w, OP_START_TUPLE);
            for (i = 0; i < n; i++)
                TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i));
            TRY(string_writer_put_char, w, OP_END_TUPLE);
            return 0;
        }
        /* as per typeof.py, check "_asdict" for namedtuple. */
        else if(PyObject_HasAttrString(val, "_asdict"))
        {
            /*
             * This encodes the class name and field names of a namedtuple into
             * the fingerprint on the condition that the number of fields is
             * small (<10) and that the class name and field names are encodable
             * as ASCII.
             */
            PyObject * clazz = NULL;
            PyObject * name = NULL;
            PyObject * _fields = PyObject_GetAttrString(val, "_fields");
            PyObject * field = NULL;
            PyObject * ascii_str = NULL;
            Py_ssize_t i, n, j, flen;
            char * buf = NULL;
            int ret;
            /* NOTE(review): several of the early returns below leak
             * _fields and/or ascii_str; these are attribute-missing /
             * OOM paths only, but confirm before relying on this in
             * error-heavy workloads. */
            clazz = PyObject_GetAttrString(val, "__class__");
            if (clazz == NULL)
                return -1;
            name = PyObject_GetAttrString(clazz, "__name__");
            Py_DECREF(clazz);
            if (name == NULL)
                return -1;
            ascii_str = PyUnicode_AsEncodedString(name, "ascii", "ignore");
            Py_DECREF(name);
            if (ascii_str == NULL)
                return -1;
            ret = PyBytes_AsStringAndSize(ascii_str, &buf, &flen);
            if (ret == -1)
                return -1;
            /* Encode the class name bytes directly into the fingerprint */
            for(j = 0; j < flen; j++) {
                TRY(string_writer_put_char, w, buf[j]);
            }
            Py_DECREF(ascii_str);
            if (_fields == NULL)
                return -1;
            /* NOTE(review): iterates _fields using the tuple's own
             * length — assumes len(_fields) == len(val); confirm. */
            n = PyTuple_GET_SIZE(val);
            TRY(string_writer_put_char, w, OP_START_TUPLE);
            for (i = 0; i < n; i++) {
                field = PyTuple_GET_ITEM(_fields, i);
                if (field == NULL)
                    return -1;
                ascii_str = PyUnicode_AsEncodedString(field, "ascii", "ignore");
                if (ascii_str == NULL)
                    return -1;
                ret = PyBytes_AsStringAndSize(ascii_str, &buf, &flen);
                if (ret == -1)
                    return -1;
                /* Field name bytes, followed by the field value's own
                 * fingerprint */
                for(j = 0; j < flen; j++) {
                    TRY(string_writer_put_char, w, buf[j]);
                }
                Py_DECREF(ascii_str);
                TRY(compute_fingerprint, w, PyTuple_GET_ITEM(val, i));
            }
            TRY(string_writer_put_char, w, OP_END_TUPLE);
            Py_DECREF(_fields);
            return 0;
        }
    }
    if (PyBytes_Check(val))
        return string_writer_put_char(w, OP_BYTES);
    if (PyByteArray_Check(val))
        return string_writer_put_char(w, OP_BYTEARRAY);
    if ((PyObject *) Py_TYPE(val) == omittedarg_type) {
        /* OmittedArg wrapper: '!' + fingerprint of the default value */
        PyObject *default_val = PyObject_GetAttr(val, str_value);
        if (default_val == NULL)
            return -1;
        TRY(string_writer_put_char, w, OP_OMITTED);
        TRY(compute_fingerprint, w, default_val);
        Py_DECREF(default_val);
        return 0;
    }
    if (PyArray_IsScalar(val, Generic)) {
        /* Note: PyArray_DescrFromScalar() may be a bit slow on
           non-trivial types. */
        PyArray_Descr *descr = PyArray_DescrFromScalar(val);
        if (descr == NULL)
            return -1;
        TRY(string_writer_put_char, w, OP_NP_SCALAR);
        TRY(compute_dtype_fingerprint, w, descr);
        Py_DECREF(descr);
        return 0;
    }
    if (PyArray_Check(val)) {
        /* ndarray: 'A' + ndim + layout char + mutability char + dtype */
        PyArrayObject *ary = (PyArrayObject *) val;
        int ndim = PyArray_NDIM(ary);
        TRY(string_writer_put_char, w, OP_NP_ARRAY);
        TRY(string_writer_put_int32, w, ndim);
        if (PyArray_IS_C_CONTIGUOUS(ary))
            TRY(string_writer_put_char, w, 'C');
        else if (PyArray_IS_F_CONTIGUOUS(ary))
            TRY(string_writer_put_char, w, 'F');
        else
            TRY(string_writer_put_char, w, 'A');
        if (PyArray_ISWRITEABLE(ary))
            TRY(string_writer_put_char, w, 'W');
        else
            TRY(string_writer_put_char, w, 'R');
        return compute_dtype_fingerprint(w, PyArray_DESCR(ary));
    }
    if (PyList_Check(val)) {
        Py_ssize_t n = PyList_GET_SIZE(val);
        if (n == 0) {
            PyErr_SetString(PyExc_ValueError,
                            "cannot compute fingerprint of empty list");
            return -1;
        }
        /* Only the first item is considered, as in typeof.py */
        TRY(string_writer_put_char, w, OP_LIST);
        TRY(compute_fingerprint, w, PyList_GET_ITEM(val, 0));
        return 0;
    }
    /* Note we only accept sets, not frozensets */
    if (Py_TYPE(val) == &PySet_Type) {
        Py_hash_t h;
        PyObject *item;
        Py_ssize_t pos = 0;
        /* Only one item is considered, as in typeof.py */
        if (!_PySet_NextEntry(val, &pos, &item, &h)) {
            /* Empty set */
            PyErr_SetString(PyExc_ValueError,
                            "cannot compute fingerprint of empty set");
            return -1;
        }
        TRY(string_writer_put_char, w, OP_SET);
        TRY(compute_fingerprint, w, item);
        return 0;
    }
    if (PyObject_CheckBuffer(val)) {
        Py_buffer buf;
        int flags = PyBUF_ND | PyBUF_STRIDES | PyBUF_FORMAT;
        char contig;
        int ndim;
        char readonly;
        /* Attempt to get a writable buffer, then fallback on read-only */
        if (PyObject_GetBuffer(val, &buf, flags | PyBUF_WRITABLE)) {
            PyErr_Clear();
            if (PyObject_GetBuffer(val, &buf, flags))
                goto _unrecognized;
        }
        if (PyBuffer_IsContiguous(&buf, 'C'))
            contig = 'C';
        else if (PyBuffer_IsContiguous(&buf, 'F'))
            contig = 'F';
        else
            contig = 'A';
        ndim = buf.ndim;
        readonly = buf.readonly ? 'R' : 'W';
        if (string_writer_put_char(w, OP_BUFFER) ||
            string_writer_put_int32(w, ndim) ||
            string_writer_put_char(w, contig) ||
            string_writer_put_char(w, readonly) ||
            string_writer_put_string(w, buf.format) ||
            /* We serialize the object's Python type as well, to
               distinguish between types which have Numba specializations
               (e.g. array.array() vs. memoryview)
            */
            string_writer_put_intp(w, (npy_intp) Py_TYPE(val))) {
            PyBuffer_Release(&buf);
            return -1;
        }
        PyBuffer_Release(&buf);
        return 0;
    }
    if (NUMBA_PyArray_DescrCheck(val)) {
        TRY(string_writer_put_char, w, OP_NP_DTYPE);
        return compute_dtype_fingerprint(w, (PyArray_Descr *) val);
    }
_unrecognized:
    /* Type not recognized */
    return fingerprint_unrecognized();
}
/* Compute *val*'s type fingerprint and return it as a bytes object,
 * or NULL with an exception set on failure. */
PyObject *
typeof_compute_fingerprint(PyObject *val)
{
    string_writer_t w;
    PyObject *res = NULL;
    string_writer_init(&w);
    if (compute_fingerprint(&w, val) == 0)
        res = PyBytes_FromStringAndSize(w.buf, w.n);
    string_writer_clear(&w);
    return res;
}
/*
 * Getting the typecode from a Type object.
 */
/* Read the integer typecode stored on a numba Type object's "_code"
 * attribute; returns -1 with an exception set on failure. */
static int
_typecode_from_type_object(PyObject *tyobj) {
    PyObject *code_attr;
    int result;
    code_attr = PyObject_GetAttrString(tyobj, "_code");
    if (code_attr == NULL)
        return -1;
    result = PyLong_AsLong(code_attr);
    Py_DECREF(code_attr);
    return result;
}
/* When we want to cache the type's typecode for later lookup, we need to
keep a reference to the returned type object so that it cannot be
deleted. This is because of the following events occurring when first
using a @jit function for a given set of types:
1. typecode_fallback requests a new typecode for an arbitrary Python value;
this implies creating a Numba type object (on the first dispatcher call);
the typecode cache is then populated.
2. matching of the typecode list in _dispatcherimpl.cpp fails, since the
typecode is new.
3. we have to compile: compile_and_invoke() is called, it will invoke
Dispatcher_Insert to register the new signature.
The reference to the Numba type object returned in step 1 is deleted as
soon as we call Py_DECREF() on it, since we are holding the only
reference. If this happens and we use the typecode we got to populate the
cache, then the cache won't ever return the correct typecode, and the
dispatcher will never successfully match the typecodes with those of
some already-compiled instance. So we need to make sure that we don't
call Py_DECREF() on objects whose typecode will be used to populate the
cache. This is ensured by calling _typecode_fallback with
retain_reference == 0.
Note that technically we are leaking the reference, since we do not continue
to hold a pointer to the type object that we get back from typeof_pyval.
However, we don't need to refer to it again, we just need to make sure that
it is never deleted.
*/
/* Resolve *val*'s typecode by calling back into Python.
 * Values exposing "_numba_type_" supply their type directly; otherwise
 * the dispatcher's typeof_pyval() is invoked. When retain_reference is
 * zero the type object's reference is dropped; see the large comment
 * above for why some callers must keep it alive instead.
 * Returns -1 with an exception set on failure. */
static int
_typecode_fallback(PyObject *dispatcher, PyObject *val,
                   int retain_reference) {
    PyObject *numba_type;
    int typecode;
    /*
     * For values that define "_numba_type_", which holds a numba Type
     * instance that should be used as the type of the value.
     * Note this is done here, not in typeof_typecode(), so that
     * some values can still benefit from fingerprint caching.
     */
    if (PyObject_HasAttr(val, str_numba_type)) {
        numba_type = PyObject_GetAttrString(val, "_numba_type_");
        if (!numba_type)
            return -1;
    }
    else {
        // Go back to the interpreter
        numba_type = PyObject_CallMethodObjArgs((PyObject *) dispatcher,
                                                str_typeof_pyval, val, NULL);
    }
    /* Redundant for the attribute branch (checked above), but required
     * for the dispatcher-call path. */
    if (!numba_type)
        return -1;
    typecode = _typecode_from_type_object(numba_type);
    if (!retain_reference)
        Py_DECREF(numba_type);
    return typecode;
}
/* Variations on _typecode_fallback for convenience */
/* Fallback that drops the type object's reference (used when the result
 * will not be stored in a long-lived cache). */
static
int typecode_fallback(PyObject *dispatcher, PyObject *val) {
    return _typecode_fallback(dispatcher, val, 0);
}
/* Fallback that deliberately keeps (leaks) a reference to the type
 * object so a typecode stored in a cache can never dangle — see the
 * explanatory comment above _typecode_fallback. */
static
int typecode_fallback_keep_ref(PyObject *dispatcher, PyObject *val) {
    return _typecode_fallback(dispatcher, val, 1);
}
/* A cache mapping fingerprints (string_writer_t *) to typecodes (int). */
static _Numba_hashtable_t *fingerprint_hashtable = NULL;
/* Hash a fingerprint's bytes with the FNV variant historically used for
 * Python 2 string hashing; the reserved value -1 is remapped to -2 as
 * in CPython. An empty fingerprint hashes to 0. */
static Py_uhash_t
hash_writer(const void *key)
{
    string_writer_t *writer = (string_writer_t *) key;
    Py_uhash_t x = 0;
    /* The old FNV algorithm used by Python 2 */
    if (writer->n > 0) {
        unsigned char *p = (unsigned char *) writer->buf;
        Py_ssize_t len = writer->n;
        x ^= *p << 7;
        while (--len >= 0)
            x = (1000003*x) ^ *p++;
        x ^= writer->n;
        if (x == (Py_uhash_t) -1)
            x = -2;
    }
    return x;
}
static int
compare_writer(const void *key, const _Numba_hashtable_entry_t *entry)
{
string_writer_t *v = (string_writer_t *) key;
string_writer_t *w = (string_writer_t *) entry->key;
if (v->n != w->n)
return 0;
return memcmp(v->buf, w->buf, v->n) == 0;
}
/* Try to compute *val*'s typecode using its fingerprint and the
 * fingerprint->typecode cache.
 */
/* On a cache hit this avoids re-entering Python entirely. On a miss the
 * slow fallback runs and the result is memoized; unsupported values
 * (NotImplementedError from compute_fingerprint) bypass the cache.
 * NOTE(review): if _Numba_HASHTABLE_SET fails, *key* itself is leaked
 * while the buffer just transferred into it is freed through &w —
 * OOM-only path, but confirm the double-management is acceptable. */
static int
typecode_using_fingerprint(PyObject *dispatcher, PyObject *val)
{
    int typecode;
    string_writer_t w;
    string_writer_init(&w);
    if (compute_fingerprint(&w, val)) {
        string_writer_clear(&w);
        if (PyErr_ExceptionMatches(PyExc_NotImplementedError)) {
            /* Can't compute a type fingerprint for the given value,
               fall back on typeof() without caching. */
            PyErr_Clear();
            return typecode_fallback(dispatcher, val);
        }
        return -1;
    }
    if (_Numba_HASHTABLE_GET(fingerprint_hashtable, &w, typecode) > 0) {
        /* Cache hit */
        string_writer_clear(&w);
        return typecode;
    }
    /* Not found in cache: invoke pure Python typeof() and cache result.
     * Note we have to keep the type alive forever as explained
     * above in _typecode_fallback().
     */
    typecode = typecode_fallback_keep_ref(dispatcher, val);
    if (typecode >= 0) {
        string_writer_t *key = (string_writer_t *) malloc(sizeof(string_writer_t));
        if (key == NULL) {
            string_writer_clear(&w);
            PyErr_NoMemory();
            return -1;
        }
        /* Ownership of the string writer's buffer will be transferred
         * to the hash table.
         */
        string_writer_move(key, &w);
        if (_Numba_HASHTABLE_SET(fingerprint_hashtable, key, typecode)) {
            string_writer_clear(&w);
            PyErr_NoMemory();
            return -1;
        }
    }
    return typecode;
}
/*
 * Direct lookup table for extra-fast typecode resolution of simple array types.
 */
#define N_DTYPES 12
#define N_NDIM 5 /* Fast path for up to 5D array */
#define N_LAYOUT 3
/* [ndim-1][layout][dtype-index] -> typecode; entries start at -1 and are
 * populated lazily by typecode_ndarray(). */
static int cached_arycode[N_NDIM][N_LAYOUT][N_DTYPES];
/* Convert a Numpy dtype number to an internal index into cached_arycode.
   The returned value must also be a valid index into BASIC_TYPECODES.
   Returns -1 for types not covered by the fast lookup table. */
static int dtype_num_to_typecode(int type_num) {
    if (type_num == NPY_INT8)
        return 0;
    if (type_num == NPY_INT16)
        return 1;
    if (type_num == NPY_INT32)
        return 2;
    if (type_num == NPY_INT64)
        return 3;
    if (type_num == NPY_UINT8)
        return 4;
    if (type_num == NPY_UINT16)
        return 5;
    if (type_num == NPY_UINT32)
        return 6;
    if (type_num == NPY_UINT64)
        return 7;
    if (type_num == NPY_FLOAT32)
        return 8;
    if (type_num == NPY_FLOAT64)
        return 9;
    if (type_num == NPY_COMPLEX64)
        return 10;
    if (type_num == NPY_COMPLEX128)
        return 11;
    /* Type not included in the global lookup table */
    return -1;
}
/* Look up a scalar dtype's cached typecode; -1 when absent.
 * PyDict_GetItem returns a borrowed reference and suppresses errors. */
static
int get_cached_typecode(PyArray_Descr* descr) {
    PyObject* tmpobject = PyDict_GetItem(typecache, (PyObject*)descr);
    if (tmpobject == NULL)
        return -1;
    return PyLong_AsLong(tmpobject);
}
/* Record a scalar dtype -> typecode mapping in the typecache dict.
 * NOTE(review): PyLong_FromLong may return NULL on OOM and is passed
 * straight to PyDict_SetItem; errors are silently ignored — confirm
 * this best-effort behaviour is intended. */
static
void cache_typecode(PyArray_Descr* descr, int typecode) {
    PyObject* value = PyLong_FromLong(typecode);
    PyDict_SetItem(typecache, (PyObject*)descr, value);
    Py_DECREF(value);
}
/* Build the (ndim, layout, descr) tuple used as an ndarray_typecache
 * key. Returns a new reference, or NULL on allocation failure.
 * NOTE(review): the intermediate PyLong_FromLong results are not
 * NULL-checked before PyTuple_Pack — OOM path only; confirm. */
static
PyObject* ndarray_key(int ndim, int layout, PyArray_Descr* descr) {
    PyObject* tmpndim = PyLong_FromLong(ndim);
    PyObject* tmplayout = PyLong_FromLong(layout);
    PyObject* key = PyTuple_Pack(3, tmpndim, tmplayout, descr);
    Py_DECREF(tmpndim);
    Py_DECREF(tmplayout);
    return key;
}
/* Look up the cached typecode for an (ndim, layout, dtype) combination.
 * Returns -1 when the combination is not cached or the key could not be
 * built.
 * Fix: the lookup key was previously leaked on every cache miss because
 * the early return skipped its Py_DECREF; the key is now released
 * unconditionally once the (borrowed-reference) lookup is done. */
static
int get_cached_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr) {
    PyObject *tmpobject;
    PyObject *key = ndarray_key(ndim, layout, descr);
    if (key == NULL)
        return -1;
    tmpobject = PyDict_GetItem(ndarray_typecache, key);
    Py_DECREF(key);
    if (tmpobject == NULL)
        return -1;
    return PyLong_AsLong(tmpobject);
}
static
void cache_ndarray_typecode(int ndim, int layout, PyArray_Descr* descr,
int typecode) {
PyObject* key = ndarray_key(ndim, layout, descr);
PyObject* value = PyLong_FromLong(typecode);
PyDict_SetItem(ndarray_typecache, key, value);
Py_DECREF(key);
Py_DECREF(value);
}
/* Resolve the Numba typecode of a NumPy ndarray.
 *
 * Fast path: a "behaved" (aligned + writeable) array whose dtype is one of
 * the basic types and whose ndim is in [1, N_NDIM] is resolved via the
 * cached_arycode direct-lookup table.  Structured (NPY_VOID) arrays go
 * through the ndarray_typecache dict; everything else falls through to
 * fingerprint-based resolution. */
static
int typecode_ndarray(PyObject *dispatcher, PyArrayObject *ary) {
    int typecode;
    int dtype;
    int ndim = PyArray_NDIM(ary);
    int layout = 0;

    /* The order in which we check for the right contiguous-ness is important.
       The order must match the order by numba.numpy_support.map_layout.
       Further, only *contiguous-ness* is checked, not alignment, byte order or
       write permissions.
    */
    if (PyArray_IS_C_CONTIGUOUS(ary)){
        layout = 1;
    } else if (PyArray_IS_F_CONTIGUOUS(ary)) {
        layout = 2;
    }

    /* the typecode cache by convention is for "behaved" arrays (aligned and
     * writeable), all others must be forced to the fall back */
    if (!PyArray_ISBEHAVED(ary)) goto FALLBACK;

    if (ndim <= 0 || ndim > N_NDIM) goto FALLBACK;

    dtype = dtype_num_to_typecode(PyArray_TYPE(ary));
    if (dtype == -1) goto FALLBACK;

    /* Fast path, using direct table lookup */
    assert(layout < N_LAYOUT);
    assert(ndim <= N_NDIM);
    assert(dtype < N_DTYPES);

    typecode = cached_arycode[ndim - 1][layout][dtype];
    if (typecode == -1) {
        /* First use of this table entry, so it requires populating */
        typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary);
        cached_arycode[ndim - 1][layout][dtype] = typecode;
    }
    return typecode;

FALLBACK:
    /* Slower path, for non-trivial array types */

    /* If this isn't a structured array then we can't use the cache */
    if (PyArray_TYPE(ary) != NPY_VOID)
        return typecode_using_fingerprint(dispatcher, (PyObject *) ary);

    /* Check type cache */
    typecode = get_cached_ndarray_typecode(ndim, layout, PyArray_DESCR(ary));
    if (typecode == -1) {
        /* First use of this type, use fallback and populate the cache */
        typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary);
        cache_ndarray_typecode(ndim, layout, PyArray_DESCR(ary), typecode);
    }
    return typecode;
}
/* Resolve the Numba typecode of a NumPy array scalar.
 *
 * Structured (NPY_VOID) scalars are cached per-descriptor in `typecache`;
 * scalars of a basic dtype map directly into BASIC_TYPECODES; anything
 * else (including a failed descriptor lookup) falls back to
 * fingerprint-based resolution. */
static
int typecode_arrayscalar(PyObject *dispatcher, PyObject* aryscalar) {
    int typecode;
    PyArray_Descr *descr;
    /* PyArray_DescrFromScalar returns a new reference, released below. */
    descr = PyArray_DescrFromScalar(aryscalar);
    if (!descr)
        return typecode_using_fingerprint(dispatcher, aryscalar);

    /* Is it a structured scalar? */
    if (descr->type_num == NPY_VOID) {
        typecode = get_cached_typecode(descr);
        if (typecode == -1) {
            /* Resolve through fallback then populate cache */
            typecode = typecode_fallback_keep_ref(dispatcher, aryscalar);
            cache_typecode(descr, typecode);
        }
        Py_DECREF(descr);
        return typecode;
    }

    /* Is it one of the well-known basic types? */
    typecode = dtype_num_to_typecode(descr->type_num);
    Py_DECREF(descr);
    if (typecode == -1)
        return typecode_using_fingerprint(dispatcher, aryscalar);
    /* dtype_num_to_typecode returned an index, not a typecode: translate. */
    return BASIC_TYPECODES[typecode];
}
/* Resolve the Numba typecode of a CUDA device array.
 *
 * The object is duck-typed: `flags`, `ndim` and `dtype.num` are read via
 * Python attribute access (there is no C-level struct for device arrays).
 * Mirrors the fast path of typecode_ndarray(); any missing or malformed
 * attribute clears the error and drops to the fingerprint fallback. */
static
int typecode_devicendarray(PyObject *dispatcher, PyObject *ary)
{
    int typecode;
    int dtype;
    int ndim;
    int layout = 0;
    PyObject *ndim_obj = nullptr;
    PyObject *num_obj = nullptr;
    PyObject *dtype_obj = nullptr;
    int dtype_num = 0;

    PyObject* flags = PyObject_GetAttrString(ary, "flags");
    if (flags == NULL)
    {
        PyErr_Clear();
        goto FALLBACK;
    }

    /* Layout encoding matches typecode_ndarray: 0 = any, 1 = C, 2 = F.
       `flags` is expected to behave like a dict here (PyDict_GetItemString). */
    if (PyDict_GetItemString(flags, "C_CONTIGUOUS") == Py_True) {
        layout = 1;
    } else if (PyDict_GetItemString(flags, "F_CONTIGUOUS") == Py_True) {
        layout = 2;
    }
    Py_DECREF(flags);

    ndim_obj = PyObject_GetAttrString(ary, "ndim");
    if (ndim_obj == NULL) {
        /* If there's no ndim, try to proceed by clearing the error and using the
         * fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }
    ndim = PyLong_AsLong(ndim_obj);
    Py_DECREF(ndim_obj);
    if (PyErr_Occurred()) {
        /* ndim wasn't an integer for some reason - unlikely to happen, but try
         * the fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }

    if (ndim <= 0 || ndim > N_NDIM)
        goto FALLBACK;

    dtype_obj = PyObject_GetAttrString(ary, "dtype");
    if (dtype_obj == NULL) {
        /* No dtype: try the fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }
    num_obj = PyObject_GetAttrString(dtype_obj, "num");
    Py_DECREF(dtype_obj);
    if (num_obj == NULL) {
        /* This strange dtype has no num - try the fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }
    dtype_num = PyLong_AsLong(num_obj);
    Py_DECREF(num_obj);
    if (PyErr_Occurred()) {
        /* num wasn't an integer for some reason - unlikely to happen, but try
         * the fallback. */
        PyErr_Clear();
        goto FALLBACK;
    }

    dtype = dtype_num_to_typecode(dtype_num);
    if (dtype == -1) {
        /* Not a dtype we have in the global lookup table. */
        goto FALLBACK;
    }

    /* Fast path, using direct table lookup */
    assert(layout < N_LAYOUT);
    assert(ndim <= N_NDIM);
    assert(dtype < N_DTYPES);
    typecode = cached_arycode[ndim - 1][layout][dtype];
    if (typecode == -1) {
        /* First use of this table entry, so it requires populating */
        typecode = typecode_fallback_keep_ref(dispatcher, (PyObject*)ary);
        cached_arycode[ndim - 1][layout][dtype] = typecode;
    }
    return typecode;

FALLBACK:
    /* Slower path, for non-trivial array types. At present this always uses
       the fingerprinting to get the typecode. Future optimization might
       implement a cache, but this would require some fast equivalent of
       PyArray_DESCR for a device array. */
    return typecode_using_fingerprint(dispatcher, (PyObject *) ary);
}
/* Top-level type resolution entry point: return the Numba typecode of an
 * arbitrary Python value.  Builtin scalars (int/float/complex) are handled
 * inline; NumPy array scalars, ndarrays, CUDA device arrays and ndarray
 * subclasses dispatch to their helpers; everything else is resolved by
 * fingerprinting. */
extern "C" int
typeof_typecode(PyObject *dispatcher, PyObject *val)
{
    PyTypeObject *tyobj = Py_TYPE(val);
    int subtype_attr;
    /* This needs to be kept in sync with Dispatcher.typeof_pyval(),
     * otherwise funny things may happen.
     */
    if (tyobj == &PyInt_Type || tyobj == &PyLong_Type) {
#if SIZEOF_VOID_P < 8
        /* On 32-bit platforms, choose between tc_intp (32-bit) and tc_int64 */
        PY_LONG_LONG ll = PyLong_AsLongLong(val);
        if (ll == -1 && PyErr_Occurred()) {
            /* The integer is too large, let us truncate it */
            PyErr_Clear();
            return tc_int64;
        }

        if ((ll & 0xffffffff) != ll)
            return tc_int64;
#endif
        return tc_intp;
    }
    else if (tyobj == &PyFloat_Type)
        return tc_float64;
    else if (tyobj == &PyComplex_Type)
        return tc_complex128;
    /* Array scalar handling */
    else if (PyArray_CheckScalar(val)) {
        return typecode_arrayscalar(dispatcher, val);
    }
    /* Array handling */
    else if (tyobj == &PyArray_Type) {
        return typecode_ndarray(dispatcher, (PyArrayObject*)val);
    }
    /* Subtype of CUDA device array */
    else if (PyType_IsSubtype(tyobj, &DeviceArrayType)) {
        return typecode_devicendarray(dispatcher, val);
    }
    /* Subtypes of Array handling */
    else if (PyType_IsSubtype(tyobj, &PyArray_Type)) {
        /* By default, Numba will treat all numpy.ndarray subtypes as if they
           were the base numpy.ndarray type.  In this way, ndarray subtypes
           can easily use all of the support that Numba has for ndarray
           methods.

           EXPERIMENTAL: There may be cases where a programmer would NOT want
           ndarray subtypes to be treated exactly like the base numpy.ndarray.
           For this purpose, a currently experimental feature allows a
           programmer to add an attribute named
           __numba_array_subtype_dispatch__ to their ndarray subtype.  This
           attribute can have any value as Numba only checks for the presence
           of the attribute and not its value.  When present, a ndarray subtype
           will NOT be typed by Numba as a regular ndarray but this code will
           fallthrough to the typecode_using_fingerprint call, which will
           create a new unique Numba typecode for this ndarray subtype.  This
           behavior has several significant effects.  First, since this
           ndarray subtype will be treated as a different type by Numba,
           the Numba dispatcher would then specialize on this type.  So, if
           there was a function that had several parameters that were
           expected to be either numpy.ndarray or a subtype of ndarray, then
           Numba would compile a custom version of this function for each
           combination of base and subtypes that were actually passed to the
           function.  Second, because this subtype would now be treated as
           a totally separate type, it will cease to function in Numba unless
           an implementation of that type is provided to Numba through the
           Numba type extension mechanisms (e.g., overload).  This would
           typically start with defining a Numba type corresponding to the
           ndarray subtype.  This is the same concept as how Numba has a
           corollary of numpy.ndarray in its type system as types.Array.
           Next, one would typically defining boxing and unboxing routines
           and the associated memory model.  Then, overloads for NumPy
           functions on that type would be created.  However,
           if the same default array memory model is used then there are tricks
           one can do to look at Numba's internal types.Array registries and
           to quickly apply those to the subtype as well.  In this manner,
           only those cases where the base ndarray and the ndarray subtype
           behavior differ would new custom functions need to be written for
           the subtype.  Finally,
           after adding support for the new type, you would have a separate
           ndarray subtype that could operate with other objects of the same
           subtype but would not support interoperation with regular NumPy
           ndarrays.  In standard Python, this interoperation is provided
           through the __array_ufunc__ magic method in the ndarray subtype
           class and in that case the function operates on ndarrays or their
           subtypes.  This idea is extended into Numba such that
           __array_ufunc__ can be present in a Numba array type object.
           In this case, this function is consulted during Numba typing and
           so the arguments to __array_ufunc__ are Numba types instead of
           ndarray subtypes.  The array type __array_ufunc__ returns the
           type of the output of the given ufunc.
        */
        subtype_attr = PyObject_HasAttrString(val, "__numba_array_subtype_dispatch__");
        if (!subtype_attr) {
            return typecode_ndarray(dispatcher, (PyArrayObject*)val);
        }
    }

    return typecode_using_fingerprint(dispatcher, val);
}
static
void* wrap_import_array(void) {
    /* NOTE: import_array() is a NumPy macro that, on failure, sets a Python
     * error and executes `return NULL;` out of the *enclosing* function.
     * Wrapping it here lets init_numpy() turn that hidden early return into
     * an ordinary boolean result. */
    import_array(); /* import array returns NULL on failure */
    return (void*)1;
}
/* Initialize the NumPy C API via wrap_import_array().
   Returns non-zero on success, zero on failure. */
static
int init_numpy(void) {
    if (wrap_import_array() == NULL)
        return 0;
    return 1;
}
/*
 * typeof_init(omittedarg_type, typecode_dict)
 * (called from dispatcher.py to fill in missing information)
 *
 * Pulls the typecode of each basic type out of `typecode_dict` into the
 * tc_* globals and BASIC_TYPECODES, creates the typecode cache dicts and
 * the fingerprint hash table, and interns a few attribute-name strings.
 * Returns None, or NULL with an exception set on failure.
 */
extern "C" PyObject *
typeof_init(PyObject *self, PyObject *args)
{
    PyObject *tmpobj;
    PyObject *dict;
    int index = 0;

    if (!PyArg_ParseTuple(args, "O!O!:typeof_init",
                          &PyType_Type, &omittedarg_type,
                          &PyDict_Type, &dict))
        return NULL;

    /* Initialize Numpy API */
    if ( ! init_numpy() ) {
        return NULL;
    }

    /* Fetch the typecode registered under the name S, store it in the
       tc_<S> global and append it to BASIC_TYPECODES.  The expansion order
       below must match the index order of dtype_num_to_typecode(). */
    #define UNWRAP_TYPE(S)                                              \
        if(!(tmpobj = PyDict_GetItemString(dict, #S))) return NULL;     \
        else {  tc_##S = PyLong_AsLong(tmpobj);                         \
                BASIC_TYPECODES[index++] = tc_##S;  }

    UNWRAP_TYPE(int8)
    UNWRAP_TYPE(int16)
    UNWRAP_TYPE(int32)
    UNWRAP_TYPE(int64)

    UNWRAP_TYPE(uint8)
    UNWRAP_TYPE(uint16)
    UNWRAP_TYPE(uint32)
    UNWRAP_TYPE(uint64)

    UNWRAP_TYPE(float32)
    UNWRAP_TYPE(float64)

    UNWRAP_TYPE(complex64)
    UNWRAP_TYPE(complex128)

    /* tc_intp mirrors the native pointer width. */
    switch(sizeof(void*)) {
    case 4:
        tc_intp = tc_int32;
        break;
    case 8:
        tc_intp = tc_int64;
        break;
    default:
        PyErr_SetString(PyExc_AssertionError, "sizeof(void*) != {4, 8}");
        return NULL;
    }

    #undef UNWRAP_TYPE

    typecache = PyDict_New();
    ndarray_typecache = PyDict_New();
    structured_dtypes = PyDict_New();
    if (typecache == NULL || ndarray_typecache == NULL ||
        structured_dtypes == NULL) {
        PyErr_SetString(PyExc_RuntimeError, "failed to create type cache");
        return NULL;
    }

    fingerprint_hashtable = _Numba_hashtable_new(sizeof(int),
                                                 hash_writer,
                                                 compare_writer);
    if (fingerprint_hashtable == NULL) {
        PyErr_NoMemory();
        return NULL;
    }

    /* initialize cached_arycode to all ones (in bits), i.e. every entry is
       -1, meaning "not populated yet" */
    memset(cached_arycode, 0xFF, sizeof(cached_arycode));

    str_typeof_pyval = PyString_InternFromString("typeof_pyval");
    str_value = PyString_InternFromString("value");
    str_numba_type = PyString_InternFromString("_numba_type_");
    if (!str_value || !str_typeof_pyval || !str_numba_type)
        return NULL;

    Py_RETURN_NONE;
}
#ifndef NUMBA_TYPEOF_H_
#define NUMBA_TYPEOF_H_

#ifdef __cplusplus
extern "C" {
#endif

/* Fill in module state (omitted-arg type, basic typecodes, caches);
   called from dispatcher.py. */
extern PyObject *typeof_init(PyObject *self, PyObject *args);
/* Return the Numba typecode for an arbitrary Python value. */
extern int typeof_typecode(PyObject *dispatcher, PyObject *val);
/* Compute the type fingerprint for a Python value
   (implementation not visible in this chunk). */
extern PyObject *typeof_compute_fingerprint(PyObject *val);

#ifdef __cplusplus
}
#endif

#endif /* NUMBA_TYPEOF_H_ */
This source diff could not be displayed because it is too large. You can view the blob instead.
# This file helps to compute a version number in source trees obtained from
# git-archive tarball (such as those provided by githubs download-from-tag
# feature). Distribution tarballs (built by setup.py sdist) and build
# directories (produced by setup.py build) will contain a much shorter file
# that just contains the computed version number.
# This file is released into the public domain.
# Generated by versioneer-0.28
# https://github.com/python-versioneer/python-versioneer
"""Git implementation of _version.py."""
import errno
import os
import re
import subprocess
import sys
from typing import Callable, Dict
import functools
def get_keywords():
    """Return the git-archive keyword values used for version lookup."""
    # These strings get substituted by git during `git archive`.
    # setup.py/versioneer.py greps for the variable names, so each one
    # must stay defined on a line of its own.
    git_refnames = " (tag: 0.58.1, release0.58)"
    git_full = "d4460feb8c91213e7b89f97b632d19e34a776cd3"
    git_date = "2023-10-16 15:33:43 +0200"
    return {"refnames": git_refnames, "full": git_full, "date": git_date}
class VersioneerConfig:
    """Container for Versioneer configuration parameters."""
    # Plain attribute bag; instances are populated attribute-by-attribute
    # in get_config(): VCS, style, tag_prefix, parentdir_prefix,
    # versionfile_source, verbose.
def get_config():
    """Create, populate and return the VersioneerConfig() object."""
    # These values are baked in when 'setup.py versioneer' generates
    # _version.py.
    cfg = VersioneerConfig()
    settings = {
        "VCS": "git",
        "style": "pep440",
        "tag_prefix": "",
        "parentdir_prefix": "numba-",
        "versionfile_source": "numba/_version.py",
        "verbose": False,
    }
    for attr, value in settings.items():
        setattr(cfg, attr, value)
    return cfg
class NotThisMethod(Exception):
    """Exception raised if a method is not valid for the current scenario."""


# Placeholder kept for versioneer compatibility; not populated in this file.
LONG_VERSION_PY: Dict[str, str] = {}
# Registry of VCS handlers: {vcs_name: {method_name: handler_function}},
# filled in by the register_vcs_handler decorator below.
HANDLERS: Dict[str, Dict[str, Callable]] = {}
def register_vcs_handler(vcs, method):  # decorator
    """Create decorator to mark a method as the handler of a VCS."""
    def decorate(f):
        """Store f in HANDLERS[vcs][method]."""
        HANDLERS.setdefault(vcs, {})[method] = f
        return f
    return decorate
def run_command(commands, args, cwd=None, verbose=False, hide_stderr=False,
                env=None):
    """Call the given command(s).

    Each executable name in `commands` is tried in turn (e.g. ["git.cmd",
    "git.exe"] on Windows) until one can be spawned with `args`.

    Returns a (stdout, returncode) tuple: (None, None) when no executable
    could be started, (None, returncode) when the command ran but exited
    non-zero, and (decoded stdout, returncode) on success.
    """
    assert isinstance(commands, list)
    process = None

    popen_kwargs = {}
    if sys.platform == "win32":
        # This hides the console window if pythonw.exe is used
        startupinfo = subprocess.STARTUPINFO()
        startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
        popen_kwargs["startupinfo"] = startupinfo

    for command in commands:
        try:
            dispcmd = str([command] + args)
            # remember shell=False, so use git.cmd on windows, not just git
            process = subprocess.Popen([command] + args, cwd=cwd, env=env,
                                       stdout=subprocess.PIPE,
                                       stderr=(subprocess.PIPE if hide_stderr
                                               else None), **popen_kwargs)
            break
        except OSError:
            e = sys.exc_info()[1]
            if e.errno == errno.ENOENT:
                # Executable not found: try the next candidate name.
                continue
            if verbose:
                print("unable to run %s" % dispcmd)
                print(e)
            return None, None
    else:
        # for/else: no candidate could be spawned at all.
        if verbose:
            print("unable to find command, tried %s" % (commands,))
        return None, None
    stdout = process.communicate()[0].strip().decode()
    if process.returncode != 0:
        if verbose:
            print("unable to run %s (error)" % dispcmd)
            print("stdout was %s" % stdout)
        return None, process.returncode
    return stdout, process.returncode
def versions_from_parentdir(parentdir_prefix, root, verbose):
    """Try to determine the version from the parent directory name.

    Source tarballs conventionally unpack into a directory that includes both
    the project name and a version string. We will also support searching up
    two directory levels for an appropriately named parent directory
    """
    tried = []
    candidate = root
    for _ in range(3):
        basename = os.path.basename(candidate)
        if basename.startswith(parentdir_prefix):
            return {
                "version": basename[len(parentdir_prefix):],
                "full-revisionid": None,
                "dirty": False,
                "error": None,
                "date": None,
            }
        tried.append(candidate)
        candidate = os.path.dirname(candidate)  # up a level
    if verbose:
        print("Tried directories %s but none started with prefix %s" %
              (str(tried), parentdir_prefix))
    raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
@register_vcs_handler("git", "get_keywords")
def git_get_keywords(versionfile_abs):
    """Extract version information from the given file."""
    # The code embedded in _version.py can just fetch these keyword values
    # directly.  When used from setup.py we don't want to import
    # _version.py, so the values are scraped with a regexp instead.  This
    # function is not used from _version.py itself.
    keywords = {}
    wanted = (("git_refnames =", "refnames"),
              ("git_full =", "full"),
              ("git_date =", "date"))
    try:
        with open(versionfile_abs, "r") as fobj:
            for line in fobj:
                stripped = line.strip()
                for prefix, name in wanted:
                    if stripped.startswith(prefix):
                        mo = re.search(r'=\s*"(.*)"', line)
                        if mo:
                            keywords[name] = mo.group(1)
    except OSError:
        # Missing/unreadable file: return whatever was gathered (nothing).
        pass
    return keywords
@register_vcs_handler("git", "keywords")
def git_versions_from_keywords(keywords, tag_prefix, verbose):
    """Get version information from git keywords.

    `keywords` is the dict produced by get_keywords()/git_get_keywords().
    Raises NotThisMethod when the keywords are missing or were never
    expanded by git-archive.
    """
    if "refnames" not in keywords:
        raise NotThisMethod("Short version file found")
    date = keywords.get("date")
    if date is not None:
        # Use only the last line.  Previous lines may contain GPG signature
        # information.
        date = date.splitlines()[-1]

        # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
        # datestamp.  However we prefer "%ci" (which expands to an "ISO-8601
        # -like" string, which we must then edit to make compliant), because
        # it's been around since git-1.5.3, and it's too difficult to
        # discover which version we're using, or to work around using an
        # older one.
        date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
    refnames = keywords["refnames"].strip()
    if refnames.startswith("$Format"):
        if verbose:
            print("keywords are unexpanded, not using")
        raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
    refs = {r.strip() for r in refnames.strip("()").split(",")}
    # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
    # just "foo-1.0". If we see a "tag: " prefix, prefer those.
    TAG = "tag: "
    tags = {r[len(TAG):] for r in refs if r.startswith(TAG)}
    if not tags:
        # Either we're using git < 1.8.3, or there really are no tags. We use
        # a heuristic: assume all version tags have a digit. The old git %d
        # expansion behaves like git log --decorate=short and strips out the
        # refs/heads/ and refs/tags/ prefixes that would let us distinguish
        # between branches and tags. By ignoring refnames without digits, we
        # filter out many common branch names like "release" and
        # "stabilization", as well as "HEAD" and "master".
        tags = {r for r in refs if re.search(r'\d', r)}
        if verbose:
            print("discarding '%s', no digits" % ",".join(refs - tags))
    if verbose:
        print("likely tags: %s" % ",".join(sorted(tags)))
    for ref in sorted(tags):
        # sorting will prefer e.g. "2.0" over "2.0rc1"
        if ref.startswith(tag_prefix):
            r = ref[len(tag_prefix):]
            # Filter out refs that exactly match prefix or that don't start
            # with a number once the prefix is stripped (mostly a concern
            # when prefix is '')
            if not re.match(r'\d', r):
                continue
            if verbose:
                print("picking %s" % r)
            return {"version": r,
                    "full-revisionid": keywords["full"].strip(),
                    "dirty": False, "error": None,
                    "date": date}
    # no suitable tags, so version is "0+unknown", but full hex is still there
    if verbose:
        print("no suitable tags, using unknown + full revision id")
    return {"version": "0+unknown",
            "full-revisionid": keywords["full"].strip(),
            "dirty": False, "error": "no suitable tags", "date": None}
@register_vcs_handler("git", "pieces_from_vcs")
def git_pieces_from_vcs(tag_prefix, root, verbose, runner=run_command):
    """Get version from 'git describe' in the root of the source tree.

    This only gets called if the git-archive 'subst' keywords were *not*
    expanded, and _version.py hasn't already been rewritten with a short
    version string, meaning we're inside a checked out source tree.

    Returns a `pieces` dict consumed by the render_* functions, with keys:
    long, short, branch, closest-tag, distance, dirty, date, error.
    Raises NotThisMethod when `root` is not a git checkout or git fails.
    """
    GITS = ["git"]
    if sys.platform == "win32":
        GITS = ["git.cmd", "git.exe"]

    # GIT_DIR can interfere with correct operation of Versioneer.
    # It may be intended to be passed to the Versioneer-versioned project,
    # but that should not change where we get our version from.
    env = os.environ.copy()
    env.pop("GIT_DIR", None)
    runner = functools.partial(runner, env=env)

    _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root,
                   hide_stderr=not verbose)
    if rc != 0:
        if verbose:
            print("Directory %s not under git control" % root)
        raise NotThisMethod("'git rev-parse --git-dir' returned error")

    # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
    # if there isn't one, this yields HEX[-dirty] (no NUM)
    describe_out, rc = runner(GITS, [
        "describe", "--tags", "--dirty", "--always", "--long",
        "--match", f"{tag_prefix}[[:digit:]]*"
    ], cwd=root)
    # --long was added in git-1.5.5
    if describe_out is None:
        raise NotThisMethod("'git describe' failed")
    describe_out = describe_out.strip()
    full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
    if full_out is None:
        raise NotThisMethod("'git rev-parse' failed")
    full_out = full_out.strip()

    pieces = {}
    pieces["long"] = full_out
    pieces["short"] = full_out[:7]  # maybe improved later
    pieces["error"] = None

    branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"],
                             cwd=root)
    # --abbrev-ref was added in git-1.6.3
    if rc != 0 or branch_name is None:
        raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
    branch_name = branch_name.strip()

    if branch_name == "HEAD":
        # If we aren't exactly on a branch, pick a branch which represents
        # the current commit. If all else fails, we are on a branchless
        # commit.
        branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
        # --contains was added in git-1.5.4
        if rc != 0 or branches is None:
            raise NotThisMethod("'git branch --contains' returned error")
        branches = branches.split("\n")

        # Remove the first line if we're running detached
        if "(" in branches[0]:
            branches.pop(0)

        # Strip off the leading "* " from the list of branches.
        branches = [branch[2:] for branch in branches]
        if "master" in branches:
            branch_name = "master"
        elif not branches:
            branch_name = None
        else:
            # Pick the first branch that is returned. Good or bad.
            branch_name = branches[0]

    pieces["branch"] = branch_name

    # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
    # TAG might have hyphens.
    git_describe = describe_out

    # look for -dirty suffix
    dirty = git_describe.endswith("-dirty")
    pieces["dirty"] = dirty
    if dirty:
        git_describe = git_describe[:git_describe.rindex("-dirty")]

    # now we have TAG-NUM-gHEX or HEX

    if "-" in git_describe:
        # TAG-NUM-gHEX
        mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe)
        if not mo:
            # unparsable. Maybe git-describe is misbehaving?
            pieces["error"] = ("unable to parse git-describe output: '%s'"
                               % describe_out)
            return pieces

        # tag
        full_tag = mo.group(1)
        if not full_tag.startswith(tag_prefix):
            if verbose:
                fmt = "tag '%s' doesn't start with prefix '%s'"
                print(fmt % (full_tag, tag_prefix))
            pieces["error"] = ("tag '%s' doesn't start with prefix '%s'"
                               % (full_tag, tag_prefix))
            return pieces
        pieces["closest-tag"] = full_tag[len(tag_prefix):]

        # distance: number of commits since tag
        pieces["distance"] = int(mo.group(2))

        # commit: short hex revision ID
        pieces["short"] = mo.group(3)

    else:
        # HEX: no tags
        pieces["closest-tag"] = None
        out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
        pieces["distance"] = len(out.split())  # total number of commits

    # commit date: see ISO-8601 comment in git_versions_from_keywords()
    date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
    # Use only the last line. Previous lines may contain GPG signature
    # information.
    date = date.splitlines()[-1]
    pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)

    return pieces
def plus_or_dot(pieces):
    """Return a + if we don't already have one, else return a .

    PEP 440 local version labels begin with "+"; once the closest tag
    already contains one, further segments are joined with ".".
    """
    # Fix: pieces["closest-tag"] may legitimately be present but None (set
    # by git_pieces_from_vcs when no tag is found), in which case
    # `"+" in None` would raise TypeError; coerce None to "".
    if "+" in (pieces.get("closest-tag") or ""):
        return "."
    return "+"
def render_pep440(pieces):
    """Build up version string, with post-release "local version identifier".

    Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
    get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty

    Exceptions:
    1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        parts = ["0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])]
        if pieces["dirty"]:
            parts.append(".dirty")
        return "".join(parts)
    parts = [tag]
    if pieces["distance"] or pieces["dirty"]:
        parts.append(plus_or_dot(pieces))
        parts.append("%d.g%s" % (pieces["distance"], pieces["short"]))
        if pieces["dirty"]:
            parts.append(".dirty")
    return "".join(parts)
def render_pep440_branch(pieces):
    """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .

    The ".dev0" means not master branch. Note that .dev0 sorts backwards
    (a feature branch will appear "older" than the master branch).

    Exceptions:
    1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
    """
    off_master = pieces["branch"] != "master"
    tag = pieces["closest-tag"]
    if tag:
        rendered = tag
        if pieces["distance"] or pieces["dirty"]:
            if off_master:
                rendered += ".dev0"
            rendered += plus_or_dot(pieces)
            rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
            if pieces["dirty"]:
                rendered += ".dirty"
        return rendered
    # exception #1
    rendered = "0"
    if off_master:
        rendered += ".dev0"
    rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
    if pieces["dirty"]:
        rendered += ".dirty"
    return rendered
def pep440_split_post(ver):
    """Split pep440 version string at the post-release segment.

    Returns (release, post) where `release` is everything before the first
    ".post" segment and `post` is the post-release number, 0 when the
    segment is present but empty, or None when the string does not split
    into exactly two parts.
    """
    segments = ver.split(".post")
    if len(segments) != 2:
        return segments[0], None
    release, post = segments
    return release, int(post or 0)
def render_pep440_pre(pieces):
    """TAG[.postN.devDISTANCE] -- No -dirty.

    Exceptions:
    1: no tags. 0.post0.devDISTANCE
    """
    tag = pieces["closest-tag"]
    if not tag:
        # exception #1
        return "0.post0.dev%d" % pieces["distance"]
    if not pieces["distance"]:
        # Sitting exactly on the tag: the tag itself is the version.
        return tag
    # Bump (or create) the post-release segment and append a dev segment
    # carrying the commit distance.
    base, post = pep440_split_post(tag)
    if post is not None:
        return base + ".post%d.dev%d" % (post + 1, pieces["distance"])
    return base + ".post0.dev%d" % pieces["distance"]
def render_pep440_post(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX] .

    The ".dev0" means dirty. Note that .dev0 sorts backwards
    (a dirty tree will appear "older" than the corresponding clean one),
    but you shouldn't be releasing software with -dirty anyways.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        out = [tag]
        if pieces["distance"] or pieces["dirty"]:
            out.append(".post%d" % pieces["distance"])
            if pieces["dirty"]:
                out.append(".dev0")
            out.append(plus_or_dot(pieces))
            out.append("g%s" % pieces["short"])
        return "".join(out)
    # exception #1
    out = ["0.post%d" % pieces["distance"]]
    if pieces["dirty"]:
        out.append(".dev0")
    out.append("+g%s" % pieces["short"])
    return "".join(out)
def render_pep440_post_branch(pieces):
    """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .

    The ".dev0" means not master branch.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
    """
    tag = pieces["closest-tag"]
    off_master = pieces["branch"] != "master"
    if tag:
        out = [tag]
        if pieces["distance"] or pieces["dirty"]:
            out.append(".post%d" % pieces["distance"])
            if off_master:
                out.append(".dev0")
            out.append(plus_or_dot(pieces))
            out.append("g%s" % pieces["short"])
            if pieces["dirty"]:
                out.append(".dirty")
        return "".join(out)
    # exception #1
    out = ["0.post%d" % pieces["distance"]]
    if off_master:
        out.append(".dev0")
    out.append("+g%s" % pieces["short"])
    if pieces["dirty"]:
        out.append(".dirty")
    return "".join(out)
def render_pep440_old(pieces):
    """TAG[.postDISTANCE[.dev0]] .

    The ".dev0" means dirty.

    Exceptions:
    1: no tags. 0.postDISTANCE[.dev0]
    """
    tag = pieces["closest-tag"]
    if tag:
        rendered = tag
        if pieces["distance"] or pieces["dirty"]:
            rendered += ".post%d" % pieces["distance"]
            if pieces["dirty"]:
                rendered += ".dev0"
        return rendered
    # exception #1
    rendered = "0.post%d" % pieces["distance"]
    if pieces["dirty"]:
        rendered += ".dev0"
    return rendered
def render_git_describe(pieces):
    """TAG[-DISTANCE-gHEX][-dirty].

    Like 'git describe --tags --dirty --always'.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        rendered = tag
        if pieces["distance"]:
            rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    return rendered + ("-dirty" if pieces["dirty"] else "")
def render_git_describe_long(pieces):
    """TAG-DISTANCE-gHEX[-dirty].

    Like 'git describe --tags --dirty --always -long'.
    The distance/hash is unconditional.

    Exceptions:
    1: no tags. HEX[-dirty] (note: no 'g' prefix)
    """
    tag = pieces["closest-tag"]
    if tag:
        rendered = "%s-%d-g%s" % (tag, pieces["distance"], pieces["short"])
    else:
        # exception #1
        rendered = pieces["short"]
    return rendered + ("-dirty" if pieces["dirty"] else "")
def render(pieces, style):
    """Render the given version pieces into the requested style."""
    if pieces["error"]:
        # An upstream step failed; surface the error instead of a version.
        return {"version": "unknown",
                "full-revisionid": pieces.get("long"),
                "dirty": None,
                "error": pieces["error"],
                "date": None}

    if not style or style == "default":
        style = "pep440"  # the default

    renderers = {
        "pep440": render_pep440,
        "pep440-branch": render_pep440_branch,
        "pep440-pre": render_pep440_pre,
        "pep440-post": render_pep440_post,
        "pep440-post-branch": render_pep440_post_branch,
        "pep440-old": render_pep440_old,
        "git-describe": render_git_describe,
        "git-describe-long": render_git_describe_long,
    }
    try:
        renderer = renderers[style]
    except KeyError:
        raise ValueError("unknown style '%s'" % style)
    rendered = renderer(pieces)

    return {"version": rendered, "full-revisionid": pieces["long"],
            "dirty": pieces["dirty"], "error": None,
            "date": pieces.get("date")}
def get_versions():
    """Get version information or return default if unable to do so."""
    # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
    # __file__, we can work backwards from there to the root. Some
    # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
    # case we can only use expanded keywords.

    cfg = get_config()
    verbose = cfg.verbose

    # Strategy 1: expanded git-archive keywords baked into this file.
    try:
        return git_versions_from_keywords(get_keywords(), cfg.tag_prefix,
                                          verbose)
    except NotThisMethod:
        pass

    try:
        root = os.path.realpath(__file__)
        # versionfile_source is the relative path from the top of the source
        # tree (where the .git directory might live) to this file. Invert
        # this to find the root from __file__.
        for _ in cfg.versionfile_source.split('/'):
            root = os.path.dirname(root)
    except NameError:
        # No __file__ (frozen interpreter): the remaining strategies all
        # need a filesystem root, so give up.
        return {"version": "0+unknown", "full-revisionid": None,
                "dirty": None,
                "error": "unable to find root of source tree",
                "date": None}

    # Strategy 2: ask git directly (checked-out source tree).
    try:
        pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
        return render(pieces, cfg.style)
    except NotThisMethod:
        pass

    # Strategy 3: parse the version out of the parent directory name.
    try:
        if cfg.parentdir_prefix:
            return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
    except NotThisMethod:
        pass

    return {"version": "0+unknown", "full-revisionid": None,
            "dirty": None,
            "error": "unable to compute version", "date": None}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment