Commit deb763b7 authored by root's avatar root
Browse files

clone code from github

parent 93bf084b
Pipeline #3386 canceled with stages
@inproceedings{cupy_learningsys2017,
author = "Okuta, Ryosuke and Unno, Yuya and Nishino, Daisuke and Hido, Shohei and Loomis, Crissman",
title = "CuPy: A NumPy-Compatible Library for NVIDIA GPU Calculations",
booktitle = "Proceedings of Workshop on Machine Learning Systems (LearningSys) in The Thirty-first Annual Conference on Neural Information Processing Systems (NIPS)",
year = "2017",
url = "http://learningsys.org/nips17/assets/papers/paper_16.pdf"
}
# CuPy Code of Conduct
CuPy follows the [NumFOCUS Code of Conduct][homepage] available at https://numfocus.org/code-of-conduct.
Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at `dlfw@preferred.jp`.
[homepage]: https://numfocus.org/
Copyright (c) 2015 Preferred Infrastructure, Inc.
Copyright (c) 2015 Preferred Networks, Inc.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
# Contents of sdist. See also `setup.py`.
recursive-include cupy *.h *.hpp
recursive-include cupy *.pyx *.pxd *.pxi
recursive-include cupy_backends *.h *.hpp
recursive-include cupy_backends *.pyx *.pxd *.pxi
# Fail-safe to avoid including Cythonized sources in sdist.
recursive-exclude cupy *.cpp
recursive-exclude cupy_backends *.cpp
# Installers
recursive-include install *.py
recursive-include tests *.py
# Licenses
include LICENSE
include docs/LICENSE_THIRD_PARTY
include docs/source/license.rst
comment: false
github_checks:
annotations: false
coverage:
status:
# Disable coverage measurement for overall codebase.
project: off
# Enable coverage measurement for diff introduced in the pull-request,
# but do not mark "X" on commit status for now.
patch:
default:
target: '0%'
This diff is collapsed.
# Functions from the following NumPy document
# https://numpy.org/doc/stable/reference/routines.bitwise.html
from cupy import _core

# Re-export the element-wise bitwise ufuncs implemented in cupy._core so this
# module mirrors numpy's routines.bitwise namespace.
bitwise_and = _core.bitwise_and
bitwise_or = _core.bitwise_or
bitwise_xor = _core.bitwise_xor
# ``bitwise_not`` is an alias of ``invert``, matching NumPy's naming.
bitwise_not = _core.invert
invert = _core.invert
left_shift = _core.left_shift
right_shift = _core.right_shift
import cupy
from cupy import _core
_packbits_kernel = {
'big': _core.ElementwiseKernel(
'raw T a, raw int32 a_size', 'uint8 packed',
'''for (int j = 0; j < 8; ++j) {
int k = i * 8 + j;
int bit = k < a_size && a[k] != 0;
packed |= bit << (7 - j);
}''',
'cupy_packbits_big'
),
'little': _core.ElementwiseKernel(
'raw T a, raw int32 a_size', 'uint8 packed',
'''for (int j = 0; j < 8; ++j) {
int k = i * 8 + j;
int bit = k < a_size && a[k] != 0;
packed |= bit << j;
}''',
'cupy_packbits_little'
)
}
def packbits(a, axis=None, bitorder='big'):
    """Packs the elements of a binary-valued array into bits in a uint8 array.

    This function currently does not support ``axis`` option.

    Args:
        a (cupy.ndarray): Input array.
        axis (int, optional): Not supported yet.
        bitorder (str, optional): bit order to use when packing the array,
            allowed values are `'little'` and `'big'`. Defaults to `'big'`.

    Returns:
        cupy.ndarray: The packed array.

    .. note::
        When the input array is empty, this function returns a copy of it,
        i.e., the type of the output array is not necessarily always uint8.
        This exactly follows the NumPy's behaviour (as of version 1.11),
        although this is inconsistent to the documentation.

    .. seealso:: :func:`numpy.packbits`
    """
    # Validate arguments before touching the data.
    if a.dtype.kind not in 'biu':
        raise TypeError(
            'Expected an input array of integer or boolean data type')
    if axis is not None:
        raise NotImplementedError('axis option is not supported yet')
    if bitorder not in ('big', 'little'):
        raise ValueError("bitorder must be either 'big' or 'little'")

    a = a.ravel()
    # Each output byte packs (up to) eight input elements.
    packed_size = (a.size + 7) // 8
    packed = cupy.zeros((packed_size,), dtype=cupy.uint8)
    return _packbits_kernel[bitorder](a, a.size, packed)
# Element-wise kernels that expand each bit of the input bytes into one
# uint8 output element (0 or 1), keyed by bit order.  ``i`` is the index of
# the unpacked output element; byte ``i / 8`` holds its source bit.
_unpackbits_kernel = {
    'big': _core.ElementwiseKernel(
        'raw uint8 a', 'T unpacked',
        'unpacked = (a[i / 8] >> (7 - i % 8)) & 1;',
        'cupy_unpackbits_big'
    ),
    'little': _core.ElementwiseKernel(
        'raw uint8 a', 'T unpacked',
        'unpacked = (a[i / 8] >> (i % 8)) & 1;',
        'cupy_unpackbits_little'
    )
}
def unpackbits(a, axis=None, bitorder='big'):
    """Unpacks elements of a uint8 array into a binary-valued output array.

    This function currently does not support ``axis`` option.

    Args:
        a (cupy.ndarray): Input array.
        axis (int, optional): Not supported yet.
        bitorder (str, optional): bit order to use when unpacking the array,
            allowed values are `'little'` and `'big'`. Defaults to `'big'`.

    Returns:
        cupy.ndarray: The unpacked array.

    .. seealso:: :func:`numpy.unpackbits`
    """
    # Validate arguments before touching the data.
    if a.dtype != cupy.uint8:
        raise TypeError('Expected an input array of unsigned byte data type')
    if axis is not None:
        raise NotImplementedError('axis option is not supported yet')
    if bitorder not in ('big', 'little'):
        raise ValueError("bitorder must be either 'big' or 'little'")

    # Every input byte expands to eight output elements.
    unpacked = cupy.ndarray((a.size * 8), dtype=cupy.uint8)
    return _unpackbits_kernel[bitorder](a, unpacked)
# mypy: ignore-errors
from cupy._core import core # NOQA
from cupy._core import fusion # NOQA
from cupy._core import internal # NOQA
# internal APIs for testing and development
from cupy._core._accelerator import set_elementwise_accelerators # NOQA
from cupy._core._accelerator import set_reduction_accelerators # NOQA
from cupy._core._accelerator import set_routine_accelerators # NOQA
from cupy._core._accelerator import get_elementwise_accelerators # NOQA
from cupy._core._accelerator import get_reduction_accelerators # NOQA
from cupy._core._accelerator import get_routine_accelerators # NOQA
# import class and function
from cupy._core._kernel import create_ufunc # NOQA
from cupy._core._kernel import ElementwiseKernel # NOQA
from cupy._core._kernel import ufunc # NOQA
from cupy._core._kernel import _get_warpsize # NOQA
from cupy._core._reduction import create_reduction_func # NOQA
from cupy._core._reduction import ReductionKernel # NOQA
from cupy._core._routines_binary import bitwise_and # NOQA
from cupy._core._routines_binary import bitwise_or # NOQA
from cupy._core._routines_binary import bitwise_xor # NOQA
from cupy._core._routines_binary import invert # NOQA
from cupy._core._routines_binary import left_shift # NOQA
from cupy._core._routines_binary import right_shift # NOQA
from cupy._core._routines_linalg import _mat_ptrs # NOQA
from cupy._core._routines_linalg import dot # NOQA
from cupy._core._routines_linalg import get_compute_type # NOQA
from cupy._core._routines_linalg import matmul # NOQA
from cupy._core._routines_linalg import set_compute_type # NOQA
from cupy._core._routines_linalg import tensordot_core # NOQA
from cupy._core._routines_logic import create_comparison # NOQA
from cupy._core._routines_logic import equal # NOQA
from cupy._core._routines_logic import greater # NOQA
from cupy._core._routines_logic import greater_equal # NOQA
from cupy._core._routines_logic import less # NOQA
from cupy._core._routines_logic import less_equal # NOQA
from cupy._core._routines_logic import not_equal # NOQA
from cupy._core._routines_manipulation import array_split # NOQA
from cupy._core._routines_manipulation import broadcast # NOQA
from cupy._core._routines_manipulation import broadcast_to # NOQA
from cupy._core._routines_manipulation import concatenate_method # NOQA
from cupy._core._routines_manipulation import moveaxis # NOQA
from cupy._core._routines_manipulation import rollaxis # NOQA
from cupy._core._routines_manipulation import size  # NOQA
from cupy._core._routines_math import absolute # NOQA
from cupy._core._routines_math import add # NOQA
from cupy._core._routines_math import angle, angle_deg # NOQA
from cupy._core._routines_math import conjugate # NOQA
from cupy._core._routines_math import divide # NOQA
from cupy._core._routines_math import floor_divide # NOQA
from cupy._core._routines_math import multiply # NOQA
from cupy._core._routines_math import negative # NOQA
from cupy._core._routines_math import positive # NOQA
from cupy._core._routines_math import power # NOQA
from cupy._core._routines_math import remainder # NOQA
from cupy._core._routines_math import sqrt # NOQA
from cupy._core._routines_math import subtract # NOQA
from cupy._core._routines_math import true_divide # NOQA
from cupy._core._routines_statistics import nanmax # NOQA
from cupy._core._routines_statistics import nanmin # NOQA
from cupy._core.core import _internal_ascontiguousarray # NOQA
from cupy._core.core import _internal_asfortranarray # NOQA
from cupy._core.core import array # NOQA
from cupy._core.core import ascontiguousarray # NOQA
from cupy._core.core import asfortranarray # NOQA
from cupy._core.core import divmod # NOQA
from cupy._core.core import elementwise_copy # NOQA
from cupy._core.core import ndarray # NOQA
from cupy._core.dlpack import fromDlpack # NOQA
from cupy._core.dlpack import from_dlpack # NOQA
from cupy._core.internal import complete_slice # NOQA
from cupy._core.internal import get_size # NOQA
from cupy._core.raw import RawKernel # NOQA
from cupy._core.raw import RawModule # NOQA
# Accelerator lists currently active for each kernel category; populated by
# the set_*_accelerators functions in _accelerator.pyx.
cdef list _elementwise_accelerators
cdef list _reduction_accelerators
cdef list _routine_accelerators


# Integer identifiers for the optional accelerator backends.
cpdef enum accelerator_type:
    ACCELERATOR_CUB = 1
    ACCELERATOR_CUTENSOR = 2
    ACCELERATOR_CUTENSORNET = 3
import os
from cupy_backends.cuda.api cimport runtime
# Active accelerator ids per kernel category; filled in at import time by
# _set_default_accelerators() from the CUPY_ACCELERATORS environment variable.
cdef list _elementwise_accelerators = []
cdef list _reduction_accelerators = []
cdef list _routine_accelerators = []
cdef int _get_accelerator(accelerator) except -1:
    # Normalize an accelerator spec — either an already-resolved integer
    # (``accelerator_type`` value) or a string name — to its enum value.
    if isinstance(accelerator, int):
        return accelerator
    by_name = {
        'cub': ACCELERATOR_CUB,
        'cutensor': ACCELERATOR_CUTENSOR,
        'cutensornet': ACCELERATOR_CUTENSORNET,
    }
    resolved = by_name.get(accelerator)
    if resolved is None:
        raise ValueError('Unknown accelerator: {}'.format(accelerator))
    return resolved
def set_elementwise_accelerators(accelerators):
    """Set the accelerators used for element-wise kernels."""
    global _elementwise_accelerators
    _elementwise_accelerators = list(map(_get_accelerator, accelerators))
def set_reduction_accelerators(accelerators):
    """Set the accelerators used for reduction kernels."""
    global _reduction_accelerators
    _reduction_accelerators = list(map(_get_accelerator, accelerators))
def set_routine_accelerators(accelerators):
    """Set the accelerators used for routines."""
    global _routine_accelerators
    _routine_accelerators = list(map(_get_accelerator, accelerators))
def get_elementwise_accelerators():
    """Return the accelerator ids enabled for element-wise kernels."""
    return _elementwise_accelerators


def get_reduction_accelerators():
    """Return the accelerator ids enabled for reduction kernels."""
    return _reduction_accelerators


def get_routine_accelerators():
    """Return the accelerator ids enabled for routines."""
    return _routine_accelerators
cdef _set_default_accelerators():
    # Initialize all three accelerator lists from the ``CUPY_ACCELERATORS``
    # environment variable (comma-separated names).  CUB is enabled by
    # default on CUDA; nothing is enabled by default on HIP/ROCm.
    cdef str b, accelerator_names = os.getenv(
        'CUPY_ACCELERATORS', '' if runtime._is_hip_environment else 'cub')
    # Drop empty entries produced by stray/trailing commas.
    cdef list accelerators = [b for b in accelerator_names.split(',') if b]
    set_elementwise_accelerators(accelerators)
    set_reduction_accelerators(accelerators)
    set_routine_accelerators(accelerators)


# Apply the environment-configured defaults at import time.
_set_default_accelerators()
cimport cython # NOQA
from libcpp cimport vector
from cupy.cuda cimport function
# Shape and strides containers used throughout the core kernels.
ctypedef vector.vector[Py_ssize_t] shape_t
ctypedef vector.vector[Py_ssize_t] strides_t

# this matches NPY_MAXDIMS
# Note: we make it an enum to work around cython/cython#4369
cdef enum: MAX_NDIM = 32


# C-side mirror of an ndarray header: data pointer, element count, and the
# shape followed by the strides packed into one fixed-size array.
cdef struct _CArray:
    void* data
    Py_ssize_t size
    Py_ssize_t shape_and_strides[MAX_NDIM * 2]


@cython.final
cdef class CArray(function.CPointer):
    cdef:
        _CArray val

    cdef void init(
        self, void* data_ptr, Py_ssize_t data_size,
        const shape_t& shape, const strides_t& strides) except*


# C-side indexer state: element count plus shape (and index) storage.
cdef struct _CIndexer:
    Py_ssize_t size
    Py_ssize_t shape_and_index[MAX_NDIM * 2]


cdef class CIndexer(function.CPointer):
    cdef:
        _CIndexer val

    cdef void init(self, Py_ssize_t size, const shape_t &shape) except*


cdef class Indexer:
    cdef:
        readonly Py_ssize_t size
        readonly shape_t shape
        readonly bint _index_32_bits

    cdef void init(self, const shape_t& shape)
    cdef function.CPointer get_pointer(self)


cdef Indexer _indexer_init(const shape_t& shape)
from cupy.cuda cimport function
from cupy._core cimport internal
cdef class CArray(function.CPointer):

    cdef void init(
            self, void* data_ptr, Py_ssize_t data_size,
            const shape_t& shape, const strides_t& strides) except*:
        # Fill the embedded _CArray struct and expose its address as the
        # pointer handed to kernels (via function.CPointer.ptr).
        cdef size_t ndim = shape.size()
        assert ndim == strides.size()
        assert ndim <= MAX_NDIM
        cdef Py_ssize_t* shape_and_strides = (
            self.val.shape_and_strides)
        cdef size_t i
        self.val.data = data_ptr
        self.val.size = data_size
        # Layout: shape occupies the first ndim slots, strides the next ndim.
        for i in range(ndim):
            shape_and_strides[i] = shape[i]
            shape_and_strides[i + ndim] = strides[i]
        self.ptr = <void*>&self.val
cdef class CIndexer(function.CPointer):

    cdef void init(self, Py_ssize_t size, const shape_t &shape) except*:
        # Copy the element count and shape into the embedded struct, then
        # expose its address to kernels.
        cdef size_t ndim = shape.size()
        assert ndim <= MAX_NDIM
        self.val.size = size
        cdef Py_ssize_t i
        # NOTE(review): only the shape half of shape_and_index is written
        # here; the index half appears to be filled on the device — confirm.
        for i in range(<Py_ssize_t>shape.size()):
            self.val.shape_and_index[i] = shape[i]
        self.ptr = <void*>&self.val
cdef class Indexer:

    cdef void init(self, const shape_t& shape):
        self.shape = shape
        # Total number of elements addressed by this indexer.
        self.size = internal.prod(shape)
        # True when every index fits in 32 bits — presumably consulted by
        # kernel launch code to pick 32-bit indexing; confirm at call sites.
        self._index_32_bits = self.size <= (1 << 31)

    @property
    def ndim(self):
        # Number of dimensions of the indexed shape.
        return self.shape.size()

    cdef function.CPointer get_pointer(self):
        # Build a fresh CIndexer mirroring this indexer's size and shape,
        # bypassing Python-level construction.
        cdef CIndexer indexer = CIndexer.__new__(CIndexer)
        indexer.init(self.size, self.shape)
        return indexer
cdef inline Indexer _indexer_init(const shape_t& shape):
    # Fast constructor for Indexer that skips Python-level __init__.
    cdef Indexer indexer = Indexer.__new__(Indexer)
    indexer.init(shape)
    return indexer
from typing import Any, List
_CodeType = Any  # TODO(asi1024): Correct type annotation


class CodeBlock:
    """Code fragment for the readable format.
    """

    def __init__(self, head: str, codes: _CodeType) -> None:
        # Append a trailing space to a non-empty head so the rendered
        # output reads ``head {`` rather than ``head{``.
        self._head = head + ' ' if head else ''
        self._codes = codes

    def _to_str_list(self, indent_width: int = 0) -> List[str]:
        pad = ' ' * indent_width
        lines: List[str] = [pad + self._head + '{']
        inner_width = indent_width + 2
        for fragment in self._codes:
            if isinstance(fragment, CodeBlock):
                # Nested blocks render themselves, shifted one level deeper.
                lines.extend(fragment._to_str_list(indent_width=inner_width))
            elif isinstance(fragment, str):
                lines.append(' ' * inner_width + fragment)
            else:
                assert False
        lines.append(pad + '}')
        return lines

    def __str__(self) -> str:
        """Emit CUDA program like the following format.

        <<head>> {
          <<begin codes>>
          ...;
          <<end codes>>
        }
        """
        return '\n'.join(self._to_str_list())
from cupy._core._carray cimport shape_t
from cupy._core._kernel cimport _TypeMap
from cupy._core.core cimport _ndarray_base
# Declaration only; implemented in _cub_reduction.pyx.  Attempts to run a
# reduction through the CUB backend and returns whether it handled the call.
cdef bint _try_to_call_cub_reduction(
    self, list in_args, list out_args, const shape_t& a_shape,
    stream, optimize_context, tuple key,
    map_expr, reduce_expr, post_map_expr,
    reduce_type, _TypeMap type_map,
    tuple reduce_axis, tuple out_axis, const shape_t& out_shape,
    _ndarray_base ret) except *
This diff is collapsed.
# Declarations for _dtype.pyx: cached dtype lookup and casting helpers
# shared across the core.
cpdef get_dtype(t)
cpdef tuple get_dtype_with_itemsize(t)
cpdef int to_cuda_dtype(dtype, bint is_half_allowed=*) except -1
cpdef void _raise_if_invalid_cast(
    from_dt,
    to_dt,
    str casting,
    argname=*
) except *
cimport cython # NOQA
import numpy
import warnings
from cupy_backends.cuda.api cimport runtime
# One-character dtype codes of every dtype supported by CuPy (see the table
# below, generated by the commented-out snippet).
all_type_chars = '?bhilqBHILQefdFD'
# for c in '?bhilqBHILQefdFD':
#     print('#', c, '...', np.dtype(c).name)
# ? ... bool
# b ... int8
# h ... int16
# i ... int32
# l ... int64 (int32 in windows)
# q ... int64
# B ... uint8
# H ... uint16
# I ... uint32
# L ... uint64 (uint32 in windows)
# Q ... uint64
# e ... float16
# f ... float32
# d ... float64
# F ... complex64
# D ... complex128
# Cache mapping dtype specifiers (Python scalar types, dtype characters,
# numpy scalar types, and canonical dtype names) to (dtype, itemsize) pairs.
cdef dict _dtype_dict = {}
cdef _dtype = numpy.dtype


cdef _init_dtype_dict():
    # Python scalar types, plus None for numpy's default dtype.
    for i in (int, float, bool, complex, None):
        dtype = _dtype(i)
        _dtype_dict[i] = (dtype, dtype.itemsize)
    # dtype characters and the corresponding numpy scalar types.
    for i in all_type_chars:
        dtype = _dtype(i)
        item = (dtype, dtype.itemsize)
        _dtype_dict[i] = item
        _dtype_dict[dtype.type] = item
    # Canonical dtype names such as 'float32' (deduplicated via the set).
    for i in {str(_dtype(i)) for i in all_type_chars}:
        dtype = _dtype(i)
        _dtype_dict[i] = (dtype, dtype.itemsize)


# Populate the cache once at import time.
_init_dtype_dict()
@cython.profile(False)
cpdef get_dtype(t):
    """Return the ``numpy.dtype`` for ``t``, consulting the cache first."""
    cached = _dtype_dict.get(t, None)
    if cached is not None:
        return cached[0]
    # Cache miss: fall back to numpy's own dtype construction.
    return _dtype(t)
@cython.profile(False)
cpdef tuple get_dtype_with_itemsize(t):
    """Return ``(dtype, itemsize)`` for ``t``, consulting the cache first."""
    cached = _dtype_dict.get(t, None)
    if cached is not None:
        return cached
    # Cache miss: build the pair from numpy's own dtype construction.
    dtype = _dtype(t)
    return dtype, dtype.itemsize
cpdef int to_cuda_dtype(dtype, bint is_half_allowed=False) except -1:
    """Translate a numpy dtype (or a dtype character) to the corresponding
    CUDA data-type enum value.

    Half-precision types ('e', and the NumPy-unsupported complex32 'E') are
    only accepted when ``is_half_allowed`` is True; any other unsupported
    dtype raises TypeError.
    """
    cdef str dtype_char
    try:
        dtype_char = dtype.char
    except AttributeError:
        # ``dtype`` may already be given as a bare dtype character.
        dtype_char = dtype
    if dtype_char == 'e' and is_half_allowed:
        return runtime.CUDA_R_16F
    elif dtype_char == 'f':
        return runtime.CUDA_R_32F
    elif dtype_char == 'd':
        return runtime.CUDA_R_64F
    elif dtype_char == 'F':
        return runtime.CUDA_C_32F
    elif dtype_char == 'D':
        return runtime.CUDA_C_64F
    elif dtype_char == 'E' and is_half_allowed:
        # complex32, not supported in NumPy
        return runtime.CUDA_C_16F
    else:
        raise TypeError('dtype is not supported: {}'.format(dtype))
# Bind once at module level to skip the attribute lookup on every call.
cdef _numpy_can_cast = numpy.can_cast


cpdef void _raise_if_invalid_cast(
    from_dt, to_dt, str casting, argname="array data"
) except *:
    """Raise an error if a cast is not valid. Also checks whether the cast
    goes from complex to real and warns if it does.

    The error raised can be customized by giving ``argname``. May pass a
    (lambda) function to avoid string construction on success.

    This function exists mainly to build a similar error everywhere.
    """
    if from_dt is to_dt:
        return

    to_dt = get_dtype(to_dt)  # may still be a type not a dtype instance
    if casting == "same_kind" and from_dt.kind == to_dt.kind:
        # same-kind is the most common casting used and for NumPy dtypes.
        return

    if _numpy_can_cast(from_dt, to_dt, casting):
        if casting == "unsafe" and from_dt.kind == "c" and to_dt.kind in "iuf":
            # Complex warning, we are dropping the imaginary part:
            warnings.warn(
                'Casting complex values to real discards the imaginary part',
                numpy.ComplexWarning)
        return

    # Casting is not possible, raise the error
    if not isinstance(argname, str):
        # ``argname`` was a callable: build the message only now, on failure.
        argname = argname()
    raise TypeError(
        f'Cannot cast {argname} from {from_dt!r} to {to_dt!r} '
        f'according to the rule \'{casting}\'')
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment