Unverified Commit 9a05446f authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

issue/461 InfiniCore 推理运行时


Co-authored-by: Jiacheng Huang <huangjiacheng0709@outlook.com>
Co-authored-by: wooway777 <wooway777@gmail.com>
parent 37411f6d
from . import _infinicore
class device:
    """Torch-style device descriptor.

    Accepts ``device("cuda", 0)``, ``device("cuda:0")``, another ``device``
    instance, or nothing (defaults to ``"cpu"``), and resolves the pair to an
    ``_infinicore.Device`` handle stored in ``_underlying``.
    """

    def __init__(self, type=None, index=None):
        if type is None:
            type = "cpu"
        if isinstance(type, device):
            # Copy construction. Bug fix: also copy the underlying handle —
            # the original returned early without ever setting `_underlying`.
            self.type = type.type
            self.index = type.index
            self._underlying = type._underlying
            return
        if ":" in type:
            if index is not None:
                raise ValueError(
                    '`index` should not be provided when `type` contains `":"`.'
                )
            type, index = type.split(":")
            index = int(index)
        self.type = type
        self.index = index
        # `index` may still be None (e.g. plain "cuda") — resolve as index 0.
        _type, _index = device._to_infinicore_device(type, index if index else 0)
        self._underlying = _infinicore.Device(_type, _index)

    def __repr__(self):
        return f"device(type='{self.type}'{f', index={self.index}' if self.index is not None else ''})"

    def __str__(self):
        return f"{self.type}{f':{self.index}' if self.index is not None else ''}"

    @staticmethod
    def _to_infinicore_device(type, index):
        """Map a torch-style (type, index) pair to an InfiniCore (type, index).

        Several InfiniCore backends share one torch name (see
        `_TORCH_DEVICE_MAP`), so the torch index is interpreted as a flat
        offset across all matching backends, in enum order.
        """
        # [:-1] drops the last enum member — presumably a COUNT sentinel;
        # TODO(review): confirm against the binding.
        all_device_types = tuple(_infinicore.Device.Type.__members__.values())[:-1]
        all_device_count = tuple(
            _infinicore.get_device_count(device) for device in all_device_types
        )
        # torch name -> {infinicore type -> device count} (counts filled below).
        torch_devices = {
            torch_type: {
                infinicore_type: 0
                for infinicore_type in all_device_types
                if _TORCH_DEVICE_MAP[infinicore_type] == torch_type
            }
            for torch_type in _TORCH_DEVICE_MAP.values()
        }
        for i, count in enumerate(all_device_count):
            infinicore_device_type = _infinicore.Device.Type(i)
            torch_devices[_TORCH_DEVICE_MAP[infinicore_device_type]][
                infinicore_device_type
            ] += count
        for infinicore_device_type, infinicore_device_count in torch_devices[
            type
        ].items():
            for i in range(infinicore_device_count):
                if index == 0:
                    return infinicore_device_type, i
                index -= 1
        # Bug fix: the original fell off the end and returned None, which the
        # caller then failed to unpack. Fail loudly instead.
        raise ValueError(
            f"no InfiniCore device available for {type!r} at the requested index"
        )
# Maps each InfiniCore backend to the torch device-type string it is exposed
# as. Several backends deliberately share "cuda" — presumably CUDA-compatible
# stacks (TODO(review): confirm for KUNLUN/SUGON).
_TORCH_DEVICE_MAP = {
    _infinicore.Device.Type.CPU: "cpu",
    _infinicore.Device.Type.NVIDIA: "cuda",
    _infinicore.Device.Type.CAMBRICON: "mlu",
    _infinicore.Device.Type.ASCEND: "npu",
    _infinicore.Device.Type.METAX: "cuda",
    _infinicore.Device.Type.MOORE: "musa",
    _infinicore.Device.Type.ILUVATAR: "cuda",
    _infinicore.Device.Type.KUNLUN: "cuda",
    _infinicore.Device.Type.SUGON: "cuda",
}
from . import _infinicore
class dtype:
    """Wraps a raw ``_infinicore.DataType`` value behind a torch-like name."""

    def __init__(self, data_type):
        """An internal method. Please do not use this directly."""
        self._underlying = data_type

    def __repr__(self):
        # Translate the underlying enum member into its torch-style name.
        names = {
            _infinicore.DataType.BYTE: "uint8",
            _infinicore.DataType.BOOL: "bool",
            _infinicore.DataType.I8: "int8",
            _infinicore.DataType.I16: "int16",
            _infinicore.DataType.I32: "int32",
            _infinicore.DataType.I64: "int64",
            _infinicore.DataType.U8: "uint8",
            _infinicore.DataType.U16: "uint16",
            _infinicore.DataType.U32: "uint32",
            _infinicore.DataType.U64: "uint64",
            _infinicore.DataType.F8: "float8",
            _infinicore.DataType.F16: "float16",
            _infinicore.DataType.F32: "float32",
            _infinicore.DataType.F64: "float64",
            _infinicore.DataType.C16: "complex16",
            _infinicore.DataType.C32: "complex32",
            _infinicore.DataType.C64: "complex64",
            _infinicore.DataType.C128: "complex128",
            _infinicore.DataType.BF16: "bfloat16",
        }
        return "infinicore." + names[self._underlying]
# Public dtype singletons, mirroring torch's naming, including its aliases
# (`float` for float32, `half` for float16, etc.). Shadowing the builtins
# `float`, `int`, and `bool` is deliberate here — it matches `torch.float`,
# `torch.int`, `torch.bool`.
float32 = dtype(_infinicore.DataType.F32)
float = float32
float64 = dtype(_infinicore.DataType.F64)
double = float64
complex32 = dtype(_infinicore.DataType.C32)
chalf = complex32
complex64 = dtype(_infinicore.DataType.C64)
cfloat = complex64
complex128 = dtype(_infinicore.DataType.C128)
cdouble = complex128
float16 = dtype(_infinicore.DataType.F16)
half = float16
bfloat16 = dtype(_infinicore.DataType.BF16)
uint8 = dtype(_infinicore.DataType.U8)
int8 = dtype(_infinicore.DataType.I8)
int16 = dtype(_infinicore.DataType.I16)
short = int16
int32 = dtype(_infinicore.DataType.I32)
int = int32
int64 = dtype(_infinicore.DataType.I64)
long = int64
bool = dtype(_infinicore.DataType.BOOL)
from infinicore.tensor import Tensor
from .. import _infinicore
def matmul(input, other, *, out=None):
    """Matrix product of `input` and `other`.

    Returns a new `Tensor` when `out` is None; otherwise writes into `out`
    in place and returns it (torch's `out=` convention — the original
    implicitly returned None in that branch).
    """
    if out is None:
        return Tensor(_infinicore.matmul(input._underlying, other._underlying))
    _infinicore.matmul_(out._underlying, input._underlying, other._underlying)
    return out
from infinicore.tensor import Tensor
from .. import _infinicore
def rearrange(input, other, *, out=None):
    """Return a re-laid-out copy of `input`, or write it into `out`.

    NOTE(review): `other` is accepted but never used — it looks copied from a
    binary-op signature. It is kept so positional callers don't break;
    confirm with the binding before removing it.
    """
    if out is None:
        return Tensor(_infinicore.rearrange(input._underlying))
    _infinicore.rearrange_(out._underlying, input._underlying)
    # Return `out` for torch-style chaining; previously returned None.
    return out
from . import _infinicore
class Tensor:
    """Thin Python wrapper over an `_infinicore` tensor handle.

    Every wrapping method returns a new `Tensor` around the handle the
    underlying binding produces; no computation happens at this layer.
    """

    def __init__(self, tensor):
        """An internal method. Please do not use this directly."""
        self._underlying = tensor

    @property
    def shape(self):
        # Sequence of dimension sizes, as reported by the binding.
        return self._underlying.shape

    @property
    def dtype(self):
        return self._underlying.dtype

    @property
    def device(self):
        return self._underlying.device

    @property
    def ndim(self):
        return self._underlying.ndim

    def data_ptr(self):
        # NOTE(review): forwards the `data_ptr` attribute without calling it —
        # confirm it is exposed as a property on the binding side.
        return self._underlying.data_ptr

    def size(self, dim=None):
        """Return the full shape, or the size of dimension `dim`."""
        if dim is None:
            return self.shape
        return self.shape[dim]

    def stride(self, dim=None):
        """Return all strides, or the stride of dimension `dim`."""
        if dim is None:
            return self._underlying.strides
        return self._underlying.strides[dim]

    def numel(self):
        return self._underlying.numel()

    def is_contiguous(self):
        return self._underlying.is_contiguous()

    def is_is_pinned(self):
        # NOTE(review): the doubled "is_is" mirrors the underlying binding
        # name; kept as-is for compatibility — consider an `is_pinned` alias
        # once the binding is renamed.
        return self._underlying.is_is_pinned()

    def copy_(self, src):
        return Tensor(self._underlying.copy_(src._underlying))

    def to(self, *args, **kwargs):
        # Positional args are unwrapped to their underlying handles.
        return Tensor(
            self._underlying.to(*tuple(arg._underlying for arg in args), **kwargs)
        )

    def as_strided(self, size, stride):
        # Bug fix: the original dropped the result (missing `return`),
        # so callers always got None.
        return Tensor(self._underlying.as_strided(size, stride))

    def contiguous(self):
        return Tensor(self._underlying.contiguous())

    def permute(self, dims):
        return Tensor(self._underlying.permute(dims))

    def view(self, shape):
        return Tensor(self._underlying.view(shape))
def empty(size, *, dtype=None, device=None, pin_memory=False):
    """Create an uninitialized tensor of shape `size`.

    NOTE(review): `dtype` and `device` default to None but `._underlying` is
    accessed unconditionally, so both are effectively required — confirm
    whether None should pick defaults upstream.
    """
    return Tensor(
        _infinicore.empty(size, dtype._underlying, device._underlying, pin_memory)
    )
def strided_empty(size, strides, *, dtype=None, device=None, pin_memory=False):
    """Create an uninitialized tensor with explicit `strides`.

    NOTE(review): `dtype` and `device` are dereferenced unconditionally
    despite defaulting to None — effectively required.
    """
    return Tensor(
        _infinicore.strided_empty(
            size, strides, dtype._underlying, device._underlying, pin_memory
        )
    )
def zeros(size, *, dtype=None, device=None, pin_memory=False):
    """Create a zero-filled tensor of shape `size`.

    NOTE(review): `dtype`/`device` are dereferenced unconditionally despite
    defaulting to None — effectively required.
    """
    return Tensor(
        _infinicore.zeros(size, dtype._underlying, device._underlying, pin_memory)
    )
def ones(size, *, dtype=None, device=None, pin_memory=False):
    """Create a one-filled tensor of shape `size`.

    NOTE(review): `dtype`/`device` are dereferenced unconditionally despite
    defaulting to None — effectively required.
    """
    return Tensor(
        _infinicore.ones(size, dtype._underlying, device._underlying, pin_memory)
    )
def from_blob(data_ptr, size, *, dtype=None, device=None):
    """Wrap an existing raw buffer at `data_ptr` as a tensor (no copy).

    NOTE(review): the caller must keep the buffer alive; `dtype`/`device`
    are dereferenced unconditionally despite defaulting to None.
    """
    return Tensor(
        _infinicore.from_blob(data_ptr, size, dtype._underlying, device._underlying)
    )
def strided_from_blob(data_ptr, size, strides, *, dtype=None, device=None):
    """Wrap an existing raw buffer as a tensor with explicit `strides` (no copy).

    NOTE(review): the caller must keep the buffer alive; `dtype`/`device`
    are dereferenced unconditionally despite defaulting to None.
    """
    return Tensor(
        _infinicore.strided_from_blob(
            data_ptr, size, strides, dtype._underlying, device._underlying
        )
    )
import glob
import os
import shutil
import subprocess
from pathlib import Path
from setuptools import setup
from setuptools.command.build_py import build_py
from setuptools.command.build import build
# Root of the InfiniCore installation; the INFINI_ROOT environment variable
# overrides the default of ~/.infini.
INSTALLATION_DIR = os.getenv("INFINI_ROOT", str(Path.home() / ".infini"))
LIB_DIR = os.path.join(INSTALLATION_DIR, "lib")
PACKAGE_NAME = "infinicore"
PACKAGE_DIR = os.path.join(INSTALLATION_DIR, PACKAGE_NAME)
# NOTE(review): this span of the commit view interleaved two versions of
# setup.py (an older BuildPy/build_py variant and the new Build/build one).
# Resolved to the new version, which matches the rest of this commit
# (the `_infinicore` xmake target and the "python" package dir).
class Build(build):
    """Custom `build` step: compile and install the `_infinicore` extension
    with xmake, then move the built library into the build tree."""

    def run(self):
        # check=True: fail the build instead of silently packaging nothing.
        subprocess.run(["xmake", "build", "-y", "_infinicore"], check=True)
        subprocess.run(["xmake", "install", "_infinicore"], check=True)
        installation_dir = os.getenv("INFINI_ROOT", str(Path.home() / ".infini"))
        lib_dir = os.path.join(installation_dir, "lib")
        lib_path = glob.glob(os.path.join(lib_dir, "_infinicore.*"))[0]
        package_dir = os.path.join(self.build_lib, "infinicore")
        os.makedirs(package_dir, exist_ok=True)
        shutil.move(lib_path, package_dir)
        # NOTE(review): the original never calls build.run(self), so the
        # standard sub-commands (build_py, ...) do not run here — confirm
        # whether that is intentional.


setup(package_dir={"": "python"}, cmdclass={"build": Build})
#include "device_caching_allocator.hpp"
#include <infinirt.h>
#include "../../utils.hpp"
namespace infinicore {

// Device-memory allocator: every allocation and free is issued
// asynchronously on the current context's stream.
DeviceCachingAllocator::DeviceCachingAllocator(Device device) : MemoryAllocator(), device_(device) {}

std::byte *DeviceCachingAllocator::allocate(size_t size) {
    void *raw = nullptr;
    INFINICORE_CHECK_ERROR(infinirtMallocAsync(&raw, size, context::getStream()));
    return static_cast<std::byte *>(raw);
}

void DeviceCachingAllocator::deallocate(std::byte *ptr) {
    INFINICORE_CHECK_ERROR(infinirtFreeAsync(ptr, context::getStream()));
}

} // namespace infinicore
#pragma once
#include "memory_allocator.hpp"
#include "../context_impl.hpp"
namespace infinicore {

// Allocator for device (GPU/accelerator) memory, tied to one Device.
// NOTE(review): despite the "Caching" name, the visible implementation
// forwards straight to infinirt async malloc/free — no caching layer is
// present yet.
class DeviceCachingAllocator : public MemoryAllocator {
public:
    explicit DeviceCachingAllocator(Device device);
    ~DeviceCachingAllocator() = default;

    std::byte *allocate(size_t size) override;
    void deallocate(std::byte *ptr) override;

private:
    Device device_; // device this allocator serves
};

} // namespace infinicore
#include "device_pinned_allocator.hpp"
#include <infinirt.h>
#include "../../utils.hpp"
namespace infinicore {

// Pinned (page-locked) host memory allocator owned by one device runtime.
DevicePinnedHostAllocator::DevicePinnedHostAllocator(Device device) : MemoryAllocator(), owner_(device) {}

DevicePinnedHostAllocator::~DevicePinnedHostAllocator() {
    // Drain any frees that were deferred while another device was current.
    // NOTE(review): the destructor does not switch the active device back to
    // owner_ before freeing — confirm infinirtFreeHost is device-agnostic.
    gc();
}

std::byte *DevicePinnedHostAllocator::allocate(size_t size) {
    void *ptr;
    INFINICORE_CHECK_ERROR(infinirtMallocHost(&ptr, size));
    return (std::byte *)ptr;
}

void DevicePinnedHostAllocator::deallocate(std::byte *ptr) {
    if (owner_ == context::getDevice()) {
        // Owning device is current: free immediately, and take the chance
        // to drain previously deferred frees as well.
        INFINICORE_CHECK_ERROR(infinirtFreeHost(ptr));
        gc();
    } else {
        // Another device is current: defer the free until the owner is
        // active again (see gc() and the thread-safety TODO in the header).
        gc_queue_.push(ptr);
    }
}

// Frees every pointer queued by deallocate() while the owner was inactive.
void DevicePinnedHostAllocator::gc() {
    while (gc_queue_.empty() == false) {
        std::byte *p = gc_queue_.front();
        INFINICORE_CHECK_ERROR(infinirtFreeHost(p));
        gc_queue_.pop();
    }
}

} // namespace infinicore
#pragma once
#include "memory_allocator.hpp"
#include "../context_impl.hpp"
#include <queue>
namespace infinicore {

// Allocates pinned (page-locked) host memory on behalf of one device.
// Frees issued while a different device is current are queued and released
// later by gc().
class DevicePinnedHostAllocator : public MemoryAllocator {
public:
    explicit DevicePinnedHostAllocator(Device device);
    ~DevicePinnedHostAllocator();

    std::byte *allocate(size_t size) override;
    void deallocate(std::byte *ptr) override;

    // Releases all deferred frees accumulated in gc_queue_.
    void gc();

private:
    Device owner_; // device whose runtime owns this allocator
    /// TODO: this is not thread-safe
    std::queue<std::byte *> gc_queue_; // frees deferred while owner_ inactive
};

} // namespace infinicore
#include "host_allocator.hpp"

#include <cstdlib>

#include <infinirt.h>
namespace infinicore {

// Plain pageable host-memory allocator backed by the C heap.
std::byte *HostAllocator::allocate(size_t size) {
    return static_cast<std::byte *>(std::malloc(size));
}

void HostAllocator::deallocate(std::byte *ptr) {
    std::free(ptr);
}

} // namespace infinicore
#pragma once
#include "memory_allocator.hpp"
namespace infinicore {

// Allocator for ordinary (pageable) host memory; used by the CPU runtime.
class HostAllocator : public MemoryAllocator {
public:
    HostAllocator() = default;
    ~HostAllocator() = default;

    std::byte *allocate(size_t size) override;
    void deallocate(std::byte *ptr) override;
};

} // namespace infinicore
#pragma once
#include "infinicore/memory.hpp"
#include <memory>
namespace infinicore {

// Abstract interface implemented by all memory allocators (host, pinned
// host, device). The virtual destructor allows deletion through this base.
class MemoryAllocator {
public:
    virtual ~MemoryAllocator() = default;

    // Returns a pointer to `size` bytes; ownership stays with the allocator
    // until the same pointer is passed back to deallocate().
    virtual std::byte *allocate(size_t size) = 0;
    virtual void deallocate(std::byte *ptr) = 0;
};

} // namespace infinicore
#include "context_impl.hpp"
#include "../utils.hpp"
namespace infinicore {

// ContextImpl: process-wide singleton owning one Runtime per
// (device type, device index) slot, plus a pointer to the active runtime.

Runtime *ContextImpl::getCurrentRuntime() {
    return current_runtime_;
}

// Slot [CPU][0] is created eagerly in the constructor, so it is always valid.
Runtime *ContextImpl::getCpuRuntime() {
    return runtime_table_[int(Device::Type::CPU)][0].get();
}

void ContextImpl::setDevice(Device device) {
    if (device == getCurrentRuntime()->device()) {
        // Do nothing if the device is already set.
        return;
    }
    if (runtime_table_[int(device.getType())][device.getIndex()] == nullptr) {
        // Lazy initialization of runtime if never set before. The Runtime
        // constructor calls activate(), so no separate activation is needed.
        runtime_table_[int(device.getType())][device.getIndex()] = std::unique_ptr<Runtime>(new Runtime(device));
        current_runtime_ = runtime_table_[int(device.getType())][device.getIndex()].get();
    } else {
        current_runtime_ = runtime_table_[int(device.getType())][device.getIndex()].get()->activate();
    }
}

// NOTE(review): returns the reserved slot count (the device count reported
// at startup), not the number of runtimes actually initialized so far.
size_t ContextImpl::getDeviceCount(Device::Type type) {
    return runtime_table_[int(type)].size();
}

ContextImpl &ContextImpl::singleton() {
    static ContextImpl instance;
    return instance;
}

ContextImpl::ContextImpl() {
    std::vector<int> device_counter(size_t(Device::Type::COUNT));
    INFINICORE_CHECK_ERROR(infinirtGetAllDeviceCount(device_counter.data()));
    // Reserve runtime slot for all devices.
    runtime_table_[0].resize(device_counter[0]);
    runtime_table_[0][0] = std::unique_ptr<Runtime>(new Runtime(Device(Device::Type::CPU, 0)));
    // Context will try to use the first non-cpu available device as the default runtime.
    // NOTE(review): the loop walks the enum from the highest value downward,
    // so the default is the highest-numbered backend with devices — confirm
    // "first" above means this ordering intentionally.
    for (int i = int(Device::Type::COUNT) - 1; i > 0; i--) {
        if (device_counter[i] > 0) {
            runtime_table_[i].resize(device_counter[i]);
            if (current_runtime_ == nullptr) {
                runtime_table_[i][0] = std::unique_ptr<Runtime>(new Runtime(Device(Device::Type(i), 0)));
                current_runtime_ = runtime_table_[i][0].get();
            }
        }
    }
    // Fall back to the CPU runtime when no accelerator is present.
    if (current_runtime_ == nullptr) {
        current_runtime_ = runtime_table_[0][0].get();
    }
}

// Free-function facade over the ContextImpl singleton.
namespace context {

void setDevice(Device device) {
    ContextImpl::singleton().setDevice(device);
}

Device getDevice() {
    return ContextImpl::singleton().getCurrentRuntime()->device();
}

size_t getDeviceCount(Device::Type type) {
    return ContextImpl::singleton().getDeviceCount(type);
}

infinirtStream_t getStream() {
    return ContextImpl::singleton().getCurrentRuntime()->stream();
}

infiniopHandle_t getInfiniopHandle() {
    return ContextImpl::singleton().getCurrentRuntime()->infiniopHandle();
}

void syncStream() {
    return ContextImpl::singleton().getCurrentRuntime()->syncStream();
}

void syncDevice() {
    return ContextImpl::singleton().getCurrentRuntime()->syncDevice();
}

std::shared_ptr<Memory> allocateMemory(size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->allocateMemory(size);
}

// Pageable host memory, always served by the CPU runtime.
std::shared_ptr<Memory> allocateHostMemory(size_t size) {
    return ContextImpl::singleton().getCpuRuntime()->allocateMemory(size);
}

std::shared_ptr<Memory> allocatePinnedHostMemory(size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->allocatePinnedHostMemory(size);
}

void memcpyH2D(void *dst, const void *src, size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->memcpyH2D(dst, src, size);
}

void memcpyD2H(void *dst, const void *src, size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->memcpyD2H(dst, src, size);
}

void memcpyD2D(void *dst, const void *src, size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->memcpyD2D(dst, src, size);
}

// NOTE(review): host-to-host copies are routed through the CPU runtime's
// memcpyD2D (INFINIRT_MEMCPY_D2D) — confirm infinirt treats that as a plain
// host memcpy for the CPU device.
void memcpyH2H(void *dst, const void *src, size_t size) {
    return ContextImpl::singleton().getCpuRuntime()->memcpyD2D(dst, src, size);
}

} // namespace context

} // namespace infinicore
#pragma once
#include "infinicore/context/context.hpp"
#include "runtime/runtime.hpp"
#include <array>
#include <vector>
namespace infinicore {

// Process-wide registry of per-device Runtime objects. Constructed once via
// singleton(); Runtime instances are created lazily on first setDevice().
class ContextImpl {
private:
    // One slot per (device type, device index); empty unique_ptr = not yet
    // initialized.
    std::array<std::vector<std::unique_ptr<Runtime>>, size_t(Device::Type::COUNT)> runtime_table_;
    Runtime *current_runtime_ = nullptr; // active runtime; never null after construction

protected:
    ContextImpl(); // protected: construction only through singleton()

public:
    Runtime *getCurrentRuntime();
    Runtime *getCpuRuntime();
    void setDevice(Device);
    size_t getDeviceCount(Device::Type type);
    static ContextImpl &singleton();
    friend class Runtime;
};

} // namespace infinicore
#include "runtime.hpp"
#include "../../utils.hpp"
#include "../allocators/device_caching_allocator.hpp"
#include "../allocators/device_pinned_allocator.hpp"
#include "../allocators/host_allocator.hpp"
namespace infinicore {

// Runtime: per-device bundle of stream, infiniop handle, and allocators.
Runtime::Runtime(Device device) : device_(device) {
    // Make this device current before creating its stream/handle.
    activate();
    INFINICORE_CHECK_ERROR(infinirtStreamCreate(&stream_));
    INFINICORE_CHECK_ERROR(infiniopCreateHandle(&infiniop_handle_));
    if (device_.getType() == Device::Type::CPU) {
        device_memory_allocator_ = std::make_unique<HostAllocator>();
        // No pinned allocator for the CPU runtime (see allocatePinnedHostMemory).
    } else {
        device_memory_allocator_ = std::make_unique<DeviceCachingAllocator>(device);
        pinned_host_memory_allocator_ = std::make_unique<DevicePinnedHostAllocator>(device);
    }
}

Runtime::~Runtime() {
    activate();
    // Allocators are destroyed before the stream/handle they may depend on.
    if (pinned_host_memory_allocator_) {
        pinned_host_memory_allocator_.reset();
    }
    device_memory_allocator_.reset();
    // Errors are deliberately not checked in the destructor.
    infiniopDestroyHandle(infiniop_handle_);
    infinirtStreamDestroy(stream_);
}

// Makes this runtime's device the current infinirt device; returns this for
// chaining.
Runtime *Runtime::activate() {
    INFINICORE_CHECK_ERROR(infinirtSetDevice((infiniDevice_t)device_.getType(), (int)device_.getIndex()));
    return this;
}

Device Runtime::device() const {
    return device_;
}

infinirtStream_t Runtime::stream() const {
    return stream_;
}

infiniopHandle_t Runtime::infiniopHandle() const {
    return infiniop_handle_;
}

void Runtime::syncStream() {
    INFINICORE_CHECK_ERROR(infinirtStreamSynchronize(stream_));
}

void Runtime::syncDevice() {
    INFINICORE_CHECK_ERROR(infinirtDeviceSynchronize());
}

// NOTE(review): the deleter lambdas below capture the allocator by raw
// pointer, so a Memory object must not outlive its Runtime — confirm the
// ownership model guarantees this.
std::shared_ptr<Memory> Runtime::allocateMemory(size_t size) {
    std::byte *data_ptr = device_memory_allocator_->allocate(size);
    return std::make_shared<Memory>(
        data_ptr, size, device_,
        [alloc = device_memory_allocator_.get()](std::byte *p) {
            alloc->deallocate(p);
        });
}

// Precondition: this is a non-CPU runtime (the pinned allocator is only
// created in that branch of the constructor).
std::shared_ptr<Memory> Runtime::allocatePinnedHostMemory(size_t size) {
    std::byte *data_ptr = pinned_host_memory_allocator_->allocate(size);
    return std::make_shared<Memory>(
        data_ptr, size, device_,
        [alloc = pinned_host_memory_allocator_.get()](std::byte *p) {
            alloc->deallocate(p);
        },
        true);
}

void Runtime::memcpyH2D(void *dst, const void *src, size_t size) {
    INFINICORE_CHECK_ERROR(infinirtMemcpyAsync(dst, src, size, INFINIRT_MEMCPY_H2D, stream_));
}

// NOTE(review): D2H is synchronous while H2D/D2D are stream-async — looks
// deliberate (the host buffer is usable on return), but worth confirming.
void Runtime::memcpyD2H(void *dst, const void *src, size_t size) {
    INFINICORE_CHECK_ERROR(infinirtMemcpy(dst, src, size, INFINIRT_MEMCPY_D2H));
}

void Runtime::memcpyD2D(void *dst, const void *src, size_t size) {
    INFINICORE_CHECK_ERROR(infinirtMemcpyAsync(dst, src, size, INFINIRT_MEMCPY_D2D, stream_));
}

std::string Runtime::toString() const {
    return fmt::format("Runtime({})", device_.toString());
}

} // namespace infinicore
#pragma once
#include "../allocators/memory_allocator.hpp"
#include "infinicore/context/context.hpp"
#include <infiniop.h>
#include <infinirt.h>
namespace infinicore {

class ContextImpl;

// Per-device execution context: owns the device's stream, its infiniop
// handle, and its memory allocators. Constructed only by ContextImpl.
class Runtime {
private:
    Device device_;
    infinirtStream_t stream_;
    infiniopHandle_t infiniop_handle_;
    std::unique_ptr<MemoryAllocator> device_memory_allocator_;
    // Null for the CPU runtime (see Runtime's constructor).
    std::unique_ptr<MemoryAllocator> pinned_host_memory_allocator_;

protected:
    Runtime(Device device); // protected: created only by ContextImpl (friend)

public:
    ~Runtime();
    // Makes this runtime's device current; returns this.
    Runtime *activate();
    Device device() const;
    infinirtStream_t stream() const;
    infiniopHandle_t infiniopHandle() const;
    void syncStream();
    void syncDevice();
    std::shared_ptr<Memory> allocateMemory(size_t size);
    std::shared_ptr<Memory> allocatePinnedHostMemory(size_t size);
    void memcpyH2D(void *dst, const void *src, size_t size);
    void memcpyD2H(void *dst, const void *src, size_t size);
    void memcpyD2D(void *dst, const void *src, size_t size);
    std::string toString() const;
    friend class ContextImpl;
};

} // namespace infinicore
#include <infinicore.hpp>
#include <map>
#include <string>
#include "infinicore.hpp"
namespace infinicore {
Device::Device(const Type &type, const Index &index) : type_{type}, index_{index} {}
const Device::Type &Device::get_type() const {
const Device::Type &Device::getType() const {
return type_;
}
const Device::Index &Device::get_index() const {
const Device::Index &Device::getIndex() const {
return index_;
}
std::string Device::to_string() const {
return to_string(type_) + ":" + std::to_string(index_);
std::string Device::toString() const {
return toString(type_) + ":" + std::to_string(index_);
}
std::string Device::to_string(const Type &type) {
std::string Device::toString(const Type &type) {
switch (type) {
case Type::cpu:
return "cpu";
case Type::cuda:
return "cuda";
case Type::meta:
return "meta";
case Type::CPU:
return "CPU";
case Type::NVIDIA:
return "NVIDIA";
case Type::CAMBRICON:
return "CAMBRICON";
case Type::ASCEND:
return "ASCEND";
case Type::METAX:
return "METAX";
case Type::MOORE:
return "MOORE";
case Type::ILUVATAR:
return "ILUVATAR";
case Type::KUNLUN:
return "KUNLUN";
case Type::SUGON:
return "SUGON";
}
// TODO: Add error handling.
return "";
}
bool Device::operator==(const Device &other) const {
return type_ == other.type_ && index_ == other.index_;
}
bool Device::operator!=(const Device &other) const {
return type_ != other.type_ || index_ != other.index_;
}
} // namespace infinicore
......@@ -2,34 +2,82 @@
namespace infinicore {
std::string to_string(const DataType &dtype) {
std::string str{"infinicore."};
std::string toString(const DataType &dtype) {
switch (dtype) {
case DataType::BYTE:
return "BYTE";
case DataType::BOOL:
return "BOOL";
case DataType::I8:
return "I8";
case DataType::I16:
return "I16";
case DataType::I32:
return "I32";
case DataType::I64:
return "I64";
case DataType::U8:
return "U8";
case DataType::U16:
return "U16";
case DataType::U32:
return "U32";
case DataType::U64:
return "U64";
case DataType::F8:
return "F8";
case DataType::F16:
return "F16";
case DataType::F32:
return "F32";
case DataType::F64:
return "F64";
case DataType::C16:
return "C16";
case DataType::C32:
return "C32";
case DataType::C64:
return "C64";
case DataType::C128:
return "C128";
case DataType::BF16:
return "BF16";
}
// TODO: Add error handling.
return "";
}
size_t dsize(const DataType &dtype) {
switch (dtype) {
case DataType::bfloat16:
str += "bfloat16";
break;
case DataType::float16:
str += "float16";
break;
case DataType::float32:
str += "float32";
break;
case DataType::float64:
str += "float64";
break;
case DataType::int32:
str += "int32";
break;
case DataType::int64:
str += "int64";
break;
case DataType::uint8:
str += "uint8";
break;
case DataType::BYTE:
case DataType::BOOL:
case DataType::F8:
case DataType::I8:
case DataType::U8:
return 1;
case DataType::I16:
case DataType::U16:
case DataType::F16:
case DataType::BF16:
case DataType::C16:
return 2;
case DataType::I32:
case DataType::U32:
case DataType::F32:
case DataType::C32:
return 4;
case DataType::I64:
case DataType::U64:
case DataType::F64:
case DataType::C64:
return 8;
case DataType::C128:
return 16;
}
return str;
// TODO: Add error handling.
return 0;
}
} // namespace infinicore
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment