Unverified Commit 9a05446f authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

issue/461 InfiniCore 推理运行时


Co-authored-by: Jiacheng Huang <huangjiacheng0709@outlook.com>
Co-authored-by: wooway777 <wooway777@gmail.com>
parent 37411f6d
from . import _infinicore
class device:
    """Torch-style device descriptor.

    Accepts ``device("cuda", 0)``, ``device("cuda:0")``, another ``device``
    instance, or nothing (defaults to ``"cpu"``), and resolves the pair to an
    ``_infinicore.Device`` handle stored in ``_underlying``.
    """

    def __init__(self, type=None, index=None):
        if type is None:
            type = "cpu"
        if isinstance(type, device):
            # Copy construction. Bug fix: also copy the underlying handle —
            # the original returned early without ever setting `_underlying`.
            self.type = type.type
            self.index = type.index
            self._underlying = type._underlying
            return
        if ":" in type:
            if index is not None:
                raise ValueError(
                    '`index` should not be provided when `type` contains `":"`.'
                )
            type, index = type.split(":")
            index = int(index)
        self.type = type
        self.index = index
        # `index` may still be None (e.g. plain "cuda") — resolve as index 0.
        _type, _index = device._to_infinicore_device(type, index if index else 0)
        self._underlying = _infinicore.Device(_type, _index)

    def __repr__(self):
        return f"device(type='{self.type}'{f', index={self.index}' if self.index is not None else ''})"

    def __str__(self):
        return f"{self.type}{f':{self.index}' if self.index is not None else ''}"

    @staticmethod
    def _to_infinicore_device(type, index):
        """Map a torch-style (type, index) pair to an InfiniCore (type, index).

        Several InfiniCore backends share one torch name (see
        `_TORCH_DEVICE_MAP`), so the torch index is interpreted as a flat
        offset across all matching backends, in enum order.
        """
        # [:-1] drops the last enum member — presumably a COUNT sentinel;
        # TODO(review): confirm against the binding.
        all_device_types = tuple(_infinicore.Device.Type.__members__.values())[:-1]
        all_device_count = tuple(
            _infinicore.get_device_count(device) for device in all_device_types
        )
        # torch name -> {infinicore type -> device count} (counts filled below).
        torch_devices = {
            torch_type: {
                infinicore_type: 0
                for infinicore_type in all_device_types
                if _TORCH_DEVICE_MAP[infinicore_type] == torch_type
            }
            for torch_type in _TORCH_DEVICE_MAP.values()
        }
        for i, count in enumerate(all_device_count):
            infinicore_device_type = _infinicore.Device.Type(i)
            torch_devices[_TORCH_DEVICE_MAP[infinicore_device_type]][
                infinicore_device_type
            ] += count
        for infinicore_device_type, infinicore_device_count in torch_devices[
            type
        ].items():
            for i in range(infinicore_device_count):
                if index == 0:
                    return infinicore_device_type, i
                index -= 1
        # Bug fix: the original fell off the end and returned None, which the
        # caller then failed to unpack. Fail loudly instead.
        raise ValueError(
            f"no InfiniCore device available for {type!r} at the requested index"
        )
# Maps each InfiniCore backend to the torch device-type string it is exposed
# as. Several backends deliberately share "cuda" — presumably CUDA-compatible
# stacks (TODO(review): confirm for KUNLUN/SUGON).
_TORCH_DEVICE_MAP = {
    _infinicore.Device.Type.CPU: "cpu",
    _infinicore.Device.Type.NVIDIA: "cuda",
    _infinicore.Device.Type.CAMBRICON: "mlu",
    _infinicore.Device.Type.ASCEND: "npu",
    _infinicore.Device.Type.METAX: "cuda",
    _infinicore.Device.Type.MOORE: "musa",
    _infinicore.Device.Type.ILUVATAR: "cuda",
    _infinicore.Device.Type.KUNLUN: "cuda",
    _infinicore.Device.Type.SUGON: "cuda",
}
from . import _infinicore
class dtype:
    """Wraps a raw ``_infinicore.DataType`` value behind a torch-like name."""

    def __init__(self, data_type):
        """An internal method. Please do not use this directly."""
        self._underlying = data_type

    def __repr__(self):
        # Translate the underlying enum member into its torch-style name.
        names = {
            _infinicore.DataType.BYTE: "uint8",
            _infinicore.DataType.BOOL: "bool",
            _infinicore.DataType.I8: "int8",
            _infinicore.DataType.I16: "int16",
            _infinicore.DataType.I32: "int32",
            _infinicore.DataType.I64: "int64",
            _infinicore.DataType.U8: "uint8",
            _infinicore.DataType.U16: "uint16",
            _infinicore.DataType.U32: "uint32",
            _infinicore.DataType.U64: "uint64",
            _infinicore.DataType.F8: "float8",
            _infinicore.DataType.F16: "float16",
            _infinicore.DataType.F32: "float32",
            _infinicore.DataType.F64: "float64",
            _infinicore.DataType.C16: "complex16",
            _infinicore.DataType.C32: "complex32",
            _infinicore.DataType.C64: "complex64",
            _infinicore.DataType.C128: "complex128",
            _infinicore.DataType.BF16: "bfloat16",
        }
        return "infinicore." + names[self._underlying]
# Public dtype singletons, mirroring torch's naming, including its aliases
# (`float` for float32, `half` for float16, etc.). Shadowing the builtins
# `float`, `int`, and `bool` is deliberate here — it matches `torch.float`,
# `torch.int`, `torch.bool`.
float32 = dtype(_infinicore.DataType.F32)
float = float32
float64 = dtype(_infinicore.DataType.F64)
double = float64
complex32 = dtype(_infinicore.DataType.C32)
chalf = complex32
complex64 = dtype(_infinicore.DataType.C64)
cfloat = complex64
complex128 = dtype(_infinicore.DataType.C128)
cdouble = complex128
float16 = dtype(_infinicore.DataType.F16)
half = float16
bfloat16 = dtype(_infinicore.DataType.BF16)
uint8 = dtype(_infinicore.DataType.U8)
int8 = dtype(_infinicore.DataType.I8)
int16 = dtype(_infinicore.DataType.I16)
short = int16
int32 = dtype(_infinicore.DataType.I32)
int = int32
int64 = dtype(_infinicore.DataType.I64)
long = int64
bool = dtype(_infinicore.DataType.BOOL)
from infinicore.tensor import Tensor
from .. import _infinicore
def matmul(input, other, *, out=None):
    """Matrix product of `input` and `other`.

    Returns a new `Tensor` when `out` is None; otherwise writes into `out`
    in place and returns it (torch's `out=` convention — the original
    implicitly returned None in that branch).
    """
    if out is None:
        return Tensor(_infinicore.matmul(input._underlying, other._underlying))
    _infinicore.matmul_(out._underlying, input._underlying, other._underlying)
    return out
from infinicore.tensor import Tensor
from .. import _infinicore
def rearrange(input, other, *, out=None):
    """Return a re-laid-out copy of `input`, or write it into `out`.

    NOTE(review): `other` is accepted but never used — it looks copied from a
    binary-op signature. It is kept so positional callers don't break;
    confirm with the binding before removing it.
    """
    if out is None:
        return Tensor(_infinicore.rearrange(input._underlying))
    _infinicore.rearrange_(out._underlying, input._underlying)
    # Return `out` for torch-style chaining; previously returned None.
    return out
from . import _infinicore
class Tensor:
    """Thin Python wrapper over an `_infinicore` tensor handle.

    Every wrapping method returns a new `Tensor` around the handle the
    underlying binding produces; no computation happens at this layer.
    """

    def __init__(self, tensor):
        """An internal method. Please do not use this directly."""
        self._underlying = tensor

    @property
    def shape(self):
        # Sequence of dimension sizes, as reported by the binding.
        return self._underlying.shape

    @property
    def dtype(self):
        return self._underlying.dtype

    @property
    def device(self):
        return self._underlying.device

    @property
    def ndim(self):
        return self._underlying.ndim

    def data_ptr(self):
        # NOTE(review): forwards the `data_ptr` attribute without calling it —
        # confirm it is exposed as a property on the binding side.
        return self._underlying.data_ptr

    def size(self, dim=None):
        """Return the full shape, or the size of dimension `dim`."""
        if dim is None:
            return self.shape
        return self.shape[dim]

    def stride(self, dim=None):
        """Return all strides, or the stride of dimension `dim`."""
        if dim is None:
            return self._underlying.strides
        return self._underlying.strides[dim]

    def numel(self):
        return self._underlying.numel()

    def is_contiguous(self):
        return self._underlying.is_contiguous()

    def is_is_pinned(self):
        # NOTE(review): the doubled "is_is" mirrors the underlying binding
        # name; kept as-is for compatibility — consider an `is_pinned` alias
        # once the binding is renamed.
        return self._underlying.is_is_pinned()

    def copy_(self, src):
        return Tensor(self._underlying.copy_(src._underlying))

    def to(self, *args, **kwargs):
        # Positional args are unwrapped to their underlying handles.
        return Tensor(
            self._underlying.to(*tuple(arg._underlying for arg in args), **kwargs)
        )

    def as_strided(self, size, stride):
        # Bug fix: the original dropped the result (missing `return`),
        # so callers always got None.
        return Tensor(self._underlying.as_strided(size, stride))

    def contiguous(self):
        return Tensor(self._underlying.contiguous())

    def permute(self, dims):
        return Tensor(self._underlying.permute(dims))

    def view(self, shape):
        return Tensor(self._underlying.view(shape))
def empty(size, *, dtype=None, device=None, pin_memory=False):
    """Create an uninitialized tensor of shape `size`.

    NOTE(review): `dtype` and `device` default to None but `._underlying` is
    accessed unconditionally, so both are effectively required — confirm
    whether None should pick defaults upstream.
    """
    return Tensor(
        _infinicore.empty(size, dtype._underlying, device._underlying, pin_memory)
    )
def strided_empty(size, strides, *, dtype=None, device=None, pin_memory=False):
    """Create an uninitialized tensor with explicit `strides`.

    NOTE(review): `dtype` and `device` are dereferenced unconditionally
    despite defaulting to None — effectively required.
    """
    return Tensor(
        _infinicore.strided_empty(
            size, strides, dtype._underlying, device._underlying, pin_memory
        )
    )
def zeros(size, *, dtype=None, device=None, pin_memory=False):
    """Create a zero-filled tensor of shape `size`.

    NOTE(review): `dtype`/`device` are dereferenced unconditionally despite
    defaulting to None — effectively required.
    """
    return Tensor(
        _infinicore.zeros(size, dtype._underlying, device._underlying, pin_memory)
    )
def ones(size, *, dtype=None, device=None, pin_memory=False):
    """Create a one-filled tensor of shape `size`.

    NOTE(review): `dtype`/`device` are dereferenced unconditionally despite
    defaulting to None — effectively required.
    """
    return Tensor(
        _infinicore.ones(size, dtype._underlying, device._underlying, pin_memory)
    )
def from_blob(data_ptr, size, *, dtype=None, device=None):
    """Wrap an existing raw buffer at `data_ptr` as a tensor (no copy).

    NOTE(review): the caller must keep the buffer alive; `dtype`/`device`
    are dereferenced unconditionally despite defaulting to None.
    """
    return Tensor(
        _infinicore.from_blob(data_ptr, size, dtype._underlying, device._underlying)
    )
def strided_from_blob(data_ptr, size, strides, *, dtype=None, device=None):
    """Wrap an existing raw buffer as a tensor with explicit `strides` (no copy).

    NOTE(review): the caller must keep the buffer alive; `dtype`/`device`
    are dereferenced unconditionally despite defaulting to None.
    """
    return Tensor(
        _infinicore.strided_from_blob(
            data_ptr, size, strides, dtype._underlying, device._underlying
        )
    )
import glob
import os
import shutil
import subprocess
from pathlib import Path
from setuptools import setup
from setuptools.command.build_py import build_py
from setuptools.command.build import build
# Root of the InfiniCore installation; the INFINI_ROOT environment variable
# overrides the default of ~/.infini.
INSTALLATION_DIR = os.getenv("INFINI_ROOT", str(Path.home() / ".infini"))
LIB_DIR = os.path.join(INSTALLATION_DIR, "lib")
PACKAGE_NAME = "infinicore"
PACKAGE_DIR = os.path.join(INSTALLATION_DIR, PACKAGE_NAME)
# NOTE(review): this span of the commit view interleaved two versions of
# setup.py (an older BuildPy/build_py variant and the new Build/build one).
# Resolved to the new version, which matches the rest of this commit
# (the `_infinicore` xmake target and the "python" package dir).
class Build(build):
    """Custom `build` step: compile and install the `_infinicore` extension
    with xmake, then move the built library into the build tree."""

    def run(self):
        # check=True: fail the build instead of silently packaging nothing.
        subprocess.run(["xmake", "build", "-y", "_infinicore"], check=True)
        subprocess.run(["xmake", "install", "_infinicore"], check=True)
        installation_dir = os.getenv("INFINI_ROOT", str(Path.home() / ".infini"))
        lib_dir = os.path.join(installation_dir, "lib")
        lib_path = glob.glob(os.path.join(lib_dir, "_infinicore.*"))[0]
        package_dir = os.path.join(self.build_lib, "infinicore")
        os.makedirs(package_dir, exist_ok=True)
        shutil.move(lib_path, package_dir)
        # NOTE(review): the original never calls build.run(self), so the
        # standard sub-commands (build_py, ...) do not run here — confirm
        # whether that is intentional.


setup(package_dir={"": "python"}, cmdclass={"build": Build})
#include "device_caching_allocator.hpp"
#include <infinirt.h>
#include "../../utils.hpp"
namespace infinicore {

// Device-memory allocator: every allocation and free is issued
// asynchronously on the current context's stream.
DeviceCachingAllocator::DeviceCachingAllocator(Device device) : MemoryAllocator(), device_(device) {}

std::byte *DeviceCachingAllocator::allocate(size_t size) {
    void *raw = nullptr;
    INFINICORE_CHECK_ERROR(infinirtMallocAsync(&raw, size, context::getStream()));
    return static_cast<std::byte *>(raw);
}

void DeviceCachingAllocator::deallocate(std::byte *ptr) {
    INFINICORE_CHECK_ERROR(infinirtFreeAsync(ptr, context::getStream()));
}

} // namespace infinicore
#pragma once
#include "memory_allocator.hpp"
#include "../context_impl.hpp"
namespace infinicore {

// Allocator for device (GPU/accelerator) memory, tied to one Device.
// NOTE(review): despite the "Caching" name, the visible implementation
// forwards straight to infinirt async malloc/free — no caching layer is
// present yet.
class DeviceCachingAllocator : public MemoryAllocator {
public:
    explicit DeviceCachingAllocator(Device device);
    ~DeviceCachingAllocator() = default;

    std::byte *allocate(size_t size) override;
    void deallocate(std::byte *ptr) override;

private:
    Device device_; // device this allocator serves
};

} // namespace infinicore
#include "device_pinned_allocator.hpp"
#include <infinirt.h>
#include "../../utils.hpp"
namespace infinicore {

// Pinned (page-locked) host memory allocator owned by one device runtime.
DevicePinnedHostAllocator::DevicePinnedHostAllocator(Device device) : MemoryAllocator(), owner_(device) {}

DevicePinnedHostAllocator::~DevicePinnedHostAllocator() {
    // Drain any frees that were deferred while another device was current.
    // NOTE(review): the destructor does not switch the active device back to
    // owner_ before freeing — confirm infinirtFreeHost is device-agnostic.
    gc();
}

std::byte *DevicePinnedHostAllocator::allocate(size_t size) {
    void *ptr;
    INFINICORE_CHECK_ERROR(infinirtMallocHost(&ptr, size));
    return (std::byte *)ptr;
}

void DevicePinnedHostAllocator::deallocate(std::byte *ptr) {
    if (owner_ == context::getDevice()) {
        // Owning device is current: free immediately, and take the chance
        // to drain previously deferred frees as well.
        INFINICORE_CHECK_ERROR(infinirtFreeHost(ptr));
        gc();
    } else {
        // Another device is current: defer the free until the owner is
        // active again (see gc() and the thread-safety TODO in the header).
        gc_queue_.push(ptr);
    }
}

// Frees every pointer queued by deallocate() while the owner was inactive.
void DevicePinnedHostAllocator::gc() {
    while (gc_queue_.empty() == false) {
        std::byte *p = gc_queue_.front();
        INFINICORE_CHECK_ERROR(infinirtFreeHost(p));
        gc_queue_.pop();
    }
}

} // namespace infinicore
#pragma once
#include "memory_allocator.hpp"
#include "../context_impl.hpp"
#include <queue>
namespace infinicore {

// Allocates pinned (page-locked) host memory on behalf of one device.
// Frees issued while a different device is current are queued and released
// later by gc().
class DevicePinnedHostAllocator : public MemoryAllocator {
public:
    explicit DevicePinnedHostAllocator(Device device);
    ~DevicePinnedHostAllocator();

    std::byte *allocate(size_t size) override;
    void deallocate(std::byte *ptr) override;

    // Releases all deferred frees accumulated in gc_queue_.
    void gc();

private:
    Device owner_; // device whose runtime owns this allocator
    /// TODO: this is not thread-safe
    std::queue<std::byte *> gc_queue_; // frees deferred while owner_ inactive
};

} // namespace infinicore
#include "host_allocator.hpp"

#include <cstdlib>

#include <infinirt.h>
namespace infinicore {

// Plain pageable host-memory allocator backed by the C heap.
std::byte *HostAllocator::allocate(size_t size) {
    return static_cast<std::byte *>(std::malloc(size));
}

void HostAllocator::deallocate(std::byte *ptr) {
    std::free(ptr);
}

} // namespace infinicore
#pragma once
#include "memory_allocator.hpp"
namespace infinicore {

// Allocator for ordinary (pageable) host memory; used by the CPU runtime.
class HostAllocator : public MemoryAllocator {
public:
    HostAllocator() = default;
    ~HostAllocator() = default;

    std::byte *allocate(size_t size) override;
    void deallocate(std::byte *ptr) override;
};

} // namespace infinicore
#pragma once
#include "infinicore/memory.hpp"
#include <memory>
namespace infinicore {

// Abstract interface implemented by all memory allocators (host, pinned
// host, device). The virtual destructor allows deletion through this base.
class MemoryAllocator {
public:
    virtual ~MemoryAllocator() = default;

    // Returns a pointer to `size` bytes; ownership stays with the allocator
    // until the same pointer is passed back to deallocate().
    virtual std::byte *allocate(size_t size) = 0;
    virtual void deallocate(std::byte *ptr) = 0;
};

} // namespace infinicore
#include "context_impl.hpp"
#include "../utils.hpp"
namespace infinicore {

// ContextImpl: process-wide singleton owning one Runtime per
// (device type, device index) slot, plus a pointer to the active runtime.

Runtime *ContextImpl::getCurrentRuntime() {
    return current_runtime_;
}

// Slot [CPU][0] is created eagerly in the constructor, so it is always valid.
Runtime *ContextImpl::getCpuRuntime() {
    return runtime_table_[int(Device::Type::CPU)][0].get();
}

void ContextImpl::setDevice(Device device) {
    if (device == getCurrentRuntime()->device()) {
        // Do nothing if the device is already set.
        return;
    }
    if (runtime_table_[int(device.getType())][device.getIndex()] == nullptr) {
        // Lazy initialization of runtime if never set before. The Runtime
        // constructor calls activate(), so no separate activation is needed.
        runtime_table_[int(device.getType())][device.getIndex()] = std::unique_ptr<Runtime>(new Runtime(device));
        current_runtime_ = runtime_table_[int(device.getType())][device.getIndex()].get();
    } else {
        current_runtime_ = runtime_table_[int(device.getType())][device.getIndex()].get()->activate();
    }
}

// NOTE(review): returns the reserved slot count (the device count reported
// at startup), not the number of runtimes actually initialized so far.
size_t ContextImpl::getDeviceCount(Device::Type type) {
    return runtime_table_[int(type)].size();
}

ContextImpl &ContextImpl::singleton() {
    static ContextImpl instance;
    return instance;
}

ContextImpl::ContextImpl() {
    std::vector<int> device_counter(size_t(Device::Type::COUNT));
    INFINICORE_CHECK_ERROR(infinirtGetAllDeviceCount(device_counter.data()));
    // Reserve runtime slot for all devices.
    runtime_table_[0].resize(device_counter[0]);
    runtime_table_[0][0] = std::unique_ptr<Runtime>(new Runtime(Device(Device::Type::CPU, 0)));
    // Context will try to use the first non-cpu available device as the default runtime.
    // NOTE(review): the loop walks the enum from the highest value downward,
    // so the default is the highest-numbered backend with devices — confirm
    // "first" above means this ordering intentionally.
    for (int i = int(Device::Type::COUNT) - 1; i > 0; i--) {
        if (device_counter[i] > 0) {
            runtime_table_[i].resize(device_counter[i]);
            if (current_runtime_ == nullptr) {
                runtime_table_[i][0] = std::unique_ptr<Runtime>(new Runtime(Device(Device::Type(i), 0)));
                current_runtime_ = runtime_table_[i][0].get();
            }
        }
    }
    // Fall back to the CPU runtime when no accelerator is present.
    if (current_runtime_ == nullptr) {
        current_runtime_ = runtime_table_[0][0].get();
    }
}

// Free-function facade over the ContextImpl singleton.
namespace context {

void setDevice(Device device) {
    ContextImpl::singleton().setDevice(device);
}

Device getDevice() {
    return ContextImpl::singleton().getCurrentRuntime()->device();
}

size_t getDeviceCount(Device::Type type) {
    return ContextImpl::singleton().getDeviceCount(type);
}

infinirtStream_t getStream() {
    return ContextImpl::singleton().getCurrentRuntime()->stream();
}

infiniopHandle_t getInfiniopHandle() {
    return ContextImpl::singleton().getCurrentRuntime()->infiniopHandle();
}

void syncStream() {
    return ContextImpl::singleton().getCurrentRuntime()->syncStream();
}

void syncDevice() {
    return ContextImpl::singleton().getCurrentRuntime()->syncDevice();
}

std::shared_ptr<Memory> allocateMemory(size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->allocateMemory(size);
}

// Pageable host memory, always served by the CPU runtime.
std::shared_ptr<Memory> allocateHostMemory(size_t size) {
    return ContextImpl::singleton().getCpuRuntime()->allocateMemory(size);
}

std::shared_ptr<Memory> allocatePinnedHostMemory(size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->allocatePinnedHostMemory(size);
}

void memcpyH2D(void *dst, const void *src, size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->memcpyH2D(dst, src, size);
}

void memcpyD2H(void *dst, const void *src, size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->memcpyD2H(dst, src, size);
}

void memcpyD2D(void *dst, const void *src, size_t size) {
    return ContextImpl::singleton().getCurrentRuntime()->memcpyD2D(dst, src, size);
}

// NOTE(review): host-to-host copies are routed through the CPU runtime's
// memcpyD2D (INFINIRT_MEMCPY_D2D) — confirm infinirt treats that as a plain
// host memcpy for the CPU device.
void memcpyH2H(void *dst, const void *src, size_t size) {
    return ContextImpl::singleton().getCpuRuntime()->memcpyD2D(dst, src, size);
}

} // namespace context

} // namespace infinicore
#pragma once
#include "infinicore/context/context.hpp"
#include "runtime/runtime.hpp"
#include <array>
#include <vector>
namespace infinicore {

// Process-wide registry of per-device Runtime objects. Constructed once via
// singleton(); Runtime instances are created lazily on first setDevice().
class ContextImpl {
private:
    // One slot per (device type, device index); empty unique_ptr = not yet
    // initialized.
    std::array<std::vector<std::unique_ptr<Runtime>>, size_t(Device::Type::COUNT)> runtime_table_;
    Runtime *current_runtime_ = nullptr; // active runtime; never null after construction

protected:
    ContextImpl(); // protected: construction only through singleton()

public:
    Runtime *getCurrentRuntime();
    Runtime *getCpuRuntime();
    void setDevice(Device);
    size_t getDeviceCount(Device::Type type);
    static ContextImpl &singleton();
    friend class Runtime;
};

} // namespace infinicore
#include "runtime.hpp"
#include "../../utils.hpp"
#include "../allocators/device_caching_allocator.hpp"
#include "../allocators/device_pinned_allocator.hpp"
#include "../allocators/host_allocator.hpp"
namespace infinicore {

// Runtime: per-device bundle of stream, infiniop handle, and allocators.
Runtime::Runtime(Device device) : device_(device) {
    // Make this device current before creating its stream/handle.
    activate();
    INFINICORE_CHECK_ERROR(infinirtStreamCreate(&stream_));
    INFINICORE_CHECK_ERROR(infiniopCreateHandle(&infiniop_handle_));
    if (device_.getType() == Device::Type::CPU) {
        device_memory_allocator_ = std::make_unique<HostAllocator>();
        // No pinned allocator for the CPU runtime (see allocatePinnedHostMemory).
    } else {
        device_memory_allocator_ = std::make_unique<DeviceCachingAllocator>(device);
        pinned_host_memory_allocator_ = std::make_unique<DevicePinnedHostAllocator>(device);
    }
}

Runtime::~Runtime() {
    activate();
    // Allocators are destroyed before the stream/handle they may depend on.
    if (pinned_host_memory_allocator_) {
        pinned_host_memory_allocator_.reset();
    }
    device_memory_allocator_.reset();
    // Errors are deliberately not checked in the destructor.
    infiniopDestroyHandle(infiniop_handle_);
    infinirtStreamDestroy(stream_);
}

// Makes this runtime's device the current infinirt device; returns this for
// chaining.
Runtime *Runtime::activate() {
    INFINICORE_CHECK_ERROR(infinirtSetDevice((infiniDevice_t)device_.getType(), (int)device_.getIndex()));
    return this;
}

Device Runtime::device() const {
    return device_;
}

infinirtStream_t Runtime::stream() const {
    return stream_;
}

infiniopHandle_t Runtime::infiniopHandle() const {
    return infiniop_handle_;
}

void Runtime::syncStream() {
    INFINICORE_CHECK_ERROR(infinirtStreamSynchronize(stream_));
}

void Runtime::syncDevice() {
    INFINICORE_CHECK_ERROR(infinirtDeviceSynchronize());
}

// NOTE(review): the deleter lambdas below capture the allocator by raw
// pointer, so a Memory object must not outlive its Runtime — confirm the
// ownership model guarantees this.
std::shared_ptr<Memory> Runtime::allocateMemory(size_t size) {
    std::byte *data_ptr = device_memory_allocator_->allocate(size);
    return std::make_shared<Memory>(
        data_ptr, size, device_,
        [alloc = device_memory_allocator_.get()](std::byte *p) {
            alloc->deallocate(p);
        });
}

// Precondition: this is a non-CPU runtime (the pinned allocator is only
// created in that branch of the constructor).
std::shared_ptr<Memory> Runtime::allocatePinnedHostMemory(size_t size) {
    std::byte *data_ptr = pinned_host_memory_allocator_->allocate(size);
    return std::make_shared<Memory>(
        data_ptr, size, device_,
        [alloc = pinned_host_memory_allocator_.get()](std::byte *p) {
            alloc->deallocate(p);
        },
        true);
}

void Runtime::memcpyH2D(void *dst, const void *src, size_t size) {
    INFINICORE_CHECK_ERROR(infinirtMemcpyAsync(dst, src, size, INFINIRT_MEMCPY_H2D, stream_));
}

// NOTE(review): D2H is synchronous while H2D/D2D are stream-async — looks
// deliberate (the host buffer is usable on return), but worth confirming.
void Runtime::memcpyD2H(void *dst, const void *src, size_t size) {
    INFINICORE_CHECK_ERROR(infinirtMemcpy(dst, src, size, INFINIRT_MEMCPY_D2H));
}

void Runtime::memcpyD2D(void *dst, const void *src, size_t size) {
    INFINICORE_CHECK_ERROR(infinirtMemcpyAsync(dst, src, size, INFINIRT_MEMCPY_D2D, stream_));
}

std::string Runtime::toString() const {
    return fmt::format("Runtime({})", device_.toString());
}

} // namespace infinicore
#pragma once
#include "../allocators/memory_allocator.hpp"
#include "infinicore/context/context.hpp"
#include <infiniop.h>
#include <infinirt.h>
namespace infinicore {

class ContextImpl;

// Per-device execution context: owns the device's stream, its infiniop
// handle, and its memory allocators. Constructed only by ContextImpl.
class Runtime {
private:
    Device device_;
    infinirtStream_t stream_;
    infiniopHandle_t infiniop_handle_;
    std::unique_ptr<MemoryAllocator> device_memory_allocator_;
    // Null for the CPU runtime (see Runtime's constructor).
    std::unique_ptr<MemoryAllocator> pinned_host_memory_allocator_;

protected:
    Runtime(Device device); // protected: created only by ContextImpl (friend)

public:
    ~Runtime();
    // Makes this runtime's device current; returns this.
    Runtime *activate();
    Device device() const;
    infinirtStream_t stream() const;
    infiniopHandle_t infiniopHandle() const;
    void syncStream();
    void syncDevice();
    std::shared_ptr<Memory> allocateMemory(size_t size);
    std::shared_ptr<Memory> allocatePinnedHostMemory(size_t size);
    void memcpyH2D(void *dst, const void *src, size_t size);
    void memcpyD2H(void *dst, const void *src, size_t size);
    void memcpyD2D(void *dst, const void *src, size_t size);
    std::string toString() const;
    friend class ContextImpl;
};

} // namespace infinicore
#include <infinicore.hpp>
#include <map>
#include <string>
#include "infinicore.hpp"
namespace infinicore {
Device::Device(const Type &type, const Index &index) : type_{type}, index_{index} {}
const Device::Type &Device::get_type() const {
const Device::Type &Device::getType() const {
return type_;
}
const Device::Index &Device::get_index() const {
const Device::Index &Device::getIndex() const {
return index_;
}
std::string Device::to_string() const {
return to_string(type_) + ":" + std::to_string(index_);
std::string Device::toString() const {
return toString(type_) + ":" + std::to_string(index_);
}
std::string Device::to_string(const Type &type) {
std::string Device::toString(const Type &type) {
switch (type) {
case Type::cpu:
return "cpu";
case Type::cuda:
return "cuda";
case Type::meta:
return "meta";
case Type::CPU:
return "CPU";
case Type::NVIDIA:
return "NVIDIA";
case Type::CAMBRICON:
return "CAMBRICON";
case Type::ASCEND:
return "ASCEND";
case Type::METAX:
return "METAX";
case Type::MOORE:
return "MOORE";
case Type::ILUVATAR:
return "ILUVATAR";
case Type::KUNLUN:
return "KUNLUN";
case Type::SUGON:
return "SUGON";
}
// TODO: Add error handling.
return "";
}
bool Device::operator==(const Device &other) const {
return type_ == other.type_ && index_ == other.index_;
}
bool Device::operator!=(const Device &other) const {
return type_ != other.type_ || index_ != other.index_;
}
} // namespace infinicore
......@@ -2,34 +2,82 @@
namespace infinicore {
std::string to_string(const DataType &dtype) {
std::string str{"infinicore."};
std::string toString(const DataType &dtype) {
switch (dtype) {
case DataType::BYTE:
return "BYTE";
case DataType::BOOL:
return "BOOL";
case DataType::I8:
return "I8";
case DataType::I16:
return "I16";
case DataType::I32:
return "I32";
case DataType::I64:
return "I64";
case DataType::U8:
return "U8";
case DataType::U16:
return "U16";
case DataType::U32:
return "U32";
case DataType::U64:
return "U64";
case DataType::F8:
return "F8";
case DataType::F16:
return "F16";
case DataType::F32:
return "F32";
case DataType::F64:
return "F64";
case DataType::C16:
return "C16";
case DataType::C32:
return "C32";
case DataType::C64:
return "C64";
case DataType::C128:
return "C128";
case DataType::BF16:
return "BF16";
}
// TODO: Add error handling.
return "";
}
size_t dsize(const DataType &dtype) {
switch (dtype) {
case DataType::bfloat16:
str += "bfloat16";
break;
case DataType::float16:
str += "float16";
break;
case DataType::float32:
str += "float32";
break;
case DataType::float64:
str += "float64";
break;
case DataType::int32:
str += "int32";
break;
case DataType::int64:
str += "int64";
break;
case DataType::uint8:
str += "uint8";
break;
case DataType::BYTE:
case DataType::BOOL:
case DataType::F8:
case DataType::I8:
case DataType::U8:
return 1;
case DataType::I16:
case DataType::U16:
case DataType::F16:
case DataType::BF16:
case DataType::C16:
return 2;
case DataType::I32:
case DataType::U32:
case DataType::F32:
case DataType::C32:
return 4;
case DataType::I64:
case DataType::U64:
case DataType::F64:
case DataType::C64:
return 8;
case DataType::C128:
return 16;
}
return str;
// TODO: Add error handling.
return 0;
}
} // namespace infinicore
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment