Unverified commit 9a05446f authored by PanZezhong1725, committed by GitHub

issue/461 InfiniCore inference runtime


Co-authored-by: Jiacheng Huang <huangjiacheng0709@outlook.com>
Co-authored-by: wooway777 <wooway777@gmail.com>
parent 37411f6d
@@ -19,6 +19,9 @@ jobs:
      - name: checkout code
        uses: actions/checkout@v4
        with:
          submodules: recursive
          fetch-depth: 0
      - name: Check Format
        run: |
......
@@ -10,6 +10,7 @@ build/
# Python
__pycache__/
*.egg-info/
# Log
*.log
@@ -22,3 +23,8 @@ cache/
#GGUF
*.gguf
# Compressed
*.gz
*.zip
*.tar
[submodule "third_party/spdlog"]
path = third_party/spdlog
url = https://github.com/gabime/spdlog.git
@@ -28,6 +28,20 @@ API definitions and usage are described in the [`InfiniCore documentation`](https://github.com/Infin
## Configuration and Usage
### Submodules
Because this repository contains submodules, add `--recursive` or `--recurse-submodules` when cloning, for example:
```shell
git clone --recursive https://github.com/InfiniTensor/InfiniCore.git
```
Alternatively, after a plain clone, initialize and update the submodules:
```shell
git submodule update --init --recursive
```
### One-Step Installation
The `script/` directory provides an `install.py` installation script, used as follows:
......
#pragma once
#include "infinicore/ops.hpp"
#include "infinicore/tensor.hpp"
#pragma once
#include <cstddef>
#include <functional>
#include <iostream>
#include <limits>
#include <list>
#include <optional>
#include <stdexcept>
#include <typeinfo>
#include <unordered_map>
#include <utility>
namespace infinicore::common {
template <typename Key, typename Value>
class LRUCache {
public:
using KeyValuePair = std::pair<Key, Value>;
using ListIt = typename std::list<KeyValuePair>::iterator;
using Destructor = std::function<void(Value &)>;
explicit LRUCache(size_t capacity = 100, Destructor destructor = nullptr)
: capacity_(capacity), destructor_(destructor) {
if (capacity == 0) {
// Treat capacity 0 as effectively unbounded.
capacity_ = std::numeric_limits<size_t>::max();
}
}
~LRUCache() {
cleanup();
}
bool contains(const Key &key) const {
return map_.find(key) != map_.end();
}
void put(const Key &key, const Value &value) {
auto it = map_.find(key);
if (it != map_.end()) {
if (destructor_) {
destructor_(it->second->second);
}
it->second->second = value;
touch(it);
} else {
// insert new
if (list_.size() >= capacity_) {
evictLRU();
}
list_.emplace_front(key, value);
map_[key] = list_.begin();
}
}
std::optional<Value> get(const Key &key) {
auto it = map_.find(key);
if (it == map_.end()) {
return std::nullopt;
}
touch(it);
return it->second->second;
}
std::optional<Value> get(const Key &key) const {
auto it = map_.find(key);
if (it == map_.end()) {
return std::nullopt;
}
// Note: can't touch in const context
return it->second->second;
}
void setDestructor(Destructor destructor) {
destructor_ = destructor;
}
void setCapacity(size_t capacity) {
capacity_ = capacity;
while (list_.size() > capacity_) {
evictLRU();
}
}
void clear() {
if (destructor_) {
for (auto &item : list_) {
safeDestruct(item.second);
}
}
list_.clear();
map_.clear();
}
const std::list<KeyValuePair> &getAllItems() const {
return list_;
}
protected:
std::list<KeyValuePair> list_; // front = most recent, back = least
private:
void touch(typename std::unordered_map<Key, ListIt>::iterator it) {
// move this key to front (most recent)
list_.splice(list_.begin(), list_, it->second);
it->second = list_.begin();
}
void safeDestruct(Value &value) {
if (!destructor_) {
return;
}
try {
destructor_(value);
} catch (const std::exception &e) {
// Report and swallow exceptions thrown by the user-provided destructor
std::cerr << "Cache destructor error (type: " << typeid(Value).name()
<< "): " << e.what() << std::endl;
}
}
void evictLRU() {
if (!list_.empty()) {
auto &kv = list_.back();
safeDestruct(kv.second);
map_.erase(kv.first);
list_.pop_back();
}
}
void cleanup() {
clear();
}
size_t capacity_;
std::unordered_map<Key, ListIt> map_;
Destructor destructor_;
};
} // namespace infinicore::common
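To make the eviction behavior concrete, here is a minimal usage sketch assuming only the header above; the keys, values, `main` scaffolding, and include path are illustrative, not part of the commit:

```cpp
#include "LRUCache.hpp" // include path assumed
#include <iostream>
#include <string>

int main() {
    // Capacity 2: inserting a third key evicts the least-recently-used entry
    // and runs the user-supplied destructor on its value.
    infinicore::common::LRUCache<std::string, int> cache(
        2, [](int &v) { std::cout << "evicting value " << v << "\n"; });

    cache.put("a", 1);
    cache.put("b", 2);
    cache.get("a");    // touches "a", so "b" becomes least recently used
    cache.put("c", 3); // evicts "b", printing "evicting value 2"

    std::cout << std::boolalpha
              << cache.contains("a") << "\n"  // true
              << cache.contains("b") << "\n"; // false (evicted)
    return 0;
}
```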
#pragma once
#include "../tensor.hpp"
#include <type_traits>
namespace infinicore {
// Base hash_combine for arithmetic types
template <typename T>
std::enable_if_t<std::is_arithmetic_v<T>, void>
hash_combine(size_t &seed, const T &value) {
seed ^= std::hash<T>{}(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}
// Overload for Tensor: hashes dtype, shape, and strides
inline void hash_combine(size_t &seed, Tensor tensor) {
hash_combine(seed, static_cast<size_t>(tensor->dtype()));
for (Size shape : tensor->shape()) {
hash_combine(seed, shape);
}
for (Stride stride : tensor->strides()) {
hash_combine(seed, static_cast<size_t>(stride));
}
}
// Overload for std::string
inline void hash_combine(size_t &seed, const std::string &str) {
hash_combine(seed, std::hash<std::string>{}(str));
}
// Overload for const char*, routed through std::string
inline void hash_combine(size_t &seed, const char *str) {
hash_combine(seed, std::string(str));
}
// Variadic template for multiple arguments
template <typename First, typename... Rest>
void hash_combine(size_t &seed, const First &first, const Rest &...rest) {
hash_combine(seed, first);
hash_combine(seed, rest...);
}
// Base case for variadic template
inline void hash_combine(size_t &seed) {
// Base case - do nothing
}
// Convenience function to hash multiple values
template <typename... Types>
size_t hash_combine(const Types &...values) {
size_t seed = 0;
hash_combine(seed, values...);
return seed;
}
} // namespace infinicore
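To show how the overloads compose, a brief sketch assuming only this header; the sample values and include path are illustrative:

```cpp
#include "hash.hpp" // include path assumed
#include <iostream>

int main() {
    using infinicore::hash_combine;

    // The convenience overload seeds with 0 and folds the values left to right.
    size_t key = hash_combine(size_t{2}, size_t{3}, "matmul");

    // Equivalent explicit seed-threading form.
    size_t seed = 0;
    hash_combine(seed, size_t{2});
    hash_combine(seed, size_t{3});
    hash_combine(seed, "matmul"); // routed through the const char* overload
    std::cout << (seed == key) << "\n"; // 1
    return 0;
}
```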
#pragma once
#include "../device.hpp"
#include "../memory.hpp"
#include <infiniop.h>
#include <infinirt.h>
#include <memory>
namespace infinicore {
namespace context {
void setDevice(Device device);
Device getDevice();
size_t getDeviceCount(Device::Type type);
infinirtStream_t getStream();
infiniopHandle_t getInfiniopHandle();
void syncStream();
void syncDevice();
std::shared_ptr<Memory> allocateMemory(size_t size);
std::shared_ptr<Memory> allocateHostMemory(size_t size);
std::shared_ptr<Memory> allocatePinnedHostMemory(size_t size);
void memcpyH2D(void *dst, const void *src, size_t size);
void memcpyD2H(void *dst, const void *src, size_t size);
void memcpyD2D(void *dst, const void *src, size_t size);
void memcpyH2H(void *dst, const void *src, size_t size);
} // namespace context
} // namespace infinicore
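For orientation, a minimal sketch of driving this context API; the include path and buffer size are assumptions, not part of the commit:

```cpp
#include "infinicore/context/context.hpp" // include path assumed
#include <vector>

using namespace infinicore;

int main() {
    // Select a device; CPU index 0 is assumed to exist.
    context::setDevice(Device(Device::Type::CPU, 0));

    // Allocate memory through the context and copy a host buffer in.
    std::vector<float> host(1024, 1.0f);
    size_t bytes = host.size() * sizeof(float);
    auto mem = context::allocateMemory(bytes);
    context::memcpyH2D(mem->data(), host.data(), bytes);

    // Copies are issued on the current stream; synchronize before reading back.
    context::syncStream();
    return 0;
}
```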
#pragma once
#include <cstdint>
#include <string>
#include "infinicore.h"
namespace infinicore {
class Device {
@@ -11,20 +12,31 @@ public:
using Index = std::size_t;
enum class Type {
CPU = INFINI_DEVICE_CPU,
NVIDIA = INFINI_DEVICE_NVIDIA,
CAMBRICON = INFINI_DEVICE_CAMBRICON,
ASCEND = INFINI_DEVICE_ASCEND,
METAX = INFINI_DEVICE_METAX,
MOORE = INFINI_DEVICE_MOORE,
ILUVATAR = INFINI_DEVICE_ILUVATAR,
KUNLUN = INFINI_DEVICE_KUNLUN,
SUGON = INFINI_DEVICE_SUGON,
COUNT = INFINI_DEVICE_TYPE_COUNT,
};
Device(const Type &type = Type::CPU, const Index &index = 0);
const Type &getType() const;
const Index &getIndex() const;
std::string toString() const;
static std::string toString(const Type &type);
bool operator==(const Device &other) const;
bool operator!=(const Device &other) const;
private:
Type type_;
@@ -33,5 +45,3 @@ private:
};
} // namespace infinicore
#pragma once
#include <infinicore.h>
#include <string>
namespace infinicore {
enum class DataType {
BYTE = INFINI_DTYPE_BYTE,
BOOL = INFINI_DTYPE_BOOL,
I8 = INFINI_DTYPE_I8,
I16 = INFINI_DTYPE_I16,
I32 = INFINI_DTYPE_I32,
I64 = INFINI_DTYPE_I64,
U8 = INFINI_DTYPE_U8,
U16 = INFINI_DTYPE_U16,
U32 = INFINI_DTYPE_U32,
U64 = INFINI_DTYPE_U64,
F8 = INFINI_DTYPE_F8,
F16 = INFINI_DTYPE_F16,
F32 = INFINI_DTYPE_F32,
F64 = INFINI_DTYPE_F64,
C16 = INFINI_DTYPE_C16,
C32 = INFINI_DTYPE_C32,
C64 = INFINI_DTYPE_C64,
C128 = INFINI_DTYPE_C128,
BF16 = INFINI_DTYPE_BF16,
};
std::string toString(const DataType &dtype);
size_t dsize(const DataType &dtype);
} // namespace infinicore
#pragma once
#include "device.hpp"
#include <cstddef>
#include <functional>
namespace infinicore {
class Memory {
public:
using Deleter = std::function<void(std::byte *)>;
Memory(std::byte *data, size_t size, Device device, Deleter deleter, bool pin_memory = false);
~Memory();
std::byte *data();
Device device() const;
size_t size() const;
bool is_pinned() const;
private:
std::byte *data_;
size_t size_;
Device device_;
Deleter deleter_;
bool is_pinned_;
};
} // namespace infinicore
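As a sketch of the ownership model, one way to wrap an existing host allocation in a `Memory` object; the `malloc` buffer and include path are illustrative:

```cpp
#include "infinicore/memory.hpp" // include path assumed
#include <cstdlib>
#include <memory>

using namespace infinicore;

int main() {
    // Wrap a raw host buffer; the deleter runs when the last owner releases it.
    size_t size = 4096;
    auto *buf = static_cast<std::byte *>(std::malloc(size));

    auto mem = std::make_shared<Memory>(
        buf, size, Device(Device::Type::CPU, 0),
        [](std::byte *p) { std::free(p); });

    // The buffer stays alive as long as any shared_ptr to `mem` does.
    return 0;
}
```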
#pragma once
#include "../../common/LRUCache.hpp"
#include "../../context/context.hpp"
#include <array>
#include <functional>
#include <memory>
#include <vector>
namespace infinicore::op::common {
template <typename Key, typename Value>
class OpCache {
private:
using BaseCache = infinicore::common::LRUCache<Key, Value>;
using Destructor = typename BaseCache::Destructor;
using CacheVector = std::vector<BaseCache>;
public:
explicit OpCache(size_t capacity = 100, Destructor destructor = nullptr)
: capacity_(capacity), destructor_(destructor) {}
~OpCache() {
clear();
}
BaseCache &getCache(Device::Type device_type, size_t device_index) {
auto &cache_vector = caches_[static_cast<size_t>(device_type)];
if (cache_vector.size() <= device_index) {
cache_vector.resize(device_index + 1, BaseCache(capacity_, destructor_));
} else {
cache_vector[device_index].setDestructor(destructor_);
}
return cache_vector[device_index];
}
void setCapacity(size_t capacity) {
capacity_ = capacity;
for (auto &vec : caches_) {
for (auto &cache : vec) {
cache.setCapacity(capacity);
}
}
}
void clear() {
Device current_device = context::getDevice();
for (size_t type_idx = 0; type_idx < caches_.size(); ++type_idx) {
auto &vec = caches_[type_idx];
for (size_t dev_idx = 0; dev_idx < vec.size(); ++dev_idx) {
Device target_device(static_cast<Device::Type>(type_idx), dev_idx);
if (current_device != target_device) {
context::setDevice(target_device);
}
vec[dev_idx].clear();
if (current_device != target_device) {
context::setDevice(current_device);
}
}
vec.clear();
}
caches_ = {};
}
private:
size_t capacity_;
Destructor destructor_;
std::array<CacheVector, static_cast<size_t>(Device::Type::COUNT)> caches_ = {};
};
} // namespace infinicore::op::common
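A rough sketch of how `OpCache` and `hash_combine` might cooperate to memoize per-device state; the include paths, the `get_descriptor` helper, and the `void *` descriptor type are hypothetical:

```cpp
#include "cache.hpp" // this header; include path assumed
#include "hash.hpp"  // hash_combine; include path assumed

using namespace infinicore;

// One LRU cache per (device type, device index), keyed by a shape hash.
static op::common::OpCache<size_t, void *> descriptor_cache(
    /*capacity=*/100,
    /*destructor=*/[](void *&desc) { /* release the descriptor here */ });

void *get_descriptor(Device device, size_t m, size_t n) {
    auto &cache = descriptor_cache.getCache(device.getType(), device.getIndex());
    size_t key = hash_combine(m, n);
    if (auto hit = cache.get(key)) {
        return *hit; // reuse a previously built descriptor
    }
    void *desc = nullptr; // build the real descriptor here
    cache.put(key, desc);
    return desc;
}
```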
#pragma once
#include "../../device.hpp"
#include <array>
#include <cstddef>
#include <initializer_list>
namespace infinicore::op::common {
template <typename Fn>
class OpDispatcher {
public:
void registerDevice(Device::Type device_type, Fn fn, bool override_existing = true) {
if (table_[static_cast<size_t>(device_type)] == nullptr || override_existing) {
table_[static_cast<size_t>(device_type)] = fn;
}
}
void registerDevice(std::initializer_list<Device::Type> device_types, Fn fn, bool override_existing = true) {
for (auto device_type : device_types) {
registerDevice(device_type, fn, override_existing);
}
}
void registerAll(Fn fn, bool override_existing = true) {
for (size_t device_type = 0; device_type < static_cast<size_t>(Device::Type::COUNT); ++device_type) {
registerDevice(static_cast<Device::Type>(device_type), fn, override_existing);
}
}
Fn lookup(Device::Type device_type) const {
return table_.at(static_cast<size_t>(device_type));
}
private:
std::array<Fn, static_cast<size_t>(Device::Type::COUNT)> table_ = {}; // value-initialize so unregistered entries read as nullptr
};
} // namespace infinicore::op::common
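A minimal sketch of registering and dispatching a kernel; the include path and kernel functions are hypothetical:

```cpp
#include "dispatcher.hpp" // this header; include path assumed
#include <iostream>

using namespace infinicore;

using kernel_t = void (*)(int);

void cpu_kernel(int n) { std::cout << "cpu kernel, n=" << n << "\n"; }
void generic_kernel(int n) { std::cout << "generic kernel, n=" << n << "\n"; }

int main() {
    op::common::OpDispatcher<kernel_t> dispatcher;

    // Register a fallback for every device type, then specialize the CPU entry;
    // pass override_existing=false to keep entries that are already registered.
    dispatcher.registerAll(generic_kernel);
    dispatcher.registerDevice(Device::Type::CPU, cpu_kernel);

    dispatcher.lookup(Device::Type::CPU)(42);    // cpu kernel
    dispatcher.lookup(Device::Type::NVIDIA)(42); // generic fallback
    return 0;
}
```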
#pragma once
#include "../../context/context.hpp"
#include "../../tensor.hpp"
#include "dispatcher.hpp"
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {
class Matmul {
public:
using schema = void (*)(Tensor, Tensor, Tensor);
static void execute(Tensor c, Tensor a, Tensor b);
static common::OpDispatcher<schema> &dispatcher();
};
Tensor matmul(Tensor a, Tensor b);
void matmul_(Tensor c, Tensor a, Tensor b);
} // namespace infinicore::op
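The two free functions wrap `Matmul::execute`; a sketch of the intended call pattern, assuming the tensor factories from `tensor.hpp` (the umbrella include and shapes are illustrative):

```cpp
#include "infinicore.hpp" // umbrella header; include path assumed

using namespace infinicore;

int main() {
    Device dev(Device::Type::CPU, 0);

    // c = a x b with a: [2, 3] and b: [3, 4], giving c: [2, 4].
    Tensor a = Tensor::ones({2, 3}, DataType::F32, dev);
    Tensor b = Tensor::ones({3, 4}, DataType::F32, dev);

    Tensor c = op::matmul(a, b); // allocating form
    op::matmul_(c, a, b);        // in-place form writes into an existing tensor
    return 0;
}
```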
#pragma once
#include "common/op.hpp"
namespace infinicore::op {
class Ones {
public:
using schema = void (*)(Tensor);
static void execute(Tensor output);
static common::OpDispatcher<schema> &dispatcher();
};
Tensor ones();
void ones_(Tensor output);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
namespace infinicore::op {
class Rearrange {
public:
using schema = void (*)(Tensor, Tensor);
static void execute(Tensor y, Tensor x);
static common::OpDispatcher<schema> &dispatcher();
};
Tensor rearrange(Tensor x);
void rearrange_(Tensor y, Tensor x);
} // namespace infinicore::op
#pragma once
#include "op/matmul.hpp"
#include "op/ones.hpp"
#include "op/rearrange.hpp"
#pragma once
#include "device.hpp"
#include "dtype.hpp"
#include "memory.hpp"
#include <algorithm>
#include <cassert>
#include <functional>
#include <memory>
#include <vector>
#include <infiniop.h>
namespace infinicore {
using Size = std::size_t;
using Stride = std::ptrdiff_t;
using Shape = std::vector<Size>;
using Strides = std::vector<Stride>;
class TensorImpl;
struct TensorMetaData {
Shape shape;
Strides strides;
DataType dtype;
infiniopTensorDescriptor_t desc;
TensorMetaData(const Shape &shape, const Strides &strides, const DataType &dtype);
};
struct TensorData {
size_t offset;
std::shared_ptr<Memory> memory;
};
struct TensorSliceParams {
size_t dim;
size_t start;
Size len;
};
class Tensor {
public:
static Tensor empty(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory = false);
static Tensor strided_empty(const Shape &shape,
const Strides &strides,
const DataType &dtype,
const Device &device,
bool pin_memory = false);
static Tensor zeros(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory = false);
static Tensor ones(const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory = false);
static Tensor from_blob(void *raw_ptr,
const Shape &shape,
const DataType &dtype,
const Device &device);
static Tensor strided_from_blob(void *raw_ptr,
const Shape &shape,
const Strides &strides,
const DataType &dtype,
const Device &device);
Tensor(const Tensor &) = default;
Tensor(Tensor &&) = default;
Tensor &operator=(const Tensor &) = default;
Tensor &operator=(Tensor &&) = default;
TensorImpl *operator->();
const TensorImpl *operator->() const;
protected:
explicit Tensor(std::shared_ptr<TensorImpl> impl) : impl_(std::move(impl)) {}
std::shared_ptr<TensorImpl> impl_;
friend class TensorImpl;
};
class TensorImpl : public std::enable_shared_from_this<TensorImpl> {
public:
TensorImpl(const Shape &shape, const DataType &dtype);
TensorImpl(const Shape &shape, const Strides &strides, const DataType &dtype);
std::byte *data();
const std::byte *data() const;
const Shape &shape() const;
const Strides &strides() const;
bool is_contiguous() const;
Size ndim() const;
Size numel() const;
Size size(size_t dim) const;
Stride stride(size_t dim) const;
DataType dtype() const;
Device device() const;
infiniopTensorDescriptor_t desc() const;
bool is_pinned() const;
std::string info() const;
///
/// Data Transfer APIs
///
/**
* Returns a new tensor with the same data on a different device.
* If the new device passed is same as the current device, the original tensor is returned.
*
* @param device The device of the new tensor
*
* @return A new tensor with the same data on the specified device
*/
Tensor to(Device device) const;
/**
* Copies data from another tensor into this tensor.
* Currently, only contiguous tensors of the same dtype and shape are supported.
*
* @param src The source tensor to copy from
*/
void copy_from(Tensor src);
/**
* Returns a tensor with the same data as the current tensor in a contiguous arrangement.
* If this tensor is already contiguous, the original tensor is returned.
*
* @return A contiguous tensor with the same data
*/
Tensor contiguous() const;
///
/// View APIs
///
/**
* Returns a new tensor that is a narrowed version of the current tensor.
* The returned tensor shares the same underlying storage with the original tensor.
*
* @param slices A vector of slice parameters specifying the dimension, start index,
* and length for each dimension to narrow
* @return A new tensor with narrowed dimensions
*
* Example:
* // Narrow dimension 0 from index 2 to 5 (length 3)
* // and dimension 1 from index 1 to 3 (length 2)
* tensor.narrow({{0, 2, 3}, {1, 1, 2}});
*/
Tensor narrow(const std::vector<TensorSliceParams> &slices) const;
/**
* Returns a new tensor with the dimensions permuted (reordered) according to the given order.
* The returned tensor shares the same underlying storage with the original tensor.
*
* @param order The desired ordering of dimensions
* @return A new tensor with permuted dimensions
*
* Example:
* // For a 3D tensor with shape [2, 3, 4], permute to [2, 0, 1]
* // This swaps the dimensions: dim0->dim2, dim1->dim0, dim2->dim1
* tensor->permute({2, 0, 1});
*/
Tensor permute(const Shape &order) const;
/**
* Returns a new tensor with the same data but a different shape.
* The returned tensor shares the same underlying storage with the original tensor.
* The tensor is rearranged if the new shape is not compatible with the current shape.
*
* @param new_shape The desired new shape
* @return A new tensor with the specified shape
*
* Example:
* // Reshape a 2x3 tensor (6 elements) to a 3x2 tensor
* tensor->view({3, 2});
*/
Tensor view(const Shape &new_shape) const;
/**
* Returns a new tensor with the specified shape and strides, without checking them against the underlying storage (unsafe).
* The returned tensor shares the same underlying storage with the original tensor.
*
* @param new_shape The desired new shape
* @param new_strides The desired new strides
* @return A new tensor with the specified shape and strides
*
* Example:
* // Create a non-contiguous view with custom strides
* tensor->as_strided({2, 3}, {6, 2}); // Stride of 6 for dim0, 2 for dim1
*/
Tensor as_strided(const Shape &new_shape, const Strides &new_strides) const;
protected:
static std::shared_ptr<TensorImpl> empty(
const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory = false);
static std::shared_ptr<TensorImpl> strided_empty(
const Shape &shape,
const Strides &strides,
const DataType &dtype,
const Device &device,
bool pin_memory = false);
static std::shared_ptr<TensorImpl> zeros(
const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory = false);
static std::shared_ptr<TensorImpl> ones(
const Shape &shape,
const DataType &dtype,
const Device &device,
bool pin_memory = false);
static std::shared_ptr<TensorImpl> from_blob(
void *raw_ptr,
const Shape &shape,
const DataType &dtype,
const Device &device);
static std::shared_ptr<TensorImpl> strided_from_blob(
void *raw_ptr,
const Shape &shape,
const Strides &strides,
const DataType &dtype,
const Device &device);
friend class Tensor;
private:
TensorMetaData meta_;
TensorData data_;
};
} // namespace infinicore
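A short sketch exercising the factory and view APIs documented above; the include path and shapes are illustrative:

```cpp
#include "infinicore/tensor.hpp" // include path assumed
#include <iostream>

using namespace infinicore;

int main() {
    Device dev(Device::Type::CPU, 0);
    Tensor t = Tensor::zeros({2, 3, 4}, DataType::F32, dev);

    // Views share storage with the original tensor.
    Tensor p = t->permute({2, 0, 1});  // shape [4, 2, 3], non-contiguous
    Tensor v = t->view({6, 4});        // shape [6, 4]
    Tensor n = t->narrow({{0, 0, 1}}); // dim 0, start 0, length 1 -> [1, 3, 4]

    // Materialize a contiguous copy of a strided view.
    Tensor pc = p->contiguous();
    std::cout << pc->info() << "\n";
    return 0;
}
```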
from infinicore.device import device
from infinicore.dtype import (
bfloat16,
bool,
cdouble,
cfloat,
chalf,
complex32,
complex64,
complex128,
double,
float,
float16,
float32,
float64,
half,
int,
int8,
int16,
int32,
int64,
long,
short,
uint8,
)
from infinicore.ops.matmul import matmul
from infinicore.ops.rearrange import rearrange
from infinicore.tensor import (
empty,
from_blob,
ones,
strided_empty,
strided_from_blob,
zeros,
)
__all__ = [
# Classes.
"device",
# Data Types.
"bfloat16",
"bool",
"cdouble",
"cfloat",
"chalf",
"complex32",
"complex64",
"complex128",
"double",
"float",
"float16",
"float32",
"float64",
"half",
"int",
"int8",
"int16",
"int32",
"int64",
"long",
"short",
"uint8",
# Operations.
"matmul",
"rearrange",
"empty",
"from_blob",
"ones",
"strided_empty",
"strided_from_blob",
"zeros",
]