Unverified Commit 07aa6990 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #22 from InfiniTensor/issue/21

Issue/21 - Inference Process Modularization
parents be0e66ef bfae3bbb
#ifndef CACHE_MANAGER_HPP
#define CACHE_MANAGER_HPP
#include <cstddef>
#include <functional>
#include <list>
#include <memory>
#include <unordered_map>
#include <utility>
#include <vector>
#include "../tensor.hpp"
#include "../utils.hpp"
#include "infinicore_infer.h"
// Type-erased interface for releasing a descriptor whose concrete type is
// only known to the implementing class.
class IDescriptorDestroyer {
public:
virtual ~IDescriptorDestroyer() = default;
// Releases the descriptor that `descriptor` points to. The visible
// implementation (DescriptorDestroyer) casts the pointer back to a pointer to
// its descriptor type, so callers pass the address of the stored descriptor.
virtual void destroy(void *descriptor) = 0;
};
// Adapts a plain destroy function taking a DescriptorType by value to the
// type-erased IDescriptorDestroyer interface.
template <typename DescriptorType>
class DescriptorDestroyer : public IDescriptorDestroyer {
    using DestroyFunc = infiniStatus_t (*)(DescriptorType);

    // Function invoked for every descriptor handed to destroy().
    DestroyFunc release_fn;

public:
    DescriptorDestroyer(DestroyFunc func) : release_fn(func) {}

    // `descriptor` must point to a DescriptorType; the pointee is passed by
    // value to the wrapped function and the returned status is ignored.
    void destroy(void *descriptor) override {
        DescriptorType *typed = static_cast<DescriptorType *>(descriptor);
        release_fn(*typed);
    }
};
// Fixed-capacity least-recently-used cache that maps a size_t key to a
// descriptor handle and owns the handles it stores.
//
// A handle is passed to the destroy callable supplied at construction when
// its entry is evicted, when it is replaced by a different handle under the
// same key, and when the cache itself is destroyed.
//
// Notes:
//  - A requested capacity of 0 is treated as 1, so the handle most recently
//    passed to put() always stays alive until the next put().
//  - DescriptorType must be copyable and equality comparable (true for the
//    pointer-like handle types this cache is instantiated with in this file).
//  - No internal synchronization is performed.
template <typename DescriptorType>
class LRUDescriptorCache {
private:
    struct Entry {
        size_t key;
        DescriptorType desc;
    };
    using EntryList = std::list<Entry>;

    // Entries ordered from most recently used (front) to least recently used (back).
    EntryList entries;
    // Key -> position in `entries`; list iterators stay valid across splice().
    std::unordered_map<size_t, typename EntryList::iterator> index;
    const size_t capacity;
    // Callable that releases one descriptor; may be empty, in which case
    // descriptors are dropped without being released.
    std::function<void(DescriptorType)> destroyer;

    // Releases a single descriptor through the destroy callable, if any.
    void release(const DescriptorType &desc) {
        if (destroyer) {
            destroyer(desc);
        }
    }

    // Destroys and removes the least recently used entry. Requires a non-empty cache.
    void evictLeastRecentlyUsed() {
        Entry &victim = entries.back();
        release(victim.desc);
        index.erase(victim.key);
        entries.pop_back();
    }

    // Marks the entry at `pos` as most recently used.
    void moveToTop(typename EntryList::iterator pos) {
        entries.splice(entries.begin(), entries, pos);
    }

public:
    // c:           maximum number of cached descriptors (0 is treated as 1).
    // destroyFunc: callable invoked as destroyFunc(descriptor) to release a
    //              descriptor; any return value is ignored.
    template <typename DestroyFunc>
    LRUDescriptorCache(size_t c, DestroyFunc destroyFunc)
        : capacity(c == 0 ? 1 : c), destroyer(std::move(destroyFunc)) {}

    // Releases every descriptor still held, most recently used first.
    ~LRUDescriptorCache() {
        for (Entry &entry : entries) {
            release(entry.desc);
        }
    }

    // Looks up `key`. On a hit, copies the descriptor into `out_desc`, marks
    // the entry as most recently used and returns true. On a miss, leaves
    // `out_desc` untouched and returns false.
    bool get(size_t key, DescriptorType &out_desc) {
        auto it = index.find(key);
        if (it == index.end()) {
            return false;
        }
        moveToTop(it->second);
        out_desc = it->second->desc;
        return true;
    }

    // Stores `descriptor` under `key` and marks it as most recently used.
    // The cache takes ownership of the handle.
    void put(size_t key, const DescriptorType &descriptor) {
        auto it = index.find(key);
        if (it != index.end()) {
            Entry &entry = *it->second;
            // Re-inserting the handle that is already stored must not destroy
            // it, otherwise the cache would keep a dangling handle.
            if (!(entry.desc == descriptor)) {
                release(entry.desc);
                entry.desc = descriptor;
            }
            moveToTop(it->second);
            return;
        }
        // Make room first so the cache never holds more than `capacity` entries.
        if (entries.size() >= capacity) {
            evictLeastRecentlyUsed();
        }
        entries.push_front(Entry{key, descriptor});
        index.emplace(key, entries.begin());
    }

    LRUDescriptorCache(const LRUDescriptorCache &) = delete;
    LRUDescriptorCache &operator=(const LRUDescriptorCache &) = delete;
};
// Owns one LRU descriptor cache per operator kind and exposes typed get/put
// accessors for each of them. Every cache is created with the same capacity
// and with the matching infiniopDestroy*Descriptor function as its releaser.
class CacheManager {
private:
    // Capacity used when the constructor is called without an argument.
    // Kept at 100, the value the constructor has always defaulted to.
    static constexpr size_t DEFAULT_CACHE_CAPACITY = 100;

    LRUDescriptorCache<infiniopAddDescriptor_t> add_cache;
    LRUDescriptorCache<infiniopRMSNormDescriptor_t> rms_norm_cache;
    LRUDescriptorCache<infiniopGemmDescriptor_t> gemm_cache;
    LRUDescriptorCache<infiniopRoPEDescriptor_t> rope_cache;
    LRUDescriptorCache<infiniopRearrangeDescriptor_t> rearrange_cache;
    LRUDescriptorCache<infiniopCausalSoftmaxDescriptor_t> causal_softmax_cache;
    LRUDescriptorCache<infiniopSwiGLUDescriptor_t> swiglu_cache;
    LRUDescriptorCache<infiniopRandomSampleDescriptor_t> random_sample_cache;

public:
    // capacity: maximum number of descriptors kept per operator kind.
    CacheManager(size_t capacity = DEFAULT_CACHE_CAPACITY)
        : add_cache(capacity, infiniopDestroyAddDescriptor),
          rms_norm_cache(capacity, infiniopDestroyRMSNormDescriptor),
          gemm_cache(capacity, infiniopDestroyGemmDescriptor),
          rope_cache(capacity, infiniopDestroyRoPEDescriptor),
          rearrange_cache(capacity, infiniopDestroyRearrangeDescriptor),
          causal_softmax_cache(capacity, infiniopDestroyCausalSoftmaxDescriptor),
          swiglu_cache(capacity, infiniopDestroySwiGLUDescriptor),
          random_sample_cache(capacity, infiniopDestroyRandomSampleDescriptor) {}

    // Every get*Descriptor returns true and fills `desc` on a cache hit and
    // returns false on a miss. Every put*Descriptor hands `desc` over to the
    // corresponding cache under `key`.

    // Add operations
    bool getAddDescriptor(size_t key, infiniopAddDescriptor_t &desc) {
        return add_cache.get(key, desc);
    }
    void putAddDescriptor(size_t key, const infiniopAddDescriptor_t &desc) {
        add_cache.put(key, desc);
    }

    // RMSNorm operations
    bool getRMSNormDescriptor(size_t key, infiniopRMSNormDescriptor_t &desc) {
        return rms_norm_cache.get(key, desc);
    }
    void putRMSNormDescriptor(size_t key, const infiniopRMSNormDescriptor_t &desc) {
        rms_norm_cache.put(key, desc);
    }

    // GEMM operations
    bool getGemmDescriptor(size_t key, infiniopGemmDescriptor_t &desc) {
        return gemm_cache.get(key, desc);
    }
    void putGemmDescriptor(size_t key, const infiniopGemmDescriptor_t &desc) {
        gemm_cache.put(key, desc);
    }

    // RoPE operations
    bool getRoPEDescriptor(size_t key, infiniopRoPEDescriptor_t &desc) {
        return rope_cache.get(key, desc);
    }
    void putRoPEDescriptor(size_t key, const infiniopRoPEDescriptor_t &desc) {
        rope_cache.put(key, desc);
    }

    // Rearrange operations
    bool getRearrangeDescriptor(size_t key, infiniopRearrangeDescriptor_t &desc) {
        return rearrange_cache.get(key, desc);
    }
    void putRearrangeDescriptor(size_t key, const infiniopRearrangeDescriptor_t &desc) {
        rearrange_cache.put(key, desc);
    }

    // Softmax operations
    bool getCausalSoftmaxDescriptor(size_t key, infiniopCausalSoftmaxDescriptor_t &desc) {
        return causal_softmax_cache.get(key, desc);
    }
    void putCausalSoftmaxDescriptor(size_t key, const infiniopCausalSoftmaxDescriptor_t &desc) {
        causal_softmax_cache.put(key, desc);
    }

    // SwiGLU operations
    bool getSwiGLUDescriptor(size_t key, infiniopSwiGLUDescriptor_t &desc) {
        return swiglu_cache.get(key, desc);
    }
    void putSwiGLUDescriptor(size_t key, const infiniopSwiGLUDescriptor_t &desc) {
        swiglu_cache.put(key, desc);
    }

    // Random Sample operations
    bool getRandomSampleDescriptor(size_t key, infiniopRandomSampleDescriptor_t &desc) {
        return random_sample_cache.get(key, desc);
    }
    void putRandomSampleDescriptor(size_t key, const infiniopRandomSampleDescriptor_t &desc) {
        random_sample_cache.put(key, desc);
    }

    // Builds a cache key by folding seed() of every non-null tensor, in
    // argument order, into one hash with hash_combine. Null tensors are
    // skipped. The tensors are taken by const reference so building a key does
    // not copy the smart pointers.
    template <typename... Tensors>
    static size_t createDescriptorKey(const Tensors &...tensors) {
        size_t seed = 0;
        (..., (tensors ? hash_combine(seed, tensors->seed()) : (void)0));
        return seed;
    }
};
#endif // CACHE_MANAGER_HPP
#include "inference_context.hpp"
#include "../tensor.hpp"
#include "../utils.hpp"
// Stores the given device resource, descriptor cache manager and stream.
// Only the raw pointers/handle are copied; no workspace is allocated here.
InferenceContext::InferenceContext(DeviceResource *rsrc, CacheManager *cache_manager, infinirtStream_t stream)
: rsrc(rsrc), cache_manager(cache_manager), stream(stream) {}
// Guarantees that workspace_storage exists and was requested with at least
// `required_size` bytes. The current storage is kept when it is already large
// enough; otherwise a new one is taken from the device memory pool and the
// recorded size is updated.
void InferenceContext::ensure_workspace(size_t required_size) {
    const bool already_sufficient = required_size <= current_workspace_size && workspace_storage;
    if (already_sufficient) {
        return;
    }
    workspace_storage = Storage::createFromPool(required_size, rsrc->memory_pool);
    current_workspace_size = required_size;
}
// Launches infiniopAdd on this context's stream with the data pointers of c, a
// and b (c is presumably the output — confirm against the infiniop API).
// The operator descriptor is looked up in the cache manager under a key built
// from the three tensors and is created and stored there on a miss.
void InferenceContext::add(std::shared_ptr<Tensor> c,
std::shared_ptr<Tensor> a,
std::shared_ptr<Tensor> b) {
size_t key = CacheManager::createDescriptorKey(c, a, b);
infiniopAddDescriptor_t desc;
if (!cache_manager->getAddDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateAddDescriptor(rsrc->handle, &desc, c->desc(), a->desc(), b->desc()));
// The cache now owns the descriptor and releases it on eviction.
cache_manager->putAddDescriptor(key, desc);
}
// Query the workspace size for this descriptor and make sure the shared
// workspace buffer can hold it.
size_t workspace_size = 0;
RUN_INFINI(infiniopGetAddWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopAdd(
desc, workspace, workspace_size,
c->data(), a->data(), b->data(), stream));
}
void InferenceContext::rmsnorm(std::shared_ptr<Tensor> y,
std::shared_ptr<Tensor> x,
std::shared_ptr<Tensor> w,
float epsilon) {
size_t key = CacheManager::createDescriptorKey(y, x, w);
infiniopRMSNormDescriptor_t desc;
if (!cache_manager->getRMSNormDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateRMSNormDescriptor(
rsrc->handle, &desc, y->desc(), x->desc(), w->desc(), epsilon));
cache_manager->putRMSNormDescriptor(key, desc);
}
size_t workspace_size = 0;
RUN_INFINI(infiniopGetRMSNormWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopRMSNorm(
desc, workspace, workspace_size,
y->data(), x->data(), w->data(), stream));
}
// Launches infiniopGemm on this context's stream with the data pointers of c,
// a and b and the scalars alpha and beta (presumably c = alpha * a * b +
// beta * c — confirm against the infiniop API).
// The descriptor is cached under a key built from the three tensors only;
// alpha and beta are supplied per call, not at descriptor creation.
void InferenceContext::gemm(std::shared_ptr<Tensor> c,
std::shared_ptr<Tensor> a,
std::shared_ptr<Tensor> b,
float alpha, float beta) {
size_t key = CacheManager::createDescriptorKey(c, a, b);
infiniopGemmDescriptor_t desc;
if (!cache_manager->getGemmDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateGemmDescriptor(rsrc->handle, &desc, c->desc(), a->desc(), b->desc()));
// The cache now owns the descriptor and releases it on eviction.
cache_manager->putGemmDescriptor(key, desc);
}
// Query the workspace size for this descriptor and make sure the shared
// workspace buffer can hold it.
size_t workspace_size = 0;
RUN_INFINI(infiniopGetGemmWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopGemm(
desc, workspace, workspace_size,
c->data(), a->data(), b->data(), alpha, beta, stream));
}
// Launches infiniopRearrange on this context's stream with the data pointers
// of dst and src. The descriptor is cached under a key built from both
// tensors and is created and stored on a miss. Unlike the other operators in
// this file, no workspace is queried or passed.
void InferenceContext::rearrange(std::shared_ptr<Tensor> dst,
std::shared_ptr<Tensor> src) {
size_t key = CacheManager::createDescriptorKey(dst, src);
infiniopRearrangeDescriptor_t desc;
if (!cache_manager->getRearrangeDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateRearrangeDescriptor(rsrc->handle, &desc, dst->desc(), src->desc()));
// The cache now owns the descriptor and releases it on eviction.
cache_manager->putRearrangeDescriptor(key, desc);
}
RUN_INFINI(infiniopRearrange(
desc,
dst->data(),
src->data(),
stream));
}
// Launches infiniopRoPE on this context's stream with the data pointers of q,
// k, pos, sin and cos, in that order (which of them is written is not visible
// here — confirm against the infiniop API).
// The descriptor is cached under a key built from all five tensors and is
// created and stored on a miss.
void InferenceContext::rope(std::shared_ptr<Tensor> q,
std::shared_ptr<Tensor> k,
std::shared_ptr<Tensor> pos,
std::shared_ptr<Tensor> sin,
std::shared_ptr<Tensor> cos) {
size_t key = CacheManager::createDescriptorKey(q, k, pos, sin, cos);
infiniopRoPEDescriptor_t desc;
if (!cache_manager->getRoPEDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateRoPEDescriptor(
rsrc->handle, &desc, q->desc(), k->desc(),
pos->desc(), sin->desc(), cos->desc()));
// The cache now owns the descriptor and releases it on eviction.
cache_manager->putRoPEDescriptor(key, desc);
}
// Query the workspace size for this descriptor and make sure the shared
// workspace buffer can hold it.
size_t workspace_size = 0;
RUN_INFINI(infiniopGetRoPEWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopRoPE(
desc, workspace, workspace_size,
q->data(), k->data(), pos->data(),
sin->data(), cos->data(), stream));
}
// Launches infiniopCausalSoftmax on this context's stream with the data
// pointers of y and x (y is presumably the output — confirm against the
// infiniop API). The descriptor is cached under a key built from both tensors
// and is created and stored on a miss.
void InferenceContext::causalSoftmax(std::shared_ptr<Tensor> y,
std::shared_ptr<Tensor> x) {
size_t key = CacheManager::createDescriptorKey(y, x);
infiniopCausalSoftmaxDescriptor_t desc;
if (!cache_manager->getCausalSoftmaxDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateCausalSoftmaxDescriptor(
rsrc->handle, &desc, y->desc(), x->desc()));
// The cache now owns the descriptor and releases it on eviction.
cache_manager->putCausalSoftmaxDescriptor(key, desc);
}
// Query the workspace size for this descriptor and make sure the shared
// workspace buffer can hold it.
size_t workspace_size = 0;
RUN_INFINI(infiniopGetCausalSoftmaxWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopCausalSoftmax(desc, workspace, workspace_size,
y->data(), x->data(), stream));
}
// Launches infiniopSwiGLU on this context's stream with the data pointers of
// out, up and gate, in that order. The descriptor is cached under a key built
// from the three tensors and is created and stored on a miss.
void InferenceContext::swiglu(std::shared_ptr<Tensor> out,
std::shared_ptr<Tensor> up,
std::shared_ptr<Tensor> gate) {
size_t key = CacheManager::createDescriptorKey(out, up, gate);
infiniopSwiGLUDescriptor_t desc;
if (!cache_manager->getSwiGLUDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateSwiGLUDescriptor(
rsrc->handle, &desc, out->desc(), up->desc(), gate->desc()));
// The cache now owns the descriptor and releases it on eviction.
cache_manager->putSwiGLUDescriptor(key, desc);
}
// Query the workspace size for this descriptor and make sure the shared
// workspace buffer can hold it.
size_t workspace_size = 0;
RUN_INFINI(infiniopGetSwiGLUWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopSwiGLU(desc, workspace, workspace_size,
out->data(), up->data(), gate->data(), stream));
}
// Launches infiniopRandomSample on this context's stream with the data
// pointers of out and prob plus the sampling parameters random_val, top_p,
// top_k and temperature. The descriptor is cached under a key built from out
// and prob only; the sampling parameters are supplied per call, not at
// descriptor creation.
void InferenceContext::randomSample(std::shared_ptr<Tensor> out,
std::shared_ptr<Tensor> prob,
float random_val, float top_p, uint32_t top_k, float temperature) {
size_t key = CacheManager::createDescriptorKey(out, prob);
infiniopRandomSampleDescriptor_t desc;
if (!cache_manager->getRandomSampleDescriptor(key, desc)) {
RUN_INFINI(infiniopCreateRandomSampleDescriptor(
rsrc->handle, &desc, out->desc(), prob->desc()));
// The cache now owns the descriptor and releases it on eviction.
cache_manager->putRandomSampleDescriptor(key, desc);
}
// Query the workspace size for this descriptor and make sure the shared
// workspace buffer can hold it.
size_t workspace_size = 0;
RUN_INFINI(infiniopGetRandomSampleWorkspaceSize(desc, &workspace_size));
ensure_workspace(workspace_size);
void *workspace = workspace_storage->memory();
RUN_INFINI(infiniopRandomSample(
desc, workspace, workspace_size,
out->data(), prob->data(),
random_val, top_p, top_k, temperature,
stream));
}
void InferenceContext::linear(std::shared_ptr<Tensor> c,
std::shared_ptr<Tensor> a,
std::shared_ptr<Tensor> b,
float alpha, float beta,
std::shared_ptr<Tensor> residual,
std::shared_ptr<Tensor> bias) {
bool residual_flag = residual != nullptr;
if (bias && !residual) {
int ndim_diff = c->ndim() - 1;
ASSERT_EQ(bias->ndim(), 1);
ASSERT_EQ(bias->shape()[0], c->shape()[ndim_diff]);
std::vector<ptrdiff_t> strides(ndim_diff, 0);
strides.push_back(bias->strides()[0]);
rearrange(c, bias->view_as(c->shape(), strides));
residual = c;
}
if (residual) {
if (residual->data() == c->data()) {
if (beta == 0.0) {
gemm(c, a, b, alpha, 1.0);
} else {
auto c_copy = Tensor::buffer(c->dtype(), c->shape(), rsrc->memory_pool);
c_copy->copyFrom(c, rsrc->handle, stream);
gemm(c, a, b, alpha, beta);
add(c, c, c_copy);
}
} else {
gemm(c, a, b, alpha, beta);
add(c, c, residual);
}
} else {
gemm(c, a, b, alpha, beta);
}
if (bias && residual_flag) {
int ndim_diff = c->ndim() - 1;
ASSERT_EQ(bias->ndim(), 1);
ASSERT_EQ(bias->shape()[0], c->shape()[ndim_diff]);
std::vector<ptrdiff_t> strides(ndim_diff, 0);
strides.push_back(bias->strides()[0]);
add(c, c, bias->view_as(c->shape(), strides));
}
}
#pragma once
#include "cache_manager.hpp"
#include "jiuge/jiuge_impl.hpp"
#include "jiuge/jiuge_weight.hpp"
#include <cassert>
// Bundles what the operator helpers below need to launch work: the device
// resource, the descriptor cache manager, the stream, and a workspace buffer
// that is shared by all operators and grown on demand.
struct InferenceContext {
// Device resource; its handle and memory_pool are used by the implementation.
// Stored as a raw pointer — ownership is not taken here.
DeviceResource *rsrc;
// Descriptor caches consulted before creating a new operator descriptor.
// Stored as a raw pointer — ownership is not taken here.
CacheManager *cache_manager;
// Stream passed to every operator launch.
infinirtStream_t stream;
// Current workspace buffer; null until ensure_workspace() first allocates.
std::shared_ptr<Storage> workspace_storage;
// Size that was requested for workspace_storage.
size_t current_workspace_size = 0;
InferenceContext(DeviceResource *rsrc, CacheManager *cache_manager, infinirtStream_t stream);
// Makes sure workspace_storage exists and was requested with at least
// required_size bytes.
void ensure_workspace(size_t required_size);
// Operator launchers. Each one obtains its descriptor from cache_manager
// (creating it on a miss) and launches the operator on `stream`.
void add(std::shared_ptr<Tensor> c,
std::shared_ptr<Tensor> a,
std::shared_ptr<Tensor> b);
void rmsnorm(std::shared_ptr<Tensor> y,
std::shared_ptr<Tensor> x,
std::shared_ptr<Tensor> w,
float epsilon);
void gemm(std::shared_ptr<Tensor> c,
std::shared_ptr<Tensor> a,
std::shared_ptr<Tensor> b,
float alpha, float beta);
void rearrange(std::shared_ptr<Tensor> dst,
std::shared_ptr<Tensor> src);
void rope(std::shared_ptr<Tensor> q,
std::shared_ptr<Tensor> k,
std::shared_ptr<Tensor> pos,
std::shared_ptr<Tensor> sin,
std::shared_ptr<Tensor> cos);
void causalSoftmax(std::shared_ptr<Tensor> y,
std::shared_ptr<Tensor> x);
void swiglu(std::shared_ptr<Tensor> out,
std::shared_ptr<Tensor> up,
std::shared_ptr<Tensor> gate);
void randomSample(std::shared_ptr<Tensor> out,
std::shared_ptr<Tensor> prob,
float random_val, float top_p, uint32_t top_k, float temperature);
// Composite of gemm/add/rearrange with optional residual and bias tensors
// (either may be null).
void linear(std::shared_ptr<Tensor> c,
std::shared_ptr<Tensor> a,
std::shared_ptr<Tensor> b,
float alpha, float beta,
std::shared_ptr<Tensor> residual,
std::shared_ptr<Tensor> bias);
};
// Per-thread pointer to the InferenceContext used by the free-function
// operator wrappers in this header; null until setInferenceContext() is called
// on the thread.
//
// Declared as a C++17 inline variable rather than inside an unnamed namespace:
// an unnamed namespace in a header gives every translation unit its own
// private copy, so a context set in one source file would not be visible to
// the inline accessors as compiled into another.
inline thread_local InferenceContext *tls_inference_context = nullptr;
// Returns the context installed for the calling thread. A context must have
// been installed with setInferenceContext() first; this is checked with
// assert only, so the null pointer is dereferenced unchecked when asserts are
// compiled out.
inline InferenceContext &getInferenceContext() {
assert(tls_inference_context != nullptr && "InferenceContext not set for this thread");
return *tls_inference_context;
}
// Installs `ctx` as the calling thread's context. Only the pointer is stored;
// passing nullptr clears the thread's context.
inline void setInferenceContext(InferenceContext *ctx) {
tls_inference_context = ctx;
}
inline void add(std::shared_ptr<Tensor> c, std::shared_ptr<Tensor> a, std::shared_ptr<Tensor> b) {
getInferenceContext().add(c, a, b);
}
inline void rmsnorm(std::shared_ptr<Tensor> y, std::shared_ptr<Tensor> x,
std::shared_ptr<Tensor> w, float epsilon) {
getInferenceContext().rmsnorm(y, x, w, epsilon);
}
inline void gemm(std::shared_ptr<Tensor> c, std::shared_ptr<Tensor> a,
std::shared_ptr<Tensor> b, float alpha, float beta) {
getInferenceContext().gemm(c, a, b, alpha, beta);
}
inline void rearrange(std::shared_ptr<Tensor> dst, std::shared_ptr<Tensor> src) {
getInferenceContext().rearrange(dst, src);
}
inline void rope(std::shared_ptr<Tensor> q, std::shared_ptr<Tensor> k,
std::shared_ptr<Tensor> pos, std::shared_ptr<Tensor> sin,
std::shared_ptr<Tensor> cos) {
getInferenceContext().rope(q, k, pos, sin, cos);
}
inline void causalSoftmax(std::shared_ptr<Tensor> y, std::shared_ptr<Tensor> x) {
getInferenceContext().causalSoftmax(y, x);
}
inline void swiglu(std::shared_ptr<Tensor> out, std::shared_ptr<Tensor> up,
std::shared_ptr<Tensor> gate) {
getInferenceContext().swiglu(out, up, gate);
}
inline void randomSample(std::shared_ptr<Tensor> out, std::shared_ptr<Tensor> prob,
float random_val, float top_p, uint32_t top_k, float temperature) {
getInferenceContext().randomSample(out, prob, random_val, top_p, top_k, temperature);
}
inline void linear(std::shared_ptr<Tensor> c, std::shared_ptr<Tensor> a,
std::shared_ptr<Tensor> b, float alpha, float beta,
std::shared_ptr<Tensor> residual, std::shared_ptr<Tensor> bias) {
getInferenceContext().linear(c, a, b, alpha, beta, residual, bias);
}
This diff is collapsed.
......@@ -51,10 +51,12 @@ private:
std::vector<size_t> _shape;
std::vector<ptrdiff_t> _strides;
infiniopTensorDescriptor_t _desc;
size_t _seed;
TensorDesc(infiniDtype_t dtype, const std::vector<size_t> &shape,
const std::vector<ptrdiff_t> &strides) : _dtype(dtype), _shape(shape), _strides(strides), _desc(nullptr) {}
const std::vector<ptrdiff_t> &strides) : _dtype(dtype), _shape(shape), _strides(strides), _desc(nullptr) { computeTensorDesHash(); }
void resetDesc();
void computeTensorDesHash();
public:
~TensorDesc();
......@@ -74,6 +76,7 @@ public:
infiniopTensorDescriptor_t desc() const;
bool isContigous() const;
std::string info() const;
size_t seed() const { return _seed; }
void dimMerge(size_t dim_start, size_t dim_end);
void dimSplit(size_t dim, const std::vector<size_t> &dims);
......@@ -83,7 +86,7 @@ public:
class Tensor : public std::enable_shared_from_this<Tensor> {
private:
std::shared_ptr<Storage> _storage;
std::shared_ptr<TensorDesc> _desc;
std::shared_ptr<const TensorDesc> _desc;
ptrdiff_t _offset;
......@@ -127,6 +130,11 @@ public:
void debug(const std::string &filename) const;
void debug() const;
std::string info() const;
size_t seed() const;
std::shared_ptr<Tensor> view(const std::vector<size_t> &new_shape) const;
std::shared_ptr<Tensor> view_as(const std::vector<size_t> &new_shape) const;
std::shared_ptr<Tensor> view_as(const std::vector<size_t> &new_shape, const std::vector<ptrdiff_t> &new_strides) const;
~Tensor();
};
......
......@@ -62,6 +62,16 @@ void TensorDesc::resetDesc() {
}
}
// Recomputes _seed, the layout hash returned by seed(), from the element type,
// every dimension and every stride. The element type is included so that two
// tensors with identical shape and strides but different dtypes do not hash
// to the same value.
void TensorDesc::computeTensorDesHash() {
    _seed = 0;
    hash_combine(_seed, static_cast<size_t>(this->dtype()));
    for (auto dim : this->shape()) {
        hash_combine(_seed, dim);
    }
    for (auto stride : this->strides()) {
        // Strides are signed; the cast keeps their bit pattern for hashing.
        hash_combine(_seed, static_cast<size_t>(stride));
    }
}
bool TensorDesc::isContigous() const {
auto ndim = this->ndim();
auto shape = this->shape();
......@@ -258,6 +268,86 @@ std::string Tensor::info() const {
return this->_desc->info();
}
size_t Tensor::seed() const {
return this->_desc->seed();
}
// Returns a tensor that shares this tensor's storage and offset but has shape
// `new_shape`, with strides derived from the current layout (no data is
// copied). The element counts must match.
//
// The current dimensions are first merged wherever a dimension is laid out
// contiguously inside its predecessor, then the new dimensions are carved out
// of those merged groups in order. The request fails an assertion when a new
// dimension does not divide the group it falls into, when the groups run out,
// or when a new dimension has size 0.
std::shared_ptr<Tensor> Tensor::view(const std::vector<size_t> &new_shape) const {
    // Step 1: Validate total size
    size_t numel = 1;
    for (size_t dim : this->_desc->shape()) {
        numel *= dim;
    }
    size_t new_numel = 1;
    for (size_t dim : new_shape) {
        new_numel *= dim;
    }
    ASSERT_EQ(numel, new_numel);

    // Step 2: Get current shape and strides
    const std::vector<size_t> &old_shape = this->_desc->shape();
    const std::vector<ptrdiff_t> &old_strides = this->_desc->strides();

    // Step 3: Create merged shape and strides
    std::vector<size_t> merged_shape;
    std::vector<ptrdiff_t> merged_strides;
    if (old_shape.empty()) {
        // A 0-dim tensor holds one element; model it as a single group of
        // size 1 so the splitting loop below has something to read.
        merged_shape.push_back(1);
        merged_strides.push_back(1);
    } else {
        merged_shape.push_back(old_shape[0]);
        merged_strides.push_back(old_strides[0]);
        for (size_t i = 1; i < old_shape.size(); ++i) {
            // Dimension i continues the previous group when stepping over all
            // of it equals one step of the group.
            if (old_strides[i] * static_cast<ptrdiff_t>(old_shape[i]) == merged_strides.back()) {
                merged_shape.back() *= old_shape[i];
                merged_strides.back() = old_strides[i];
            } else {
                merged_shape.push_back(old_shape[i]);
                merged_strides.push_back(old_strides[i]);
            }
        }
    }

    // Step 4: Compute new strides by splitting merged dimensions
    std::vector<ptrdiff_t> new_strides(new_shape.size());
    size_t merged_idx = 0;
    ptrdiff_t current_stride = merged_strides[0];
    size_t remaining_size = merged_shape[0];
    for (size_t i = 0; i < new_shape.size(); ++i) {
        // A zero-sized dimension would be used as a divisor below.
        ASSERT(new_shape[i] != 0);
        // Find which merged dimension contains this new dimension
        while (new_shape[i] > remaining_size) {
            ++merged_idx;
            ASSERT(merged_idx < merged_shape.size());
            current_stride = merged_strides[merged_idx];
            remaining_size = merged_shape[merged_idx];
        }
        ASSERT_EQ(remaining_size % new_shape[i], 0);
        new_strides[i] = current_stride * (remaining_size / new_shape[i]);
        remaining_size /= new_shape[i];
    }
    return this->view_as(new_shape, new_strides);
}
std::shared_ptr<Tensor> Tensor::view_as(const std::vector<size_t> &new_shape) const {
std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
tensor->_storage = this->_storage;
tensor->_desc = TensorDesc::create(this->dtype(), new_shape);
tensor->_offset = this->_offset;
return tensor;
}
std::shared_ptr<Tensor> Tensor::view_as(const std::vector<size_t> &new_shape, const std::vector<ptrdiff_t> &new_strides) const {
std::shared_ptr<Tensor> tensor = std::make_shared<Tensor>();
tensor->_storage = this->_storage;
tensor->_desc = TensorDesc::create(this->dtype(), new_shape, new_strides);
tensor->_offset = this->_offset;
return tensor;
}
void Tensor::debug(const std::string &filename) const {
RUN_INFINI(infinirtDeviceSynchronize());
......
......@@ -63,11 +63,18 @@ void TensorDesc::dimMerge(size_t dim_start, size_t dim_end) {
this->_shape = new_shape;
this->_strides = new_strides;
this->resetDesc();
this->computeTensorDesHash();
}
std::shared_ptr<Tensor> Tensor::dimMerge(size_t dim_start, size_t dim_end) {
this->_desc->dimMerge(dim_start, dim_end);
return shared_from_this();
auto new_desc = TensorDesc::create(_desc->dtype(), _desc->shape(), _desc->strides());
new_desc->dimMerge(dim_start, dim_end);
auto tensor = std::make_shared<Tensor>();
tensor->_storage = _storage;
tensor->_desc = new_desc;
tensor->_offset = _offset;
return tensor;
}
void TensorDesc::dimSplit(size_t dim, const std::vector<size_t> &dims) {
......@@ -89,11 +96,18 @@ void TensorDesc::dimSplit(size_t dim, const std::vector<size_t> &dims) {
this->_shape = new_shape;
this->_strides = new_strides;
this->resetDesc();
this->computeTensorDesHash();
}
std::shared_ptr<Tensor> Tensor::dimSplit(size_t dim, const std::vector<size_t> &dims) {
this->_desc->dimSplit(dim, dims);
return shared_from_this();
auto new_desc = TensorDesc::create(_desc->dtype(), _desc->shape(), _desc->strides());
new_desc->dimSplit(dim, dims);
auto tensor = std::make_shared<Tensor>();
tensor->_storage = _storage;
tensor->_desc = new_desc;
tensor->_offset = _offset;
return tensor;
}
void TensorDesc::permute(const std::vector<size_t> &order) {
......@@ -108,9 +122,16 @@ void TensorDesc::permute(const std::vector<size_t> &order) {
this->_shape = new_shape;
this->_strides = new_strides;
this->resetDesc();
this->computeTensorDesHash();
}
std::shared_ptr<Tensor> Tensor::permute(const std::vector<size_t> &order) {
this->_desc->permute(order);
return shared_from_this();
auto new_desc = TensorDesc::create(_desc->dtype(), _desc->shape(), _desc->strides());
new_desc->permute(order);
auto tensor = std::make_shared<Tensor>();
tensor->_storage = _storage;
tensor->_desc = new_desc;
tensor->_offset = _offset;
return tensor;
}
......@@ -119,4 +119,9 @@ inline uint16_t f32_to_bf16(float val) {
return bf16_bits;
}
// Hash combine utility (similar to boost::hash_combine)
// Mixes `value` into the running hash `seed` in place, following the
// boost::hash_combine recipe (additive constant plus shifted copies of the
// current seed, XOR-ed back into the seed).
inline void hash_combine(size_t &seed, size_t value) {
    const size_t mixed = value + 0x9e3779b9 + (seed << 6) + (seed >> 2);
    seed = seed ^ mixed;
}
#endif
......@@ -12,6 +12,7 @@ target("infinicore_infer")
set_languages("cxx17")
set_warnings("all", "error")
add_files("src/models/*.cpp")
add_files("src/models/*/*.cpp")
add_files("src/tensor/*.cpp")
add_files("src/allocator/*.cpp")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment