Unverified Commit f5e6d729 authored by thatPepe's avatar thatPepe Committed by GitHub
Browse files

Issue/497 - Enhanced Test Framework (#520)

* issue/497 - add dtype __eq__ and __hash__

* issue/497 - simplified infinicore test functions

* issue/497 - improved test framework

greatly reduced the code required for specific operators;
added strided tensor support;

* issue/497 - add add interface to assist test

* issue/497 - generalized test framework based on add

* issue/497 - support non-contiguous tensors in result comparison

* issue/497 - temporarily fixed strided tensor creation

* issue/497 - rms norm interface

* issue/497 - now requires test function definition

* issue/497 - support mixed dtype

* issue/497 - initial rms norm test

* issue/497 - unified in place and out of place tests

* issue/497 - renamed src/infinicore/op

* issue/497 - reduced comments

* issue/497 - attention

* issue/497 - removed generic parameter mapping

* issue/497 - temporary attention test

* issue/497 - capitalize op name initial

* issue/497 - add a script to run all op tests

* issue/497 - fix comments

* issue/497 - simplified infinicore tensor creation from torch

* issue/497 - support tensor init modes

* issue/497 - support tensor from/to files

* issue/497 - adjust naming
parent 37c76a90
#pragma once #pragma once
#include "op/matmul.hpp" #include "ops/add.hpp"
#include "op/ones.hpp" #include "ops/attention.hpp"
#include "op/rearrange.hpp" #include "ops/matmul.hpp"
#include "ops/ones.hpp"
#include "ops/rearrange.hpp"
#include "ops/rms_norm.hpp"
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
// Public interface for the Add operator.
namespace infinicore::op {
// Dispatch facade: routes Add calls to the kernel registered for the
// active device type (see common::OpDispatcher).
class Add {
public:
// Kernel signature: (c, a, b) — writes the sum of a and b into c.
using schema = void (*)(Tensor, Tensor, Tensor);
// Looks up the kernel for the current device and invokes it.
static void execute(Tensor c, Tensor a, Tensor b);
// Registry mapping device types to Add kernel implementations.
static common::OpDispatcher<schema> &dispatcher();
};
// Out-of-place: returns a newly allocated tensor holding a + b.
Tensor add(Tensor a, Tensor b);
// In-place: writes a + b into the caller-provided tensor c.
void add_(Tensor c, Tensor a, Tensor b);
// Operator sugar equivalent to add(a, b).
Tensor operator+(Tensor a, Tensor b);
} // namespace infinicore::op
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
// Public interface for the Attention operator (with key/value caches).
namespace infinicore::op {
// Dispatch facade: routes Attention calls to the kernel registered for
// the active device type (see common::OpDispatcher).
class Attention {
public:
// Kernel signature: (out, q, k, v, k_cache, v_cache, pos).
// NOTE(review): pos is forwarded verbatim to the kernel — presumably the
// current offset into the KV caches; confirm against the kernel contract.
using schema = void (*)(Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, size_t);
// Looks up the kernel for the current device and invokes it.
static void execute(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos);
// Registry mapping device types to Attention kernel implementations.
static common::OpDispatcher<schema> &dispatcher();
};
// Out-of-place: allocates and returns the attention output tensor.
Tensor attention(Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos);
// In-place: writes the attention result into the caller-provided out tensor.
void attention_(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos);
} // namespace infinicore::op
...@@ -8,19 +8,19 @@ namespace infinicore::op::common { ...@@ -8,19 +8,19 @@ namespace infinicore::op::common {
template <typename Fn> template <typename Fn>
class OpDispatcher { class OpDispatcher {
public: public:
void registerDevice(Device::Type device_type, Fn fn, bool override_existing=true) { void registerDevice(Device::Type device_type, Fn fn, bool override_existing = true) {
if (table_[(size_t)device_type] == nullptr || override_existing){ if (table_[(size_t)device_type] == nullptr || override_existing) {
table_[(size_t)device_type] = fn; table_[(size_t)device_type] = fn;
} }
} }
void registerDevice(std::initializer_list<Device::Type> device_types, Fn fn, bool override_existing=true) { void registerDevice(std::initializer_list<Device::Type> device_types, Fn fn, bool override_existing = true) {
for (auto device_type : device_types) { for (auto device_type : device_types) {
registerDevice(device_type, fn, override_existing); registerDevice(device_type, fn, override_existing);
} }
} }
void registerAll(Fn fn, bool override_existing=true) { void registerAll(Fn fn, bool override_existing = true) {
for (size_t device_type = 0; device_type < static_cast<size_t>(Device::Type::COUNT); ++device_type) { for (size_t device_type = 0; device_type < static_cast<size_t>(Device::Type::COUNT); ++device_type) {
registerDevice((Device::Type)device_type, fn, override_existing); registerDevice((Device::Type)device_type, fn, override_existing);
} }
......
#pragma once
#include "../device.hpp"
#include "common/op.hpp"
// Public interface for the RMSNorm (root-mean-square normalization) operator.
namespace infinicore::op {
// Dispatch facade: routes RMSNorm calls to the kernel registered for the
// active device type (see common::OpDispatcher).
class RMSNorm {
public:
// Kernel signature: (y, x, weight, epsilon) — writes the normalized x,
// scaled by weight, into y.
using schema = void (*)(Tensor, Tensor, Tensor, float);
// Looks up the kernel for the current device and invokes it.
// epsilon defaults to 1e-5f, the conventional numerical-stability term.
static void execute(Tensor y, Tensor x, Tensor weight, float epsilon = 1e-5f);
// Registry mapping device types to RMSNorm kernel implementations.
static common::OpDispatcher<schema> &dispatcher();
};
// Out-of-place: returns a newly allocated tensor with the normalized result.
Tensor rms_norm(Tensor x, Tensor weight, float epsilon = 1e-5f);
// In-place: writes the normalized result into the caller-provided tensor y.
void rms_norm_(Tensor y, Tensor x, Tensor weight, float epsilon = 1e-5f);
} // namespace infinicore::op
...@@ -25,8 +25,11 @@ from infinicore.dtype import ( ...@@ -25,8 +25,11 @@ from infinicore.dtype import (
uint8, uint8,
) )
from infinicore.ntops import use_ntops from infinicore.ntops import use_ntops
from infinicore.ops.add import add
from infinicore.ops.attention import attention
from infinicore.ops.matmul import matmul from infinicore.ops.matmul import matmul
from infinicore.ops.rearrange import rearrange from infinicore.ops.rearrange import rearrange
from infinicore.ops.rms_norm import rms_norm
from infinicore.tensor import ( from infinicore.tensor import (
empty, empty,
from_blob, from_blob,
...@@ -66,8 +69,11 @@ __all__ = [ ...@@ -66,8 +69,11 @@ __all__ = [
# `ntops` integration. # `ntops` integration.
"use_ntops", "use_ntops",
# Operations. # Operations.
"add",
"attention",
"matmul", "matmul",
"rearrange", "rearrange",
"rms_norm",
"empty", "empty",
"from_blob", "from_blob",
"ones", "ones",
......
...@@ -4,7 +4,6 @@ from infinicore.lib import _infinicore ...@@ -4,7 +4,6 @@ from infinicore.lib import _infinicore
class dtype: class dtype:
def __init__(self, data_type): def __init__(self, data_type):
"""An internal method. Please do not use this directly.""" """An internal method. Please do not use this directly."""
self._underlying = data_type self._underlying = data_type
def __repr__(self): def __repr__(self):
...@@ -29,9 +28,31 @@ class dtype: ...@@ -29,9 +28,31 @@ class dtype:
_infinicore.DataType.C128: "complex128", _infinicore.DataType.C128: "complex128",
_infinicore.DataType.BF16: "bfloat16", _infinicore.DataType.BF16: "bfloat16",
} }
return f"infinicore.{repr_map[self._underlying]}" return f"infinicore.{repr_map[self._underlying]}"
def __eq__(self, other):
    """Compare two dtype objects for equality.

    Args:
        other: The object to compare with.

    Returns:
        True/False when ``other`` is a dtype (equal iff the underlying
        data types match); ``NotImplemented`` otherwise, so Python can
        try the reflected comparison before falling back to identity —
        the observable result for non-dtype operands is still inequality.
    """
    if not isinstance(other, dtype):
        return NotImplemented
    return self._underlying == other._underlying
def __hash__(self):
    """Hash a dtype via its wrapped native data type.

    Keeps ``__hash__`` consistent with ``__eq__``: two equal dtype
    objects wrap the same underlying value and thus hash identically,
    so dtypes are safe to use as dict keys and set members.
    """
    underlying = self._underlying
    return hash(underlying)
float32 = dtype(_infinicore.DataType.F32) float32 = dtype(_infinicore.DataType.F32)
float = float32 float = float32
......
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def add(input, other, *, out=None):
    """Element-wise addition of two tensors.

    Args:
        input: First operand tensor.
        other: Second operand tensor.
        out: Optional pre-allocated output tensor, written in place.

    Returns:
        Tensor: a newly allocated result when ``out`` is None, otherwise
        ``out`` itself (torch-style convention; previously the in-place
        path implicitly returned None).
    """
    if out is None:
        return Tensor(_infinicore.add(input._underlying, other._underlying))
    _infinicore.add_(out._underlying, input._underlying, other._underlying)
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def attention(q, k, v, k_cache, v_cache, pos, *, out=None):
    """Compute attention over q/k/v using key/value caches.

    Args:
        q: Query tensor.
        k: Key tensor.
        v: Value tensor.
        k_cache: Key cache tensor handed to the native kernel.
        v_cache: Value cache tensor handed to the native kernel.
        pos: Integer forwarded verbatim to the native kernel
            (presumably the current cache position — confirm against
            the kernel implementation).
        out: Optional pre-allocated output tensor, written in place.

    Returns:
        Tensor: a newly allocated result when ``out`` is None, otherwise
        ``out`` itself (torch-style convention; previously the in-place
        path implicitly returned None).
    """
    if out is None:
        return Tensor(
            _infinicore.attention(
                q._underlying,
                k._underlying,
                v._underlying,
                k_cache._underlying,
                v_cache._underlying,
                pos,
            )
        )
    _infinicore.attention_(
        out._underlying,
        q._underlying,
        k._underlying,
        v._underlying,
        k_cache._underlying,
        v_cache._underlying,
        pos,
    )
    return out
from infinicore.lib import _infinicore
from infinicore.tensor import Tensor


def rms_norm(input, weight, epsilon=1e-5, *, out=None):
    """Root-mean-square normalization of ``input`` scaled by ``weight``.

    Args:
        input: Tensor to normalize.
        weight: Scale tensor applied by the native kernel.
        epsilon: Numerical-stability term (default 1e-5, matching the
            C++ interface default).
        out: Optional pre-allocated output tensor, written in place.

    Returns:
        Tensor: a newly allocated result when ``out`` is None, otherwise
        ``out`` itself (torch-style convention; previously the in-place
        path implicitly returned None).
    """
    if out is None:
        return Tensor(
            _infinicore.rms_norm(input._underlying, weight._underlying, epsilon)
        )
    _infinicore.rms_norm_(
        out._underlying, input._underlying, weight._underlying, epsilon
    )
    return out
#include "infinicore/ops/add.hpp"
namespace infinicore::op {

// Returns the process-wide registry mapping device types to Add kernels.
// Function-local static gives thread-safe lazy initialization.
common::OpDispatcher<Add::schema> &Add::dispatcher() {
    static common::OpDispatcher<Add::schema> dispatcher_;
    return dispatcher_;
} // fixed: removed stray ';' after the function body (-Wextra-semi)

// Invokes the kernel registered for the current device: c = a + b.
void Add::execute(Tensor c, Tensor a, Tensor b) {
    dispatcher().lookup(context::getDevice().getType())(c, a, b);
}

// Out-of-place addition: allocates the result with a's shape/dtype/device.
// NOTE(review): assumes a and b are compatible in shape/dtype — confirm
// whether the kernel supports broadcasting or mixed dtypes.
Tensor add(Tensor a, Tensor b) {
    auto c = Tensor::empty(a->shape(), a->dtype(), a->device());
    add_(c, a, b);
    return c;
}

// In-place addition into the caller-provided output c.
void add_(Tensor c, Tensor a, Tensor b) {
    Add::execute(c, a, b);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/add.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>

namespace infinicore::op::add_impl::infiniop {

// Per-thread cache of infiniop Add descriptors keyed by a tensor hash,
// so repeated calls with matching tensors reuse a prepared descriptor.
thread_local common::OpCache<size_t, infiniopAddDescriptor_t> caches(
    100, // capacity
    [](infiniopAddDescriptor_t &desc) {
        // Eviction hook: destroy the native descriptor exactly once.
        if (desc != nullptr) {
            INFINICORE_CHECK_ERROR(infiniopDestroyAddDescriptor(desc));
            desc = nullptr;
        }
    });

// Computes c = a + b through the infiniop backend on the current device/stream.
void calculate(Tensor c, Tensor a, Tensor b) {
    // Hash in the same (c, a, b) order the descriptor is created with,
    // consistent with the other infiniop implementations (was (c, b, a));
    // cache keys are in-memory and per-thread, so the key change is safe.
    size_t seed = hash_combine(c, a, b);
    auto device_type = context::getDevice().getType();
    auto device_index = context::getDevice().getIndex();
    auto &cache = caches.getCache(device_type, device_index);
    auto desc_opt = cache.get(seed);
    infiniopAddDescriptor_t desc = nullptr;
    if (!desc_opt) {
        // Cache miss: build a descriptor from the tensor descriptors and keep it.
        INFINICORE_CHECK_ERROR(infiniopCreateAddDescriptor(
            context::getInfiniopHandle(), &desc,
            c->desc(), a->desc(), b->desc()));
        cache.put(seed, desc);
    } else {
        desc = *desc_opt;
    }
    // Workspace is allocated fresh per call; only the descriptor is cached.
    size_t workspace_size = 0;
    INFINICORE_CHECK_ERROR(infiniopGetAddWorkspaceSize(desc, &workspace_size));
    std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
    INFINICORE_CHECK_ERROR(infiniopAdd(
        desc, workspace->data(), workspace_size,
        c->data(), a->data(), b->data(), context::getStream()));
}

// Self-registration: installs this backend for all devices at load time,
// without overriding any kernel already registered (override = false).
static bool registered = []() {
    Add::dispatcher().registerAll(&calculate, false);
    return true;
}();

} // namespace infinicore::op::add_impl::infiniop
#include "infinicore/ops/attention.hpp"
namespace infinicore::op {

// Returns the process-wide registry mapping device types to Attention kernels.
// Function-local static gives thread-safe lazy initialization.
common::OpDispatcher<Attention::schema> &Attention::dispatcher() {
    static common::OpDispatcher<Attention::schema> dispatcher_;
    return dispatcher_;
} // fixed: removed stray ';' after the function body (-Wextra-semi)

// Invokes the kernel registered for the current device.
void Attention::execute(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos) {
    dispatcher().lookup(context::getDevice().getType())(out, q, k, v, k_cache, v_cache, pos);
}

// Out-of-place attention: allocates the output tensor and delegates to attention_.
// NOTE(review): out is shaped {seq_len, n_q_head, head_dim} — the first two
// axes of q swapped; this transposed layout looks deliberate, but confirm it
// matches the kernel's expected output layout.
Tensor attention(Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos) {
    size_t n_q_head = q->shape()[0];
    size_t seq_len = q->shape()[1];
    size_t head_dim = q->shape()[2];
    Shape shape = {seq_len, n_q_head, head_dim};
    auto out = Tensor::empty(shape, q->dtype(), q->device());
    attention_(out, q, k, v, k_cache, v_cache, pos);
    return out;
}

// In-place attention into the caller-provided output tensor.
void attention_(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos) {
    Attention::execute(out, q, k, v, k_cache, v_cache, pos);
}

} // namespace infinicore::op
#include "../../utils.hpp"
#include "infinicore/common/hash.hpp"
#include "infinicore/ops/attention.hpp"
#include "infinicore/ops/common/cache.hpp"
#include <infiniop.h>
// infiniop backend for the Attention operator.
namespace infinicore::op::attention_impl::infiniop {
// Per-thread cache of infiniop Attention descriptors keyed by a tensor/pos
// hash, so repeated calls with matching arguments reuse a prepared descriptor.
thread_local common::OpCache<size_t, infiniopAttentionDescriptor_t> caches(
100, // capacity
[](infiniopAttentionDescriptor_t &desc) {
// Eviction hook: destroy the native descriptor exactly once.
if (desc != nullptr) {
INFINICORE_CHECK_ERROR(infiniopDestroyAttentionDescriptor(desc));
desc = nullptr;
}
});
// Computes attention through the infiniop backend on the current device/stream.
// NOTE(review): pos participates in both the cache key and descriptor
// creation, so every distinct pos gets its own cached descriptor.
void calculate(Tensor out, Tensor q, Tensor k, Tensor v, Tensor k_cache, Tensor v_cache, size_t pos) {
size_t seed = hash_combine(out, q, k, v, k_cache, v_cache, pos);
auto device_type = context::getDevice().getType();
auto device_index = context::getDevice().getIndex();
auto &cache = caches.getCache(device_type, device_index);
auto desc_opt = cache.get(seed);
infiniopAttentionDescriptor_t desc = nullptr;
if (!desc_opt) {
// Cache miss: build a descriptor from the tensor descriptors and keep it.
INFINICORE_CHECK_ERROR(infiniopCreateAttentionDescriptor(
context::getInfiniopHandle(), &desc,
out->desc(), q->desc(), k->desc(), v->desc(),
k_cache->desc(), v_cache->desc(), pos));
cache.put(seed, desc);
} else {
desc = *desc_opt;
}
// Workspace is allocated fresh per call; only the descriptor is cached.
size_t workspace_size = 0;
INFINICORE_CHECK_ERROR(infiniopGetAttentionWorkspaceSize(desc, &workspace_size));
std::shared_ptr<Memory> workspace = context::allocateMemory(workspace_size);
INFINICORE_CHECK_ERROR(infiniopAttention(
desc, workspace->data(), workspace_size,
out->data(), q->data(), k->data(), v->data(),
k_cache->data(), v_cache->data(), context::getStream()));
}
// Self-registration: installs this backend for all devices at load time,
// without overriding any kernel already registered (override = false).
static bool registered = []() {
Attention::dispatcher().registerAll(&calculate, false);
return true;
}();
} // namespace infinicore::op::attention_impl::infiniop
#include "infinicore/op/matmul.hpp" #include "infinicore/ops/matmul.hpp"
namespace infinicore::op { namespace infinicore::op {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment