".github/git@developer.sourcefind.cn:tsoc/superbenchmark.git" did not exist on "81a4146bc1c5f10f1f8cd6862db4f9524966d705"
Unverified Commit e950d3fd authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #94 from PanZezhong1725/issue/68

issue/68 测试框架,matmul测例生成 (test framework; matmul test-case generation)
parents b394e3d6 b03d744c
...@@ -41,6 +41,10 @@ jobs: ...@@ -41,6 +41,10 @@ jobs:
if: matrix.os != 'windows-latest' if: matrix.os != 'windows-latest'
run: xmake install run: xmake install
- name: build infiniop-test
if: matrix.os != 'windows-latest'
run: xmake build infiniop-test
- name: python test - name: python test
if: matrix.os != 'windows-latest' if: matrix.os != 'windows-latest'
run: | run: |
......
...@@ -19,3 +19,6 @@ cache/ ...@@ -19,3 +19,6 @@ cache/
# JSON # JSON
*.json *.json
#GGUF
*.gguf
#ifndef __INFINIOPTEST_FILE_MAPPING_HPP__
#define __INFINIOPTEST_FILE_MAPPING_HPP__
#ifdef _WIN32 // windows
#include <windows.h>
#else // linux
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
#include <cstddef>
#include <memory>
#include <string>
// RAII wrapper that memory-maps a file read-only.
// Uses CreateFileMapping/MapViewOfFile on Windows and mmap on POSIX;
// the mapping and any handles are released in the destructor.
class FileMapping {
private:
    void *_ptr;   // start of the mapped view
    size_t _size; // mapped size in bytes
#ifdef _WIN32
    HANDLE _file_handle = NULL;  // file handle, closed in dtor
    HANDLE _file_mapping = NULL; // mapping-object handle, closed in dtor
#endif

public:
    // Maps `filepath` read-only; throws std::runtime_error on failure.
    FileMapping(const std::string &filepath);
    ~FileMapping();
    void *ptr() const;   // base address of the mapping
    size_t size() const; // size of the mapping in bytes
};
#endif // __INFINIOPTEST_FILE_MAPPING_HPP__
#ifndef __INFINIOPTEST_GGUF_HPP__
#define __INFINIOPTEST_GGUF_HPP__
#include "file_mapping.hpp"
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
#ifdef _WIN32
#include <windows.h>
#endif
// Value types used for GGUF metadata key/value records.
// Numeric values follow the GGUF specification and must not change.
typedef enum {
    GGUF_TYPE_UINT8 = 0,
    GGUF_TYPE_INT8 = 1,
    GGUF_TYPE_UINT16 = 2,
    GGUF_TYPE_INT16 = 3,
    GGUF_TYPE_UINT32 = 4,
    GGUF_TYPE_INT32 = 5,
    GGUF_TYPE_FLOAT32 = 6,
    GGUF_TYPE_BOOL = 7,
    GGUF_TYPE_STRING = 8,
    GGUF_TYPE_ARRAY = 9,
    GGUF_TYPE_UINT64 = 10,
    GGUF_TYPE_INT64 = 11,
    GGUF_TYPE_FLOAT64 = 12,
    GGUF_TYPE_COUNT, // marks the end of the enum
} GGUF_TYPE;
// Display names for GGUF_TYPE values, indexed by the enum value.
// Must stay in sync with GGUF_TYPE above.
constexpr const char *GGUF_TYPE_NAME[GGUF_TYPE_COUNT] = {
    "GGUF_TYPE_UINT8",
    "GGUF_TYPE_INT8",
    "GGUF_TYPE_UINT16",
    "GGUF_TYPE_INT16",
    "GGUF_TYPE_UINT32",
    "GGUF_TYPE_INT32",
    "GGUF_TYPE_FLOAT32",
    "GGUF_TYPE_BOOL",
    "GGUF_TYPE_STRING",
    "GGUF_TYPE_ARRAY",
    "GGUF_TYPE_UINT64",
    "GGUF_TYPE_INT64",
    "GGUF_TYPE_FLOAT64",
};
// On-disk layout of a GGUF string: 64-bit length followed by a data
// pointer. Used here only for sizeof() in GGUF_TYPE_SIZE.
struct gguf_str {
    uint64_t n;  // byte length (string is NOT nul-terminated)
    char *data;
};
// Size in bytes of one value of each GGUF_TYPE, indexed by enum value.
// GGUF_TYPE_ARRAY has no fixed element size (marked 0).
static const size_t GGUF_TYPE_SIZE[GGUF_TYPE_COUNT] = {
    sizeof(uint8_t),  // GGUF_TYPE_UINT8
    sizeof(int8_t),   // GGUF_TYPE_INT8
    sizeof(uint16_t), // GGUF_TYPE_UINT16
    sizeof(int16_t),  // GGUF_TYPE_INT16
    sizeof(uint32_t), // GGUF_TYPE_UINT32
    sizeof(int32_t),  // GGUF_TYPE_INT32
    sizeof(float),    // GGUF_TYPE_FLOAT32
    sizeof(bool),     // GGUF_TYPE_BOOL
    sizeof(gguf_str), // GGUF_TYPE_STRING
    0,                // GGUF_TYPE_ARRAY (undefined)
    sizeof(uint64_t), // GGUF_TYPE_UINT64
    sizeof(int64_t),  // GGUF_TYPE_INT64
    sizeof(double),   // GGUF_TYPE_FLOAT64
};
// Render a single fixed-size GGUF value at `data` as decimal text.
// GGUF_TYPE_ARRAY is rejected (callers must iterate elements themselves);
// unknown types yield a placeholder string. Note: for GGUF_TYPE_STRING
// this prints the numeric value of the first byte, matching the
// fixed-size treatment of the other cases.
inline std::string ggufDataToString(const uint8_t *data, GGUF_TYPE gguf_type) {
    switch (gguf_type) {
    case GGUF_TYPE_UINT8:
        return std::to_string(*reinterpret_cast<const uint8_t *>(data));
    case GGUF_TYPE_INT8:
        return std::to_string(*reinterpret_cast<const int8_t *>(data));
    case GGUF_TYPE_UINT16:
        return std::to_string(*reinterpret_cast<const uint16_t *>(data));
    case GGUF_TYPE_INT16:
        return std::to_string(*reinterpret_cast<const int16_t *>(data));
    case GGUF_TYPE_UINT32:
        return std::to_string(*reinterpret_cast<const uint32_t *>(data));
    case GGUF_TYPE_INT32:
        return std::to_string(*reinterpret_cast<const int32_t *>(data));
    case GGUF_TYPE_FLOAT32:
        return std::to_string(*reinterpret_cast<const float *>(data));
    case GGUF_TYPE_BOOL:
        return std::to_string(*reinterpret_cast<const bool *>(data));
    case GGUF_TYPE_UINT64:
        return std::to_string(*reinterpret_cast<const uint64_t *>(data));
    case GGUF_TYPE_INT64:
        return std::to_string(*reinterpret_cast<const int64_t *>(data));
    case GGUF_TYPE_FLOAT64:
        return std::to_string(*reinterpret_cast<const double *>(data));
    case GGUF_TYPE_STRING:
        return std::to_string(*reinterpret_cast<const char *>(data));
    case GGUF_TYPE_ARRAY:
        throw std::runtime_error("GGUF_TYPE_ARRAY should be processed element by element");
    default:
        return "GGUF_TYPE_UNKNOWN";
    }
}
// One parsed GGUF metadata record. For arrays, `gguf_type` holds the
// ELEMENT type and `value` holds all elements back-to-back (see
// GGUFFileReader::readMetaKVs).
struct GGUFKeyValue {
    std::string key;
    GGUF_TYPE gguf_type;        // value (or array-element) type
    std::vector<uint8_t> value; // raw bytes of the value(s)
    std::string toString() const;
};
// Tensor element types as defined by ggml. Numeric values follow the
// ggml/GGUF specification and must not change. Note the gaps at 4-5 and
// 31-33: those values are unassigned.
typedef enum {
    GGML_TYPE_F32 = 0,
    GGML_TYPE_F16 = 1,
    GGML_TYPE_Q4_0 = 2,
    GGML_TYPE_Q4_1 = 3,
    GGML_TYPE_Q5_0 = 6,
    GGML_TYPE_Q5_1 = 7,
    GGML_TYPE_Q8_0 = 8,
    GGML_TYPE_Q8_1 = 9,
    GGML_TYPE_Q2_K = 10,
    GGML_TYPE_Q3_K = 11,
    GGML_TYPE_Q4_K = 12,
    GGML_TYPE_Q5_K = 13,
    GGML_TYPE_Q6_K = 14,
    GGML_TYPE_Q8_K = 15,
    GGML_TYPE_IQ2_XXS = 16,
    GGML_TYPE_IQ2_XS = 17,
    GGML_TYPE_IQ3_XXS = 18,
    GGML_TYPE_IQ1_S = 19,
    GGML_TYPE_IQ4_NL = 20,
    GGML_TYPE_IQ3_S = 21,
    GGML_TYPE_IQ2_S = 22,
    GGML_TYPE_IQ4_XS = 23,
    GGML_TYPE_I8 = 24,
    GGML_TYPE_I16 = 25,
    GGML_TYPE_I32 = 26,
    GGML_TYPE_I64 = 27,
    GGML_TYPE_F64 = 28,
    GGML_TYPE_IQ1_M = 29,
    GGML_TYPE_BF16 = 30,
    GGML_TYPE_TQ1_0 = 34,
    GGML_TYPE_TQ2_0 = 35,
    GGML_TYPE_COUNT = 36,
} GGML_TYPE;
/// Size in bytes of a single element of the given GGML type.
/// Only plain (non-quantized) types are supported.
/// @throws std::runtime_error for quantized/unsupported types.
inline size_t ggmlTypeSize(GGML_TYPE ggml_type) {
    switch (ggml_type) {
    case GGML_TYPE_I8:
        return 1;
    case GGML_TYPE_F16:
    case GGML_TYPE_BF16:
    case GGML_TYPE_I16:
        return 2;
    case GGML_TYPE_F32:
    case GGML_TYPE_I32:
        return 4;
    case GGML_TYPE_F64:
    case GGML_TYPE_I64:
        return 8;
    default:
        throw std::runtime_error("GGML_TYPE_SIZE: Unsupported GGML_TYPE");
    }
    // (dead `return 0;` after the exhaustive switch removed)
}
// Display names for GGML_TYPE, indexed by enum value. Unassigned enum
// values (4-5, 31-33) map to nullptr — callers must check before
// streaming/printing an entry.
constexpr const char *GGML_TYPE_NAME[GGML_TYPE_COUNT] = {
    "F32",
    "F16",
    "Q4_0",
    "Q4_1",
    nullptr, // 4 (gap)
    nullptr, // 5 (gap)
    "Q5_0",
    "Q5_1",
    "Q8_0",
    "Q8_1",
    "Q2_K",
    "Q3_K",
    "Q4_K",
    "Q5_K",
    "Q6_K",
    "Q8_K",
    "IQ2_XXS",
    "IQ2_XS",
    "IQ3_XXS",
    "IQ1_S",
    "IQ4_NL",
    "IQ3_S",
    "IQ2_S",
    "IQ4_XS",
    "I8",
    "I16",
    "I32",
    "I64",
    "F64",
    "IQ1_M",
    "BF16",
    nullptr, // 31 (gap)
    nullptr, // 32 (gap)
    nullptr, // 33 (gap)
    "TQ1_0",
    "TQ2_0",
};
// Metadata for one tensor as read from the GGUF tensor-info section.
// `shape` is stored in GGUF order (innermost dimension first);
// `data_offset` is relative to the start of the tensor-data region.
struct GGUFTensorInfo {
    std::string name;
    uint32_t ndim;
    std::vector<int64_t> shape;
    GGML_TYPE ggml_type;
    uint64_t data_offset;
    std::string toString() const;
};
// Parses a memory-mapped GGUF file: header, metadata key/values and
// tensor infos. After construction getGgmlStart() points at the aligned
// start of the tensor-data region.
class GGUFFileReader {
public:
    // Opens and fully parses `filepath`; throws on I/O or format errors.
    GGUFFileReader(const std::string &filepath);
    ~GGUFFileReader() = default;
    // Human-readable dump of header, KVs and tensor infos.
    std::string toString() const;
    const std::unordered_map<std::string, std::shared_ptr<GGUFKeyValue>> &getAttributeMap() const;
    const std::unordered_map<std::string, std::shared_ptr<GGUFTensorInfo>> &getTensorInfoMap() const;
    std::shared_ptr<FileMapping> getFileMapping() const { return _file; }
    // Start of the tensor-data region inside the mapping.
    void *getGgmlStart() const { return _cursor; }

private:
    void readHeader();
    void readMetaKVs();
    void readTensorInfos();
    std::string readString();
    // Reads a little-endian POD value at the cursor and advances it.
    template <typename T>
    T read();
    std::shared_ptr<FileMapping> _file; // keeps the mapping alive
    void *_data = nullptr;              // base of the mapping
    uint8_t *_cursor = nullptr;         // current parse position
    uint32_t _version;
    int64_t _num_tensors;
    int64_t _num_meta_kvs;
    std::vector<std::shared_ptr<GGUFKeyValue>> _meta_kvs;        // in file order
    std::vector<std::shared_ptr<GGUFTensorInfo>> _tensor_infos;  // in file order
    std::unordered_map<std::string, std::shared_ptr<GGUFKeyValue>> _attributes_map;    // by key
    std::unordered_map<std::string, std::shared_ptr<GGUFTensorInfo>> _tensors_info_map; // by name
};
#endif
#ifndef __INFINIOPTEST_OPS_HPP__
#define __INFINIOPTEST_OPS_HPP__
#include "test.hpp"
/*
 * Declare all the tests here
 */
// Expands to the per-operator Test class declaration in
// namespace infiniop_test::matmul (see DECLARE_INFINIOP_TEST in test.hpp).
DECLARE_INFINIOP_TEST(matmul)

// Builds one {op_name, TestBuilder} map entry from an op's Test class.
#define REGISTER_INFINIOP_TEST(name) \
    { \
        #name, \
        { infiniop_test::name::Test::build, \
          infiniop_test::name::Test::attribute_names(), \
          infiniop_test::name::Test::tensor_names() } \
    }

/*
 * Register all the tests here
 */
// Initializer list used to populate TEST_BUILDERS.
#define TEST_BUILDER_MAPPINGS \
    { \
        REGISTER_INFINIOP_TEST(matmul), \
    }

namespace infiniop_test {
// Global variable for {op_name: builder} mappings
extern std::unordered_map<std::string, const TestBuilder> TEST_BUILDERS;
} // namespace infiniop_test
#endif
#ifndef __INFINIOPTEST_TENSOR_HPP__
#define __INFINIOPTEST_TENSOR_HPP__
#include "file_mapping.hpp"
#include "gguf.hpp"
#include <infiniop.h>
// Translate a plain (non-quantized) GGML element type into the
// corresponding infiniop dtype; throws for every other type.
inline infiniDtype_t ggmlTypeToInfiniType(GGML_TYPE type) {
    switch (type) {
    // floating-point types
    case GGML_TYPE_F16:
        return INFINI_DTYPE_F16;
    case GGML_TYPE_BF16:
        return INFINI_DTYPE_BF16;
    case GGML_TYPE_F32:
        return INFINI_DTYPE_F32;
    case GGML_TYPE_F64:
        return INFINI_DTYPE_F64;
    // integer types
    case GGML_TYPE_I8:
        return INFINI_DTYPE_I8;
    case GGML_TYPE_I16:
        return INFINI_DTYPE_I16;
    case GGML_TYPE_I32:
        return INFINI_DTYPE_I32;
    case GGML_TYPE_I64:
        return INFINI_DTYPE_I64;
    default:
        throw std::runtime_error("Unsupported GGML type");
    }
}
namespace infiniop_test {
// Owns a buffer that lives either on a device (allocated via infinirt),
// on the CPU heap, or inside a memory-mapped file (in which case the
// FileMapping shared_ptr keeps the bytes alive and nothing is freed).
class Memory {
private:
    void *_ptr;
    size_t _size;
    infiniDevice_t _device;
    int _device_id;
    std::shared_ptr<FileMapping> _file_mapping; // non-null => file-backed

public:
    // Allocate `size` bytes on the given device.
    Memory(size_t size, infiniDevice_t device, int device_id);
    // Wrap an existing CPU pointer backed by a file mapping (not owned).
    Memory(const std::shared_ptr<FileMapping> &file_mapping, void *ptr, size_t size);
    ~Memory();
    void *ptr() const { return _ptr; }
    size_t size() const { return _size; }
    infiniDevice_t device() const { return _device; }
    int device_id() const { return _device_id; }
};
// A typed, strided view over a Memory buffer, paired with an infiniop
// tensor descriptor. Strides are in elements; `_offset` is in bytes.
class Tensor {
private:
    infiniopTensorDescriptor_t _desc;
    std::shared_ptr<Memory> _memory;
    std::vector<size_t> _shape;
    std::vector<ptrdiff_t> _strides;
    size_t _offset;        // byte offset of the first element in _memory
    GGML_TYPE _ggml_type;

public:
    // Build a CPU tensor from GGUF tensor info, copying data out of the
    // mapped file; `strides_meta` (I32/I64 array) overrides contiguous strides.
    Tensor(const GGUFTensorInfo *info,
           const void *ggml_ptr,
           const GGUFKeyValue *strides_meta = nullptr);
    // Wrap existing memory with explicit shape/strides/type.
    Tensor(std::shared_ptr<Memory> memory, size_t offset,
           const std::vector<size_t> &shape,
           const std::vector<ptrdiff_t> &strides,
           GGML_TYPE dtype);
    ~Tensor();
    infiniopTensorDescriptor_t desc() const { return _desc; }
    std::vector<size_t> shape() const { return std::vector<size_t>(_shape); }
    std::vector<ptrdiff_t> strides() const { return std::vector<ptrdiff_t>(_strides); }
    GGML_TYPE ggml_type() const { return _ggml_type; }
    // Pointer to the first element (memory base + offset).
    void *data() const;
    // Copy (or re-wrap) this tensor onto the given device.
    std::shared_ptr<Tensor> to(infiniDevice_t device, int device_id = 0) const;
    std::string info() const;
    void debug() const;
};
} // namespace infiniop_test
#endif
#ifndef __INFINIOPTEST_HPP__
#define __INFINIOPTEST_HPP__
#include "gguf.hpp"
#include "tensor.hpp"
#include <functional>
#include <sstream>
#include <unordered_map>
#include <vector>
// ANSI terminal color escape codes for test-result output.
#define RESET "\033[0m"
#define GREEN "\033[32m"
#define RED "\033[31m"
#define YELLOW "\033[33m"
namespace infiniop_test {
// Outcome categories for a single testcase run.
enum class TestStatus {
    PASS,
    TEST_INIT_FAILED,   // testcase could not be constructed from the file
    OP_CREATION_FAILED, // descriptor/workspace setup failed
    OP_EXECUTION_FAILED,
    RESULT_INCORRECT,   // output differed from the reference answer
};
// Result of a testcase
// Immutable record of one testcase outcome: status, elapsed time,
// description of the case, and an error message when it failed.
class Result {
private:
    TestStatus _status;
    double _time = 0.;          // measured time for the passing run
    std::string _description;   // human-readable testcase description
    std::string _error_message; // populated only on failure

public:
    Result(TestStatus status_, double time_, const std::string &description_, const std::string &error_message_)
        : _status(status_), _time(time_), _description(description_), _error_message(error_message_) {}
    bool isPassed() const { return _status == TestStatus::PASS; }
    // Formatted report; defined out of line.
    std::string toString() const;
};
// Quick macro for creating a test result
// These expand inside a Test member function: they call the enclosing
// object's toString() for the description.
#define TEST_PASSED(delay) std::make_shared<infiniop_test::Result>(infiniop_test::TestStatus::PASS, delay, toString(), "")
#define TEST_FAILED(reason, msg) std::make_shared<infiniop_test::Result>(infiniop_test::TestStatus::reason, 0., toString(), msg)
#define TEST_INIT_FAILED(op_name) std::make_shared<infiniop_test::Result>(infiniop_test::TestStatus::TEST_INIT_FAILED, 0., "Invalid " + std::string(op_name), "")
// Run all tests read from a GGUF file
std::vector<std::shared_ptr<Result>> runAllTests(
    const GGUFFileReader &,
    infiniDevice_t device, int device_id,
    size_t warm_ups, size_t iterations,
    double rtol, double atol);

// Run a single test read from a GGUF file
// `test_id` selects which testcase in the file to run.
std::shared_ptr<Result> runTest(
    const GGUFFileReader &,
    infiniDevice_t device, int device_id,
    size_t warm_ups, size_t iterations,
    double rtol, double atol,
    size_t test_id);

// Check if two tensors are close within given tolerance
// Throws (rather than returns) on mismatch.
void allClose(std::shared_ptr<Tensor> actual, std::shared_ptr<Tensor> expected, double rtol = 1e-3, double atol = 1e-3);

// Helper function for benchmarking a function
// Returns the measured time over `iterations` runs after `warmups` runs.
double benchmark(std::function<void()> func, size_t warmups, size_t iterations);
} // namespace infiniop_test
} // namespace infiniop_test
namespace infiniop_test::base {
// Base class for a testcase, each operator test should inherit from this class
// NOTE(review): polymorphic base without a virtual destructor — deleting
// a derived test through a base pointer would be UB; confirm tests are
// only ever held as shared_ptr<Derived>.
class Test {
public:
    // Execute the testcase and report its Result.
    virtual std::shared_ptr<infiniop_test::Result> run(
        infiniopHandle_t handle, infiniDevice_t device, int device_id,
        size_t warm_ups, size_t iterations)
        = 0;
    virtual std::string toString() const = 0;
};
} // namespace infiniop_test::base
// Quick macro for declaring a new testcase
// Declares namespace infiniop_test::<name>::Test with the standard
// factory (build), metadata queries (attribute_names / tensor_names),
// and run/toString overrides; the definitions live in the op's .cpp.
#define DECLARE_INFINIOP_TEST(name) \
    namespace infiniop_test::name { \
    class Test : public infiniop_test::base::Test { \
        double _rtol, _atol; \
 \
    public: \
        static std::string op_name() { return #name; } \
        static std::shared_ptr<Test> build( \
            std::unordered_map<std::string, std::vector<uint8_t>> attributes, \
            std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors, \
            double, double); \
 \
        static std::vector<std::string> attribute_names(); \
        static std::vector<std::string> tensor_names(); \
 \
        std::shared_ptr<infiniop_test::Result> run( \
            infiniopHandle_t handle, infiniDevice_t device, int device_id, \
            size_t warm_ups, size_t iterations) override; \
 \
        std::string toString() const override; \
 \
        ~Test(); \
 \
    private: \
        struct Attributes; \
        Attributes *_attributes; \
        Test() = delete; \
        Test(double rtol, double atol) : _rtol(rtol), _atol(atol) {} \
    }; \
    }
namespace infiniop_test {
// Factory signature shared by every op test: builds a Test from raw
// attribute bytes, named tensors, and (rtol, atol).
using BuilderFunc = std::function<std::shared_ptr<infiniop_test::base::Test>(
    std::unordered_map<std::string, std::vector<uint8_t>>,
    std::unordered_map<std::string, std::shared_ptr<Tensor>>,
    double, double)>;

// Testcase Registry
// Each testcase should provide a formatted builder, attribute names, and tensor names
struct TestBuilder {
    BuilderFunc build;
    std::vector<std::string> attribute_names;
    std::vector<std::string> tensor_names;
};
} // namespace infiniop_test
#endif
#ifndef __INFINIOPTEST_UTILS_HPP__
#define __INFINIOPTEST_UTILS_HPP__
#include "../../utils.h"
#include "gguf.hpp"
#include <cstring>
#include <iostream>
// Run `cmd` and execute `action` on any non-success status.
#define CHECK_OR(cmd, action) CHECK_API_OR(cmd, INFINI_STATUS_SUCCESS, action)

/// Read one scalar at `ptr`, interpreted as `ggml_type`, widened to double.
/// Only plain (non-quantized) element types are supported.
/// @throws std::runtime_error for any other type.
inline double getVal(void *ptr, GGML_TYPE ggml_type) {
    // Named casts instead of C-style casts; static_cast is the correct
    // conversion from void* to a typed pointer.
    switch (ggml_type) {
    case GGML_TYPE_F16:
        // fp16 has no native C++ type; widen via the project helper.
        return utils::cast<double>(*static_cast<fp16_t *>(ptr));
    case GGML_TYPE_F32:
        return *static_cast<float *>(ptr);
    case GGML_TYPE_F64:
        return *static_cast<double *>(ptr);
    case GGML_TYPE_I8:
        return *static_cast<int8_t *>(ptr);
    case GGML_TYPE_I16:
        return *static_cast<int16_t *>(ptr);
    case GGML_TYPE_I32:
        return *static_cast<int32_t *>(ptr);
    case GGML_TYPE_I64:
        return static_cast<double>(*static_cast<int64_t *>(ptr));
    default:
        throw std::runtime_error("Unsupported data type");
    }
}
#endif
#include "file_mapping.hpp"
#include <stdexcept>
// Map `filepath` into memory read-only; throws std::runtime_error on
// any failure. On success ptr()/size() describe the whole file.
FileMapping::FileMapping(const std::string &filepath) {
#ifdef _WIN32
    _file_handle = CreateFile(filepath.c_str(), GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL);
    if (_file_handle == INVALID_HANDLE_VALUE) {
        throw std::runtime_error("Failed to open GGUF file");
    }
    // Query the size as 64-bit: GetFileSize() returns a DWORD and would
    // silently truncate files larger than 4 GiB (common for GGUF models).
    LARGE_INTEGER file_size;
    if (!GetFileSizeEx(_file_handle, &file_size)) {
        CloseHandle(_file_handle);
        throw std::runtime_error("Failed to get file size");
    }
    _size = static_cast<size_t>(file_size.QuadPart);
    _file_mapping = CreateFileMapping(_file_handle, NULL, PAGE_READONLY, 0, 0, NULL);
    if (!_file_mapping) {
        CloseHandle(_file_handle);
        throw std::runtime_error("Failed to create file mapping");
    }
    _ptr = MapViewOfFile(_file_mapping, FILE_MAP_READ, 0, 0, 0);
    if (!_ptr) {
        CloseHandle(_file_mapping);
        CloseHandle(_file_handle);
        throw std::runtime_error("Failed to map view of file");
    }
#else
    int fd = open(filepath.c_str(), O_RDONLY);
    if (fd == -1) {
        throw std::runtime_error("Failed to open GGUF file");
    }
    struct stat sb;
    if (fstat(fd, &sb) == -1) {
        close(fd);
        throw std::runtime_error("Failed to get file size");
    }
    _size = sb.st_size;
    _ptr = mmap(NULL, _size, PROT_READ, MAP_PRIVATE, fd, 0);
    // The mapping stays valid after closing the descriptor (POSIX).
    close(fd);
    if (_ptr == MAP_FAILED) {
        throw std::runtime_error("Failed to mmap file");
    }
#endif
}
// Release the mapping. On Windows the view must be unmapped before the
// mapping and file handles are closed; on POSIX a single munmap suffices.
FileMapping::~FileMapping() {
#ifdef _WIN32
    if (_ptr) {
        UnmapViewOfFile(_ptr);
    }
    if (_file_mapping) {
        CloseHandle(_file_mapping);
    }
    if (_file_handle) {
        CloseHandle(_file_handle);
    }
#else
    if (_ptr) {
        munmap(_ptr, _size);
    }
#endif
}
// Base address of the mapped file.
void *FileMapping::ptr() const {
    return _ptr;
}

// Size of the mapping in bytes.
size_t FileMapping::size() const {
    return _size;
}
#include "gguf.hpp"
#include <cstring>
#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#ifdef _WIN32
#include <windows.h>
#else
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#endif
// Render this record as "Key: <k>, Type: <t>, Value: <v>".
// Strings print verbatim; multi-element (array) values print as
// "[v0, v1, ...]"; scalars print directly.
std::string GGUFKeyValue::toString() const {
    std::ostringstream out;
    out << "Key: " << key << ", Type: " << GGUF_TYPE_NAME[gguf_type] << ", Value: ";
    if (gguf_type == GGUF_TYPE_STRING) {
        out << std::string(value.begin(), value.end());
    } else if (value.size() > GGUF_TYPE_SIZE[gguf_type]) {
        // More bytes than one element: this value was parsed from an array.
        const size_t elem_size = GGUF_TYPE_SIZE[gguf_type];
        const size_t count = value.size() / elem_size;
        out << "[";
        for (size_t idx = 0; idx < count; ++idx) {
            if (idx > 0) {
                out << ", ";
            }
            out << ggufDataToString(value.data() + idx * elem_size, gguf_type);
        }
        out << "]";
    } else {
        out << ggufDataToString(value.data(), gguf_type);
    }
    return out.str();
}
std::string GGUFTensorInfo::toString() const {
std::ostringstream oss;
oss << "Name: " << name << ", NDims: " << ndim << ", Shape: [";
for (size_t i = 0; i < shape.size(); ++i) {
oss << shape[i];
if (i < shape.size() - 1) {
oss << ", ";
}
}
oss << "], DataType: " << GGML_TYPE_NAME[ggml_type] << ", DataOffset: " << data_offset;
return oss.str();
}
// Open and fully parse a GGUF file: header, metadata KVs, tensor infos.
// After construction the cursor points at the 32-byte-aligned start of
// the tensor-data region.
GGUFFileReader::GGUFFileReader(const std::string &filepath) {
    // Let FileMapping's exception propagate unchanged. The previous
    // `try { ... } catch (const std::exception &e) { throw e; }` rethrew
    // a COPY sliced down to std::exception, losing the derived type and
    // its what() message.
    _file = std::make_shared<FileMapping>(filepath);
    _data = _file->ptr();
    _cursor = reinterpret_cast<uint8_t *>(_data);
    readHeader();
    readMetaKVs();
    readTensorInfos();
    // Tensor data begins at the next 32-byte boundary after the infos.
    size_t padding = (size_t)(32 - ((char *)_cursor - (char *)_data) % 32) % 32;
    _cursor += padding;
}
// Metadata key -> KV record, as parsed from the file.
const std::unordered_map<std::string, std::shared_ptr<GGUFKeyValue>> &
GGUFFileReader::getAttributeMap() const {
    return _attributes_map;
}

// Tensor name -> tensor-info record, as parsed from the file.
const std::unordered_map<std::string, std::shared_ptr<GGUFTensorInfo>> &
GGUFFileReader::getTensorInfoMap() const {
    return _tensors_info_map;
}
// Parse the GGUF header at the cursor: "GGUF" magic, version, tensor
// count and metadata KV count. Leaves the cursor just past the header.
void GGUFFileReader::readHeader() {
    if (std::memcmp(_cursor, "GGUF", 4) != 0) {
        throw std::runtime_error("Invalid GGUF magic");
    }
    _cursor += 4;
    _version = read<uint32_t>();
    _num_tensors = read<int64_t>();
    _num_meta_kvs = read<int64_t>();
    // Reset the lookup maps; they are filled by readMetaKVs / readTensorInfos.
    _attributes_map = std::unordered_map<std::string, std::shared_ptr<GGUFKeyValue>>();
    _tensors_info_map = std::unordered_map<std::string, std::shared_ptr<GGUFTensorInfo>>();
}
// Parse `_num_meta_kvs` key/value records at the cursor. Array values
// are flattened: `gguf_type` is replaced by the ELEMENT type and all
// elements are copied back-to-back into `value`.
void GGUFFileReader::readMetaKVs() {
    for (int64_t i = 0; i < _num_meta_kvs; ++i) {
        auto kv = std::make_shared<GGUFKeyValue>();
        kv->key = readString();
        kv->gguf_type = read<GGUF_TYPE>();
        if (kv->gguf_type == GGUF_TYPE_ARRAY) {
            GGUF_TYPE array_type = read<GGUF_TYPE>();
            uint64_t array_size = read<uint64_t>();
            // NOTE(review): this assumes fixed-size elements. Arrays of
            // strings (element size taken as sizeof(gguf_str)) or nested
            // arrays (element size 0) would be misparsed — confirm the
            // test files never contain them.
            kv->value.resize(array_size * GGUF_TYPE_SIZE[array_type]);
            kv->gguf_type = array_type;
            std::memcpy(kv->value.data(), _cursor, kv->value.size());
            _cursor += kv->value.size();
        } else if (kv->gguf_type == GGUF_TYPE_STRING) {
            // Strings are length-prefixed, unterminated byte runs.
            uint64_t str_size = read<uint64_t>();
            kv->value.resize(str_size);
            std::memcpy(kv->value.data(), _cursor, str_size);
            _cursor += str_size;
        } else {
            // Fixed-size scalar value.
            kv->value.resize(GGUF_TYPE_SIZE[kv->gguf_type]);
            std::memcpy(kv->value.data(), _cursor, kv->value.size());
            _cursor += kv->value.size();
        }
        _meta_kvs.push_back(kv);
        _attributes_map.emplace(kv->key, kv);
    }
}
// Parse `_num_tensors` tensor-info records (name, ndim, shape in GGUF
// order, element type, data offset) and index them by name.
void GGUFFileReader::readTensorInfos() {
    for (int64_t idx = 0; idx < _num_tensors; ++idx) {
        auto info = std::make_shared<GGUFTensorInfo>();
        info->name = readString();
        info->ndim = read<uint32_t>();
        info->shape.resize(info->ndim);
        for (uint32_t d = 0; d < info->ndim; ++d) {
            info->shape[d] = read<int64_t>();
        }
        info->ggml_type = read<GGML_TYPE>();
        info->data_offset = read<uint64_t>();
        _tensor_infos.push_back(info);
        _tensors_info_map.emplace(info->name, info);
    }
}
// Read a GGUF string at the cursor: uint64 byte length followed by raw
// (unterminated) bytes; advances the cursor past it.
std::string GGUFFileReader::readString() {
    const uint64_t len = read<uint64_t>();
    std::string result(reinterpret_cast<const char *>(_cursor), len);
    _cursor += len;
    return result;
}
// Read a POD value of type T at the cursor and advance past it.
// memcpy (rather than a cast + deref) keeps unaligned reads well-defined.
template <typename T>
T GGUFFileReader::read() {
    T value;
    std::memcpy(&value, _cursor, sizeof(T));
    _cursor += sizeof(T);
    return value;
}
// Human-readable dump of the parsed file: header counts, every metadata
// KV, and every tensor-info record.
std::string GGUFFileReader::toString() const {
    std::ostringstream out;
    out << "GGUF File Contents: " << "\n"
        << "Version: " << _version << "\n"
        << "Number of Meta KVs: " << _num_meta_kvs << "\n"
        << "Number of Tensors: " << _num_tensors << "\n\n";
    out << "Meta KVs: " << "\n";
    for (const auto &kv : _meta_kvs) {
        out << kv->toString() << "\n";
    }
    out << "\n" << "Tensor INFOs: " << "\n";
    for (const auto &info : _tensor_infos) {
        out << info->toString() << "\n";
    }
    return out.str();
}
#include "gguf.hpp"
#include "test.hpp"
#include <infinirt.h>
#include <iostream>
// Command-line options for the test runner (see printUsage for the CLI).
struct ParsedArgs {
    std::string file_path;                          // Mandatory argument: test.gguf file path
    infiniDevice_t device_type = INFINI_DEVICE_CPU; // Default to CPU
    int device_id = 0;                              // Device ID on the selected device type (if specified)
    int warmups = 0;                                // Default to 0 if not given
    int iterations = 0;                             // Default to 0 if not given
    double atol = 0.001;                            // Default absolute tolerance
    double rtol = 0.001;                            // Default relative tolerance
};
// Print CLI usage to stdout and terminate the process (exit code -1).
// Called for --help and for any malformed argument list.
void printUsage() {
    std::cout << "Usage:" << std::endl
              << std::endl;
    std::cout << "infiniop-test <test.gguf> [--<device>[:id]] [--warmup <warmups>] [--run <iterations>] [--atol <atol>] [--rtol <rtol>]" << std::endl
              << std::endl;
    // Fixed typo: was "<test.gguf>>" (stray '>').
    std::cout << "    <test.gguf>" << std::endl;
    std::cout << "        Path to the test gguf file" << std::endl
              << std::endl;
    std::cout << "    --<device>[:id]" << std::endl;
    std::cout << "        (Optional) Specify the device type --(cpu|nvidia|cambricon|ascend|metax|moore|iluvatar|kunlun|sugon) and device ID (optional). CPU by default." << std::endl
              << std::endl;
    std::cout << "    --warmup <warmups>" << std::endl;
    std::cout << "        (Optional) Number of warmups to perform before timing. Default to 0." << std::endl
              << std::endl;
    std::cout << "    --run <iterations>" << std::endl;
    std::cout << "        (Optional) Number of iterations to perform for timing. Default to 0." << std::endl
              << std::endl;
    std::cout << "    --atol <absolute_tolerance>" << std::endl;
    std::cout << "        (Optional) Absolute tolerance for correctness check. Default to 0.001" << std::endl
              << std::endl;
    std::cout << "    --rtol <relative_tolerance>" << std::endl;
    std::cout << "        (Optional) Relative tolerance for correctness check. Default to 0.001" << std::endl
              << std::endl;
    exit(-1);
}
// Continues an if/else-if chain: matches an argument that STARTS with
// FLAG, sets the device type, and parses an optional ":<id>" suffix.
#define PARSE_DEVICE(FLAG, DEVICE) \
    else if (arg.find(FLAG) == 0) { \
        size_t colon_pos = arg.find(':'); \
        args.device_type = DEVICE; \
        if (colon_pos != std::string::npos) { \
            args.device_id = std::stoi(arg.substr(colon_pos + 1)); \
        } else { \
            args.device_id = 0; \
        } \
    }
// Parse the command line into ParsedArgs. Any malformed input (missing
// file, unknown flag, non-numeric option value) falls through to
// printUsage(), which exits the process.
ParsedArgs parseArgs(int argc, char *argv[]) {
    if (argc < 2) {
        printUsage();
    }
    if (std::string(argv[1]) == "--help" || std::string(argv[1]) == "-h") {
        printUsage();
    }
    ParsedArgs args;
    args.file_path = argv[1]; // First argument is always the test.gguf file
    std::unordered_map<std::string, std::string> options;
    try {
        for (int i = 2; i < argc; ++i) {
            std::string arg = argv[i];
            // NOTE(review): device flags are matched by PREFIX
            // (arg.find(FLAG) == 0), so e.g. "--cpufoo" also matches
            // "--cpu" — confirm exact matching isn't required.
            if (arg.find("--cpu") == 0) {
                args.device_id = 0;
            }
            // Each PARSE_DEVICE expands to an `else if` in this chain.
            PARSE_DEVICE("--nvidia", INFINI_DEVICE_NVIDIA)
            PARSE_DEVICE("--cambricon", INFINI_DEVICE_CAMBRICON)
            PARSE_DEVICE("--ascend", INFINI_DEVICE_ASCEND)
            PARSE_DEVICE("--metax", INFINI_DEVICE_METAX)
            PARSE_DEVICE("--moore", INFINI_DEVICE_MOORE)
            PARSE_DEVICE("--iluvatar", INFINI_DEVICE_ILUVATAR)
            PARSE_DEVICE("--kunlun", INFINI_DEVICE_KUNLUN)
            PARSE_DEVICE("--sugon", INFINI_DEVICE_SUGON)
            else if (arg == "--warmup" && i + 1 < argc) {
                args.warmups = std::stoi(argv[++i]);
            }
            else if (arg == "--run" && i + 1 < argc) {
                args.iterations = std::stoi(argv[++i]);
            }
            else if (arg == "--atol" && i + 1 < argc) {
                args.atol = std::stod(argv[++i]);
            }
            else if (arg == "--rtol" && i + 1 < argc) {
                args.rtol = std::stod(argv[++i]);
            }
            else {
                printUsage();
            }
        }
    } catch (const std::exception &) {
        // std::stoi/std::stod throw on malformed numbers.
        printUsage();
    }
    return args;
}
// Entry point: parse the CLI, load the GGUF test file, run every test
// and print per-test results. Exit code: 0 when all tests pass, the
// number of failed tests otherwise, and -1 on initialization/load error.
int main(int argc, char *argv[]) {
    ParsedArgs args = parseArgs(argc, argv);
    int failed = 0;
    try {
        std::cout << args.file_path << std::endl;
        GGUFFileReader reader = GGUFFileReader(args.file_path);
        // std::cout << reader.toString() << std::endl;
        if (infinirtInit() != INFINI_STATUS_SUCCESS) {
            std::cerr << "Error: Failed to initialize InfiniRT" << std::endl;
            return -1;
        }
        auto results = infiniop_test::runAllTests(
            reader,
            (infiniDevice_t)args.device_type, args.device_id,
            args.warmups, args.iterations,
            args.rtol, args.atol);
        std::cout << "=====================================" << std::endl;
        for (const auto &result : results) {
            if (!result->isPassed()) {
                failed++;
            }
            std::cout << result->toString() << std::endl;
            std::cout << "=====================================" << std::endl;
        }
        if (failed == 0) {
            std::cout << GREEN << "All tests passed" << RESET << std::endl;
        } else {
            std::cout << RED << failed << " of " << results.size() << " tests failed" << RESET << std::endl;
        }
    } catch (const std::exception &e) {
        std::cerr << "Error: " << e.what() << std::endl;
        // Bug fix: previously fell through and returned `failed` (still 0),
        // so a crash during loading reported SUCCESS to the caller.
        return -1;
    }
    return failed;
}
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::matmul {
// Inputs of one matmul testcase: c = alpha * (a @ b) + beta * c,
// with `ans` holding the precomputed reference result.
struct Test::Attributes {
    float alpha;
    float beta;
    std::shared_ptr<Tensor> a;
    std::shared_ptr<Tensor> b;
    std::shared_ptr<Tensor> c;   // in/out operand
    std::shared_ptr<Tensor> ans; // expected result for c
};
// Factory: construct a matmul testcase from raw GGUF attributes and
// named tensors. Throws std::runtime_error when a required attribute
// ("alpha", "beta") or tensor ("a", "b", "c", "ans") is missing.
std::shared_ptr<Test> Test::build(
    std::unordered_map<std::string, std::vector<uint8_t>> attributes,
    std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
    double rtol, double atol) {
    auto test = std::shared_ptr<Test>(new Test(rtol, atol));
    test->_attributes = new Attributes();
    if (attributes.find("alpha") == attributes.end()
        || attributes.find("beta") == attributes.end()
        || tensors.find("a") == tensors.end()
        || tensors.find("b") == tensors.end()
        || tensors.find("c") == tensors.end()
        || tensors.find("ans") == tensors.end()) {
        throw std::runtime_error("Invalid Test");
    }
    // NOTE(review): assumes each attribute blob holds at least one float
    // (no size check before the reinterpret) — confirm the generator
    // always writes 4-byte alpha/beta.
    test->_attributes->alpha = *reinterpret_cast<float *>(attributes["alpha"].data());
    test->_attributes->beta = *reinterpret_cast<float *>(attributes["beta"].data());
    test->_attributes->a = tensors["a"];
    test->_attributes->b = tensors["b"];
    test->_attributes->c = tensors["c"];
    test->_attributes->ans = tensors["ans"];
    return test;
}
// Execute the matmul testcase: move operands to the target device,
// create the op descriptor and workspace, run once, verify against the
// reference answer, then benchmark repeated executions.
std::shared_ptr<infiniop_test::Result> Test::run(
    infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
    infiniopMatmulDescriptor_t op_desc;
    auto alpha = _attributes->alpha;
    auto beta = _attributes->beta;
    // Device copies of the operands (c is written in place).
    auto a = _attributes->a->to(device, device_id);
    auto b = _attributes->b->to(device, device_id);
    auto c = _attributes->c->to(device, device_id);
    CHECK_OR(infiniopCreateMatmulDescriptor(handle, &op_desc,
                                            c->desc(),
                                            a->desc(),
                                            b->desc()),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to create op descriptor."));
    size_t workspace_size;
    CHECK_OR(infiniopGetMatmulWorkspaceSize(op_desc, &workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));
    void *workspace;
    // NOTE(review): `workspace` and `op_desc` are never released on any
    // path through this function — looks like a leak per testcase;
    // confirm whether process lifetime makes this acceptable.
    CHECK_OR(infinirtMalloc(&workspace, workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
    CHECK_OR(infiniopMatmul(op_desc, workspace, workspace_size,
                            c->data(),
                            a->data(),
                            b->data(),
                            alpha,
                            beta,
                            nullptr),
             return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));
    // allClose throws on mismatch; translate into a RESULT_INCORRECT result.
    try {
        allClose(c, _attributes->ans);
    } catch (const std::exception &e) {
        return TEST_FAILED(RESULT_INCORRECT, e.what());
    }
    double elapsed_time = 0.;
    // add and subtract to avoid overflow
    // NOTE(review): each benchmark iteration runs the matmul twice, the
    // second time with (alpha_, beta_) intended to keep c bounded across
    // iterations; the pair is an exact inverse only when alpha == 1 —
    // confirm intent.
    float beta_ = beta == .0f ? .0f : 1.f / beta;
    float alpha_ = beta == .0f ? alpha : -beta_;
    elapsed_time = benchmark(
        [=]() {
            infiniopMatmul(
                op_desc, workspace, workspace_size,
                c->data(),
                a->data(),
                b->data(),
                alpha,
                beta,
                nullptr);
            infiniopMatmul(
                op_desc, workspace, workspace_size,
                c->data(),
                a->data(),
                b->data(),
                alpha_,
                beta_,
                nullptr);
        },
        // Halve the counts because the lambda runs the op twice.
        (warm_ups + 1) / 2, (iterations + 1) / 2);
    return TEST_PASSED(elapsed_time);
}
// Attribute keys this testcase reads from the GGUF file.
std::vector<std::string> Test::attribute_names() {
    return {"alpha", "beta"};
}

// Tensor names this testcase reads from the GGUF file.
std::vector<std::string> Test::tensor_names() {
    return {"a", "b", "c", "ans"};
}
// Multi-line description of the testcase: op name, scalars, operand
// infos and tolerances (tolerances in scientific notation).
std::string Test::toString() const {
    std::ostringstream repr;
    repr << op_name() << std::endl;
    repr << "- alpha=" << _attributes->alpha << ", beta=" << _attributes->beta << std::endl;
    repr << "- a: " << _attributes->a->info() << std::endl;
    repr << "- b: " << _attributes->b->info() << std::endl;
    repr << "- c: " << _attributes->c->info() << std::endl;
    repr << std::scientific << std::setprecision(2)
         << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
    return repr.str();
}
// Free the heap-allocated Attributes (raw new in build()).
Test::~Test() {
    delete _attributes;
}
} // namespace infiniop_test::matmul
#include "tensor.hpp"
#include "utils.hpp"
#include <cstring>
#include <infinirt.h>
#include <sstream>
// Recursively print to stdout an N-D array at `data` with the given
// shape and element-count strides: innermost dimension on one line,
// blank lines separating slices of outer dimensions.
template <typename T>
void printData(const T *data, const std::vector<size_t> &shape, const std::vector<ptrdiff_t> &strides, size_t dim) {
    // Guard: with an empty shape, `shape.size() - 1` underflows to
    // SIZE_MAX and the loop below would read shape[0] out of bounds.
    if (shape.empty()) {
        return;
    }
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << *(data + i * strides[dim]) << " ";
        }
        std::cout << std::endl;
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            printData(data + i * strides[dim], shape, strides, dim + 1);
            std::cout << std::endl;
        }
    }
}
// fp16 specialization: values are widened to float for printing.
// (Unlike the generic version, the innermost row prints no trailing
// newline — behavior kept as-is.)
template <>
void printData(const fp16_t *data, const std::vector<size_t> &shape,
               const std::vector<ptrdiff_t> &strides, size_t dim) {
    // Guard: with an empty shape, `shape.size() - 1` underflows to
    // SIZE_MAX and the loop below would read shape[0] out of bounds.
    if (shape.empty()) {
        return;
    }
    if (dim == shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            std::cout << utils::cast<float>(*(data + i * strides[dim])) << " ";
        }
    } else if (dim < shape.size() - 1) {
        for (size_t i = 0; i < shape[dim]; i++) {
            printData(data + i * strides[dim], shape, strides, dim + 1);
            std::cout << std::endl;
        }
    }
}
// Calculate memory size & offset given shape & strides
// Calculate memory size & offset given shape & strides.
// `offset` is the byte offset of the logical first element (non-zero
// only when some stride is negative); `size` is the total byte span
// needed to hold every addressed element. A zero-length dimension
// yields size == offset == 0.
// Shape/strides are now taken by const reference (they were copied by
// value on every call).
inline void calculateTensorMemory(size_t &size, size_t &offset,
                                  const std::vector<size_t> &shape,
                                  const std::vector<ptrdiff_t> &strides,
                                  size_t data_size) {
    size_t ndim = shape.size();
    offset = 0;
    size = 0;
    for (size_t i = 0; i < ndim; i++) {
        if (shape[i] == 0) {
            // Empty tensor: nothing to store.
            offset = 0;
            size = 0;
            return;
        }
        if (strides[i] > 0) {
            size += (shape[i] - 1) * strides[i] * data_size;
        } else if (strides[i] < 0) {
            // Negative stride: first element sits past the start of the span.
            offset += (shape[i] - 1) * (size_t)(-strides[i]) * data_size;
        }
        // stride == 0 (broadcast dim) contributes nothing.
    }
    size = offset + size + data_size;
}
namespace infiniop_test {
// Allocate `size` bytes on the given device: plain malloc for CPU,
// infinirtMalloc otherwise. Throws std::runtime_error on failure.
Memory::Memory(size_t size, infiniDevice_t device, int device_id) {
    _file_mapping = nullptr;
    _device = device;
    _device_id = device_id;
    _size = size;
    if (device == INFINI_DEVICE_CPU) {
        _ptr = std::malloc(size);
        // Previously unchecked: a null result would surface later as a
        // crash in whatever code wrote through the pointer.
        if (_ptr == nullptr && size != 0) {
            throw std::runtime_error("Error Creating Memory: malloc");
        }
    } else {
        CHECK_OR(infinirtSetDevice(_device, _device_id), throw std::runtime_error("Error Creating Memory: set device"));
        CHECK_OR(infinirtMalloc(&_ptr, _size), throw std::runtime_error("Error Creating Memory: malloc"));
    }
}
// Wrap an existing CPU pointer backed by `file_mapping`; holding the
// shared_ptr keeps the mapped bytes alive for this object's lifetime.
// Nothing is freed in the destructor for file-backed memory.
Memory::Memory(const std::shared_ptr<FileMapping> &file_mapping, void *ptr, size_t size)
    : _ptr(ptr),
      _size(size),
      _device(INFINI_DEVICE_CPU),
      _device_id(0),
      _file_mapping(file_mapping) {}
// Release owned memory. File-backed memory is left alone: the
// FileMapping shared_ptr releases the mapping when its refcount drops.
Memory::~Memory() {
    // if memory does not map to a file, free it manually
    if (_file_mapping == nullptr) {
        if (_device == INFINI_DEVICE_CPU) {
            std::free(_ptr);
        } else {
            // Select the owning device before freeing device memory.
            infinirtSetDevice(_device, _device_id);
            infinirtFree(_ptr);
        }
    }
}
// Pointer to the logical first element: buffer base plus byte offset
// (non-zero when strides are negative).
void *Tensor::data() const {
    return (char *)(_memory->ptr()) + _offset;
}
// Build a CPU tensor from GGUF tensor info. GGUF stores dimensions
// innermost-first, so the shape is reversed into row-major order;
// strides come from `strides_meta` (an I32/I64 array, also reversed)
// when given, otherwise contiguous row-major strides are derived.
// The element data is copied out of the mapped file (contiguous ggml
// layout) into a freshly allocated buffer with the requested strides.
Tensor::Tensor(const GGUFTensorInfo *info,
               const void *ggml_ptr,
               const GGUFKeyValue *strides_meta) {
    _ggml_type = info->ggml_type;
    _offset = 0;
    size_t ndim = static_cast<size_t>(info->ndim);
    _shape = std::vector<size_t>(ndim);
    _strides = std::vector<ptrdiff_t>(ndim);
    std::vector<ptrdiff_t> contiguous_strides(ndim);
    // Fill the whole shape FIRST, then derive strides. The previous
    // single-loop version read _shape[ndim - i] before that entry was
    // assigned, which for ndim >= 3 left it at its zero-initialized
    // value and produced all-zero contiguous strides.
    for (size_t i = 0; i < ndim; i++) {
        _shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]);
    }
    // Row-major contiguous strides, innermost dimension last.
    for (size_t i = ndim; i-- > 0;) {
        if (i == ndim - 1) {
            contiguous_strides[i] = (ptrdiff_t)1;
        } else {
            contiguous_strides[i] = (ptrdiff_t)_shape[i + 1] * contiguous_strides[i + 1];
        }
    }
    if (strides_meta == nullptr) {
        for (size_t i = 0; i < ndim; i++) {
            _strides[i] = contiguous_strides[i];
        }
    } else {
        for (size_t i = 0; i < ndim; i++) {
            if (strides_meta->gguf_type == GGUF_TYPE_INT64) {
                _strides[i] = (ptrdiff_t)(reinterpret_cast<const int64_t *>(
                    strides_meta->value.data())[ndim - 1 - i]);
            } else if (strides_meta->gguf_type == GGUF_TYPE_INT32) {
                _strides[i] = (ptrdiff_t)(reinterpret_cast<const int32_t *>(
                    strides_meta->value.data())[ndim - 1 - i]);
            } else {
                throw std::runtime_error("Error Creating Tensor: Unsupported strides type");
            }
        }
    }
    infiniopCreateTensorDescriptor(&_desc, ndim, _shape.data(), _strides.data(), ggmlTypeToInfiniType(_ggml_type));
    size_t size;
    calculateTensorMemory(size, _offset, _shape, _strides, ggmlTypeSize(_ggml_type));
    _memory = std::make_shared<Memory>(size, INFINI_DEVICE_CPU, 0);
    // Repack from the file's contiguous layout into the strided buffer.
    utils::rearrange(
        (char *)_memory->ptr() + _offset,
        (char *)ggml_ptr + info->data_offset,
        _shape.data(),
        _strides.data(),
        contiguous_strides.data(),
        ndim,
        ggmlTypeSize(_ggml_type));
}
// Wrap existing memory with explicit shape/strides/type and create the
// matching infiniop descriptor. Shares ownership of `memory`.
Tensor::Tensor(std::shared_ptr<Memory> memory, size_t offset,
               const std::vector<size_t> &shape,
               const std::vector<ptrdiff_t> &strides,
               GGML_TYPE dtype) : _memory(memory), _shape(shape), _strides(strides), _offset(offset), _ggml_type(dtype) {
    infiniopCreateTensorDescriptor(&_desc, shape.size(), shape.data(), strides.data(), ggmlTypeToInfiniType(dtype));
}
// Return a tensor whose data resides on `device`/`device_id`, copying only
// when necessary. Device-to-device moves are staged through host memory.
std::shared_ptr<Tensor> Tensor::to(infiniDevice_t device, int device_id) const {
    // Already resident on the requested device (all CPU ids are treated as
    // one): share the same Memory instead of copying.
    if (device == _memory->device() && (device_id == _memory->device_id() || device == INFINI_DEVICE_CPU)) {
        return std::make_shared<Tensor>(_memory, _offset, _shape, _strides, _ggml_type);
    }
    std::shared_ptr<Memory> memory;
    if (device == INFINI_DEVICE_CPU) {
        // Device -> host copy; source device must be selected first.
        memory = std::make_shared<Memory>(_memory->size(), INFINI_DEVICE_CPU, 0);
        CHECK_OR(infinirtSetDevice(_memory->device(), _memory->device_id()), throw std::runtime_error("Error Tensor::to: set device"));
        CHECK_OR(infinirtMemcpy(memory->ptr(), _memory->ptr(), _memory->size(), INFINIRT_MEMCPY_D2H), throw std::runtime_error("Error Tensor::to: cpy"));
    } else if (_memory->device() == INFINI_DEVICE_CPU) {
        // Host -> device copy.
        memory = std::make_shared<Memory>(_memory->size(), device, device_id);
        CHECK_OR(infinirtMemcpy(memory->ptr(), _memory->ptr(), _memory->size(), INFINIRT_MEMCPY_H2D), throw std::runtime_error("Error Tensor::to: cpy"));
    } else {
        // Device -> device: bounce through the CPU.
        return to(INFINI_DEVICE_CPU, 0)->to(device, device_id);
    }
    return std::make_shared<Tensor>(memory, _offset, _shape, _strides, _ggml_type);
}
// Print the tensor's metadata and full contents to stdout. Data is first
// staged to host memory so device-resident tensors can be inspected too.
void Tensor::debug() const {
    auto host = to(INFINI_DEVICE_CPU, 0);
    std::cout << "Tensor: " << host->info() << std::endl;
    void *raw = host->data();
    if (_ggml_type == GGML_TYPE_F16) {
        printData((fp16_t *)raw, _shape, _strides, 0);
    } else if (_ggml_type == GGML_TYPE_F32) {
        printData((float *)raw, _shape, _strides, 0);
    } else if (_ggml_type == GGML_TYPE_F64) {
        printData((double *)raw, _shape, _strides, 0);
    } else if (_ggml_type == GGML_TYPE_I8) {
        printData((int8_t *)raw, _shape, _strides, 0);
    } else if (_ggml_type == GGML_TYPE_I16) {
        printData((int16_t *)raw, _shape, _strides, 0);
    } else if (_ggml_type == GGML_TYPE_I32) {
        printData((int32_t *)raw, _shape, _strides, 0);
    } else {
        std::cout << "Unsupported GGML type" << std::endl;
    }
}
// One-line human-readable summary: "Shape: [...], Strides: [...], Type: ...".
std::string Tensor::info() const {
    std::ostringstream oss;
    // Stream a comma-separated list of values enclosed in brackets.
    auto append_list = [&oss](const auto &values) {
        oss << "[";
        for (size_t i = 0; i < values.size(); ++i) {
            if (i > 0) {
                oss << ", ";
            }
            oss << values[i];
        }
        oss << "]";
    };
    oss << "Shape: ";
    append_list(_shape);
    oss << ", Strides: ";
    append_list(_strides);
    oss << ", Type: " << GGML_TYPE_NAME[_ggml_type];
    return oss.str();
}
// Release the infiniop descriptor; the underlying Memory is freed by its own
// destructor once the shared_ptr reference count drops to zero.
Tensor::~Tensor() {
    infiniopDestroyTensorDescriptor(_desc);
}
} // namespace infiniop_test
#include "ops.hpp"
#include "tensor.hpp"
#include "utils.hpp"
#include <chrono>
#include <cmath>
#include <infinirt.h>
#include <iostream>
#include <numeric>
namespace infiniop_test {
// Registry mapping operator names (e.g. "matmul") to their test builders,
// initialized from the TEST_BUILDER_MAPPINGS initializer defined elsewhere.
std::unordered_map<std::string, const TestBuilder> TEST_BUILDERS = TEST_BUILDER_MAPPINGS;
// Render this result as a colorized, human-readable multi-line report:
// status, description, timing (if measured), and error message (if any).
std::string Result::toString() const {
    std::ostringstream oss;
    oss << "Status: ";
    if (_status == TestStatus::PASS) {
        oss << GREEN << "PASS" << RESET;
    } else if (_status == TestStatus::TEST_INIT_FAILED) {
        oss << RED << "INVALID TEST" << RESET;
    } else if (_status == TestStatus::OP_CREATION_FAILED) {
        oss << RED << "OP CREATION FAILED" << RESET;
    } else if (_status == TestStatus::OP_EXECUTION_FAILED) {
        oss << RED << "EXECUTION FAILED" << RESET;
    } else if (_status == TestStatus::RESULT_INCORRECT) {
        oss << RED << "WRONG ANSWER" << RESET;
    } else {
        // Any other status is reported as skipped.
        oss << YELLOW << "SKIPPED" << RESET;
    }
    oss << std::endl;
    oss << "Description: " << _description << std::endl;
    if (_time > 0.) {
        oss << "Time: " << _time << " us" << std::endl;
    } else {
        oss << "Time: N/A" << std::endl;
    }
    if (!_error_message.empty()) {
        oss << "Error: " << _error_message << std::endl;
    }
    return oss.str();
}
// Run every test described in the GGUF file and collect one Result per test.
// Requires a `test_count` meta attribute; tests are numbered 0..test_count-1.
std::vector<std::shared_ptr<Result>> runAllTests(const GGUFFileReader &gguf_reader,
                                                 infiniDevice_t device, int device_id,
                                                 size_t warm_ups, size_t iterations,
                                                 double rtol, double atol) {
    auto meta = gguf_reader.getAttributeMap();
    auto count_meta = meta.find("test_count");
    if (count_meta == meta.end()) {
        throw std::runtime_error("Invalid GGUF file: missing test_count attribute");
    }
    // NOTE(review): reinterprets the raw attribute bytes as size_t — assumes
    // the writer stored test_count as a native-endian 64-bit integer; confirm
    // against the GGUF writer.
    size_t count = *(size_t *)(count_meta->second->value.data());
    std::cout << "Found " << count << " tests" << std::endl;
    auto results = std::vector<std::shared_ptr<Result>>(count);
    try {
        for (size_t i = 0; i < count; i++) {
            results[i] = runTest(gguf_reader, device, device_id, warm_ups, iterations, rtol, atol, i);
        }
    } catch (const std::exception &e) {
        // A throwing test aborts the remaining tests; entries after it stay
        // null, so callers must tolerate null Result pointers.
        std::cerr << "Error: " << e.what() << std::endl;
    }
    return results;
}
std::shared_ptr<Result> runTest(const GGUFFileReader &gguf_reader,
infiniDevice_t device, int device_id,
size_t warm_ups, size_t iterations,
double rtol, double atol, size_t test_id) {
auto meta = gguf_reader.getAttributeMap();
auto tensor_info = gguf_reader.getTensorInfoMap();
auto name_meta = meta.find("test." + std::to_string(test_id) + ".op_name");
if (name_meta != meta.end()) {
std::string op_name(name_meta->second->value.begin(), name_meta->second->value.end());
auto builder = TEST_BUILDERS.find(op_name)->second;
auto attrs = std::unordered_map<std::string, std::vector<uint8_t>>();
auto tensors = std::unordered_map<std::string, std::shared_ptr<Tensor>>();
infiniopHandle_t handle;
CHECK_OR(infinirtSetDevice(device, device_id), throw std::runtime_error("Failed to set device"));
CHECK_OR(infiniopCreateHandle(&handle), throw std::runtime_error("Failed to create handle"));
for (auto attr_name : builder.attribute_names) {
auto attr = meta.find("test." + std::to_string(test_id) + "." + attr_name);
if (attr != meta.end()) {
attrs[attr_name] = attr->second->value;
}
}
for (auto tensor_name : builder.tensor_names) {
auto info = tensor_info.find("test." + std::to_string(test_id) + "." + tensor_name);
if (info != tensor_info.end()) {
auto strides = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".strides");
tensors[tensor_name] = std::make_shared<Tensor>(
info->second.get(),
gguf_reader.getGgmlStart(),
strides != meta.end() ? strides->second.get() : nullptr);
}
}
std::shared_ptr<infiniop_test::base::Test> test;
try {
test = builder.build(attrs, tensors, rtol, atol);
} catch (const std::exception &e) {
return TEST_INIT_FAILED(op_name + "/n" + e.what());
}
std::shared_ptr<Result> result;
try {
result = test->run(handle, device, device_id, warm_ups, iterations);
} catch (const std::exception &e) {
return TEST_INIT_FAILED(op_name + "/n" + e.what());
}
CHECK_OR(infiniopDestroyHandle(handle), throw std::runtime_error("Failed to destroy handle"));
return result;
}
return TEST_INIT_FAILED("");
}
// Advance a multi-dimensional counter by one element in row-major order,
// updating two byte offsets that follow independent stride layouts.
// When a dimension wraps, its contribution is rewound and the carry moves to
// the next-outer dimension.
void incrementOffset(ptrdiff_t &offset_1, const std::vector<ptrdiff_t> &strides_1, size_t data_size_1,
                     ptrdiff_t &offset_2, const std::vector<ptrdiff_t> &strides_2, size_t data_size_2,
                     std::vector<size_t> &counter, const std::vector<size_t> &shape) {
    ptrdiff_t dim = (ptrdiff_t)shape.size() - 1;
    while (dim >= 0) {
        counter[dim] += 1;
        offset_1 += strides_1[dim] * data_size_1;
        offset_2 += strides_2[dim] * data_size_2;
        if (counter[dim] < shape[dim]) {
            return;
        }
        // This dimension wrapped: undo its accumulated offset and carry.
        counter[dim] = 0;
        offset_1 -= shape[dim] * strides_1[dim] * data_size_1;
        offset_2 -= shape[dim] * strides_2[dim] * data_size_2;
        --dim;
    }
}
// Element-wise comparison of two tensors after staging both to host memory.
// Throws std::runtime_error on shape mismatch or when any element fails the
// tolerance check. Note the semantics differ from numpy.allclose
// (|a-e| <= atol + rtol*|e|): here an element fails if |a-e| exceeds atol OR
// exceeds rtol*max(|a|,|e|), i.e. it must satisfy BOTH tolerances.
void allClose(std::shared_ptr<Tensor> actual_, std::shared_ptr<Tensor> expected_, double rtol, double atol) {
    // to(CPU) with one argument — presumably device_id defaults in the header.
    auto actual = actual_->to(INFINI_DEVICE_CPU);
    auto expected = expected_->to(INFINI_DEVICE_CPU);
    auto shape = actual->shape();
    if (shape != expected->shape()) {
        throw std::runtime_error("Shape mismatch.");
    }
    auto ndim = shape.size();
    // Total element count; strided iteration below visits each exactly once.
    size_t total = std::accumulate(shape.begin(), shape.end(), (size_t)1, std::multiplies<size_t>());
    auto counter = std::vector<size_t>(ndim, 0);
    ptrdiff_t actual_offset = 0,
              expected_offset = 0;
    size_t num_failed = 0;
    std::string first_failed_msg;
    for (size_t i = 0; i < total; i++) {
        // Values are widened to double regardless of the tensors' ggml types.
        double a_ = getVal((char *)actual->data() + actual_offset, actual->ggml_type());
        double e_ = getVal((char *)expected->data() + expected_offset, expected->ggml_type());
        if (std::fabs(a_ - e_) > atol || std::fabs(a_ - e_) > rtol * std::fmax(std::fabs(a_), std::fabs(e_))) {
            if (num_failed == 0) {
                first_failed_msg = "First failed at index " + std::to_string(i) + " with value " + std::to_string(a_) + " but should be " + std::to_string(e_) + ".";
            }
            num_failed++;
        }
        incrementOffset(actual_offset, actual->strides(), ggmlTypeSize(actual->ggml_type()),
                        expected_offset, expected->strides(), ggmlTypeSize(expected->ggml_type()),
                        counter, shape);
    }
    if (num_failed > 0) {
        throw std::runtime_error(std::to_string(num_failed) + " out of " + std::to_string(total) + " values failed. " + first_failed_msg);
    }
}
// Time `func` averaged over `iterations` runs (after `warmups` untimed runs)
// and return the mean latency in microseconds. Returns 0.0 when iterations
// is 0. Device work is fenced with infinirtDeviceSynchronize around the
// timed region so asynchronous kernels are fully accounted for.
double benchmark(std::function<void()> func, size_t warmups, size_t iterations) {
    if (iterations == 0) {
        return 0.0;
    }
    for (size_t i = 0; i < warmups; ++i) {
        func();
    }
    infinirtDeviceSynchronize();
    auto start = std::chrono::high_resolution_clock::now();
    for (size_t i = 0; i < iterations; ++i) {
        func();
    }
    infinirtDeviceSynchronize();
    auto end = std::chrono::high_resolution_clock::now();
    auto duration = std::chrono::duration_cast<std::chrono::nanoseconds>(end - start);
    // BUGFIX: convert to double BEFORE dividing; the previous integer division
    // by `iterations` truncated sub-nanosecond precision per iteration.
    double average_time = static_cast<double>(duration.count()) / static_cast<double>(iterations) / 1e3; // average in us
    return average_time;
}
} // namespace infiniop_test
...@@ -45,28 +45,26 @@ size_t check_equal( ...@@ -45,28 +45,26 @@ size_t check_equal(
return fails; return fails;
} }
int test_transpose_2d() { int test_transpose_any(size_t index, std::vector<size_t> shape, std::vector<ptrdiff_t> strides_a, std::vector<ptrdiff_t> strides_b) {
std::vector<size_t> shape = {3, 5};
std::vector<ptrdiff_t> strides_a = {5, 1};
std::vector<ptrdiff_t> strides_b = {1, 3};
auto numel = std::accumulate(shape.begin(), shape.end(), (size_t)1, std::multiplies<size_t>()); auto numel = std::accumulate(shape.begin(), shape.end(), (size_t)1, std::multiplies<size_t>());
std::vector<float> a(numel); std::vector<float> a(numel);
std::vector<float> b(numel); std::vector<float> b(numel);
for (size_t i = 0; i < numel; i++) { for (size_t i = 0; i < numel; i++) {
a[i] = i / numel; a[i] = (float)i / numel;
} }
utils::rearrange(b.data(), a.data(), shape.data(), strides_b.data(), strides_a.data(), 2, sizeof(float)); utils::rearrange(b.data(), a.data(), shape.data(), strides_b.data(), strides_a.data(), shape.size(), sizeof(float));
if (check_equal<float>(a.data(), b.data(), shape, strides_a, strides_b)) { auto fails = check_equal<float>(a.data(), b.data(), shape, strides_a, strides_b);
if (fails > 0) {
std::cout << "test_transpose " << index << " failed" << std::endl;
return 1; return 1;
} else { } else {
std::cout << "test_transpose_2d passed" << std::endl; std::cout << "test_transpose " << index << " passed" << std::endl;
return 0;
} }
return 0;
} }
int test_rearrange() { int test_rearrange() {
return test_transpose_2d(); return test_transpose_any(1, {3, 5}, {5, 1}, {1, 3})
+ test_transpose_any(2, {1, 2048}, {2048, 1}, {2048, 1});
} }
...@@ -46,6 +46,7 @@ std::optional<RearrangeMeta> RearrangeMeta::create( ...@@ -46,6 +46,7 @@ std::optional<RearrangeMeta> RearrangeMeta::create(
} }
return std::abs(a.dst) > std::abs(b.dst); return std::abs(a.dst) > std::abs(b.dst);
}); });
ndim = dims.size();
// # 合并连续维度 // # 合并连续维度
// ## 合并末尾连续维度到 unit // ## 合并末尾连续维度到 unit
for (auto it = dims.rbegin(); it != dims.rend(); ++it) { for (auto it = dims.rbegin(); it != dims.rend(); ++it) {
......
# InfiniOP 测例生成
## 介绍
使用 python 脚本生成包含测例的 `.gguf` 文件,并使用 `infiniop-test` 程序进行测试。
## 运行方式
- 编译 `infiniop-test` 程序
```bash
xmake build infiniop-test
```
- 生成测例
在 `/test/infiniop-test/` 目录下执行矩阵乘测例生成脚本,执行结束以后会在该目录下生成 `matmul.gguf` 测例文件。
```bash
cd /test/infiniop-test/
python -m test_generate.testcases.matmul
```
- 测试测例
打印测试程序用法
```bash
infiniop-test --help
```
示例:在CPU上测试`matmul.gguf`测例文件,预热20次,测试1000次。
```bash
infiniop-test matmul.gguf --cpu --warmup 20 --run 1000
```
## 自定义测例
### GGUF文件格式
```text
GGUF File Contents:
Version: 3
Number of Meta KVs: 8
Number of Tensors: 4
Meta KVs:
Key: general.architecture, Type: GGUF_TYPE_STRING, Value: infiniop-test
Key: test_count, Type: GGUF_TYPE_UINT64, Value: 1
Key: test.0.op_name, Type: GGUF_TYPE_STRING, Value: matmul
Key: test.0.a.strides, Type: GGUF_TYPE_INT32, Value: [1, 5]
Key: test.0.b.strides, Type: GGUF_TYPE_INT32, Value: [1, 6]
Key: test.0.c.strides, Type: GGUF_TYPE_INT32, Value: [1, 6]
Key: test.0.alpha, Type: GGUF_TYPE_FLOAT32, Value: 1.000000
Key: test.0.beta, Type: GGUF_TYPE_FLOAT32, Value: 0.000000
Tensor INFOs:
Name: test.0.a, NDims: 2, Shape: [5, 4], DataType: F32, DataOffset: 0
Name: test.0.b, NDims: 2, Shape: [6, 5], DataType: F32, DataOffset: 96
Name: test.0.c, NDims: 2, Shape: [6, 4], DataType: F32, DataOffset: 224
Name: test.0.ans, NDims: 2, Shape: [6, 4], DataType: F64, DataOffset: 320
```
- `Meta` 中必须包含 `test_count` ,表示测例数量。
- 每个测例的 `Meta` 和 `Tensor` 名字以 `test.[id].` 开头,后接具体信息名称。数字 `[id]` 表示测例编号,编号必须为 0 到 test_count-1。
- `Tensor` 名字后接 `.strides` 的 `Meta` 键表示该张量的步长,若没有对应键则默认该张量为连续存储。
from .infiniop_test import InfiniopTestCase, InfiniopTestWriter, np_dtype_to_ggml, gguf_strides
import gguf
from typing import List
import numpy as np
from gguf import GGMLQuantizationType
def np_dtype_to_ggml(tensor_dtype: np.dtype):
    """Translate a numpy dtype into the matching GGMLQuantizationType.

    Raises ValueError for any dtype outside the supported set
    (F16, F32, F64, I8, I16, I32, I64).
    """
    # Pairs are compared with == rather than used as dict keys because
    # np.dtype objects compare equal to the scalar types without sharing
    # a hash bucket (e.g. np.dtype('float32') == np.float32).
    supported = (
        (np.float16, GGMLQuantizationType.F16),
        (np.float32, GGMLQuantizationType.F32),
        (np.float64, GGMLQuantizationType.F64),
        (np.int8, GGMLQuantizationType.I8),
        (np.int16, GGMLQuantizationType.I16),
        (np.int32, GGMLQuantizationType.I32),
        (np.int64, GGMLQuantizationType.I64),
    )
    for np_type, ggml_type in supported:
        if tensor_dtype == np_type:
            return ggml_type
    raise ValueError(
        "Only F16, F32, F64, I8, I16, I32, I64 tensors are supported for now"
    )
def gguf_strides(*args: int) -> list[int] | None:
return list(args)[::-1] if args else None
class InfiniopTestCase:
    """Base class for one test case serialized into a GGUF test file.

    Subclasses override write_test() to add their attributes and tensors;
    this base implementation records only the operator name.
    """

    # Operator name stored under the `test.[id].op_name` GGUF key.
    op_name: str

    def __init__(self, op_name: str):
        self.op_name = op_name

    def write_test(self, test_writer: "InfiniopTestWriter"):
        # Record the op name under this test's namespaced key.
        key = test_writer.gguf_key("op_name")
        test_writer.add_string(key, self.op_name)
class InfiniopTestWriter(gguf.GGUFWriter):
    """GGUF writer that queues InfiniopTestCase objects and serializes them.

    Cases are buffered by add_test()/add_tests() and written only when
    save() is called; gguf_key() namespaces keys for the test currently
    being written.
    """

    _test_cases: List["InfiniopTestCase"]
    _written_tests = 0

    def __init__(self, filepath):
        super().__init__(filepath, "infiniop-test")
        self._test_cases = []
        self._written_tests = 0

    def add_test(self, test_case: "InfiniopTestCase"):
        # Queue one case; nothing is written until save().
        self._test_cases.append(test_case)

    def add_tests(self, test_cases: List["InfiniopTestCase"]):
        # Queue several cases at once.
        self._test_cases.extend(test_cases)

    def gguf_key(self, name: str) -> str:
        # Key namespaced to the test currently being written, e.g. "test.3.a".
        return f"test.{self._written_tests}.{name}"

    def save(self):
        """Serialize every queued case and finish writing the GGUF file."""
        super().add_uint64("test_count", len(self._test_cases))
        for case in self._test_cases:
            # write_test() reads _written_tests via gguf_key(), so the
            # counter must advance only after each case is written.
            case.write_test(self)
            self._written_tests += 1
        super().write_header_to_file()
        super().write_kv_data_to_file()
        super().write_tensors_to_file()
        super().close()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment