Commit 69c1c352 authored by Ceng23333's avatar Ceng23333
Browse files

feat: implement neural network module system with PyTorch-like API

- Implement core modules: Linear, Embedding, RMSNorm
- Add PyTorch-like macros for module and parameter definition
  - INFINICORE_NN_MODULE for single module declaration
  - INFINICORE_NN_MODULE_VEC for module vectors
  - INFINICORE_NN_PARAMETER for parameter declaration
  - Corresponding INIT macros for initialization
- Implement hierarchical module system with dynamic path generation
- Add state_dict() and load_state_dict() support
- Refactor module design: protected registration methods, removed path_ member
- Add comprehensive test suite including TinyLlama integration
- All parameters are protected with public accessors
parent 99e19cc8
#pragma once
#include "infinicore/nn.hpp"
#include "infinicore/ops.hpp"
#include "infinicore/tensor.hpp"
#pragma once
#include "nn/embedding.hpp"
#include "nn/linear.hpp"
#include "nn/rmsnorm.hpp"
#pragma once
#include "module.hpp"
#include "../ops.hpp"
#include <optional>
namespace infinicore::nn {
/**
* @brief Embedding layer that maps indices to dense vectors
*
* A simple lookup table that stores embeddings of a fixed dictionary and size.
* This module is often used to store word embeddings and retrieve them using indices.
* The input to the module is a tensor of indices, and the output is the corresponding
* embedding vectors.
*
* Similar to PyTorch's nn.Embedding:
* https://pytorch.org/docs/stable/generated/torch.nn.Embedding.html
*
* Example:
* @code
* // Create embedding: 10000 words, 300-dimensional embeddings
* auto embedding = Embedding(10000, 300);
*
* // Input: tensor of indices [batch_size, seq_len]
* auto indices = Tensor::from_data({2, 5}, {3, 5, 12, 8, 99, 0, 1, 45, 67, 23});
*
* // Output: [batch_size, seq_len, embedding_dim] = [2, 5, 300]
* auto embeddings = embedding.forward(indices);
* @endcode
*/
class Embedding : public Module {
public:
/**
* @brief Construct an Embedding layer
*
* @param num_embeddings Size of the dictionary of embeddings (vocabulary size)
* @param embedding_dim The size of each embedding vector
* @param padding_idx If specified, the entries at padding_idx do not contribute to gradient
* and the embedding vector at padding_idx is not updated during training
* @param dtype Data type for the embedding weights (default: DataType::F32)
* @param device Device to create the embedding weight on
*/
Embedding(size_t num_embeddings,
size_t embedding_dim,
std::optional<int64_t> padding_idx = std::nullopt,
const DataType &dtype = DataType::F32,
const Device &device = Device());
/**
* @brief Forward pass: lookup embeddings for given indices
*
* @param indices Tensor containing indices into the embedding matrix.
* Can be any shape (*), typically [batch_size] or [batch_size, seq_len]
* @return Tensor containing the embedding vectors.
* Shape: (*, embedding_dim) where * matches the input shape
*
* Example:
* Input shape: [2, 3] -> Output shape: [2, 3, embedding_dim]
* Input shape: [10] -> Output shape: [10, embedding_dim]
*/
Tensor forward(const Tensor &indices) const;
// Module information
size_t num_embeddings() const { return num_embeddings_; }
size_t embedding_dim() const { return embedding_dim_; }
std::optional<int64_t> padding_idx() const { return padding_idx_; }
DataType dtype() const { return dtype_; }
// String representation
std::string extra_repr() const;
// Accessors for parameters
Tensor weight() const { return weight_; }
protected:
// Parameters
Parameter weight_;
private:
size_t num_embeddings_; // Vocabulary size
size_t embedding_dim_; // Embedding dimension
std::optional<int64_t> padding_idx_; // Optional padding index
DataType dtype_; // Data type for embedding weights
};
} // namespace infinicore::nn
#pragma once
#include "module.hpp"
#include "../ops.hpp"
namespace infinicore::nn {
class Linear : public Module {
public:
Linear(size_t in_features, size_t out_features, bool bias = true, const Device &device = Device());
// Forward pass: output = input @ weight.T + bias
Tensor forward(Tensor &input) const;
// Forward pass with residual connection (InfiniLM-style)
// output = input @ weight.T + bias + residual
Tensor forward(Tensor &input, Tensor &residual) const;
// Module information
size_t in_features() const { return in_features_; }
size_t out_features() const { return out_features_; }
bool has_bias() const { return has_bias_; }
// String representation
std::string extra_repr() const;
// Accessors for parameters
Tensor weight() const { return weight_; }
Tensor bias() const { return bias_; }
protected:
// Parameters
Parameter weight_;
Parameter bias_;
private:
// Helper method for common forward computation
Tensor compute_linear(Tensor &input) const;
size_t in_features_;
size_t out_features_;
bool has_bias_;
};
} // namespace infinicore::nn
#pragma once
#include "parameter.hpp"
#include "../tensor.hpp"
#include <unordered_map>
#include <type_traits>
#include <vector>
namespace infinicore::nn {
class Module {
public:
Module() = default;
const std::unordered_map<std::string, Parameter> &state_dict() const;
void load_state_dict(const std::unordered_map<std::string, Tensor> &_state_dict);
......@@ -15,35 +20,118 @@ public:
void load_parameter_from_blob(const std::string &name, const void *data);
protected:
Tensor register_parameter(const std::string &name, Parameter param);
// Add an existing submodule to this module's hierarchy
// Template parameter M must be a type derived from Module
// Returns the submodule for convenience (allows method chaining)
template <typename M>
std::shared_ptr<M> add_module(const std::string &name, std::shared_ptr<M> submodule) {
// Ensure M is derived from Module (compile-time check)
static_assert(std::is_base_of<Module, M>::value,
"Template parameter M must be derived from infinicore::nn::Module");
// Store in the submodules map (std::shared_ptr<M> automatically converts to std::shared_ptr<Module>)
submodules_[name] = submodule;
for (auto &p : submodule->parameters_) {
parameters_[name + "." + p.first] = p.second;
}
return submodule;
}
// Create and register a new submodule by constructing it with the given arguments
// Template parameter M must be a type derived from Module
// Args are forwarded to M's constructor
template <typename M, typename... Args>
std::shared_ptr<M> register_module(const std::string &name, Args &&...args) {
// Ensure M is derived from Module (compile-time check)
static_assert(std::is_base_of<Module, M>::value,
"Template parameter M must be derived from infinicore::nn::Module");
// Construct the submodule
auto submodule = std::make_shared<M>(std::forward<Args>(args)...);
return add_module(name, submodule);
}
// Create and register multiple submodules of the same type
// Each submodule is named as "name.0", "name.1", etc.
// Template parameter M must be a type derived from Module
template <typename M, typename... Args>
std::vector<std::shared_ptr<M>> register_modules(size_t layers, const std::string &name, Args &&...args) {
auto submodules = std::vector<std::shared_ptr<M>>(layers);
for (size_t i = 0; i < layers; i++) {
register_module<M>(name + "." + std::to_string(i), std::forward<Args>(args)...);
std::vector<std::shared_ptr<M>> register_modules(size_t count, const std::string &name, Args &&...args) {
static_assert(std::is_base_of<Module, M>::value,
"Template parameter M must be derived from infinicore::nn::Module");
std::vector<std::shared_ptr<M>> modules;
modules.reserve(count);
for (size_t i = 0; i < count; i++) {
modules.push_back(register_module<M>(name + "." + std::to_string(i), std::forward<Args>(args)...));
}
return submodules;
return modules;
}
protected:
Device device_;
std::unordered_map<std::string, std::shared_ptr<Module>> submodules_;
std::unordered_map<std::string, Parameter> parameters_;
private:
void collect_all_parameters(std::unordered_map<std::string, Parameter> &all_params, const std::string &prefix = "") const;
};
} // namespace infinicore::nn
\ No newline at end of file
// ============================================================================
// PyTorch-like Macros for Convenient Module Registration
// ============================================================================
/**
* @brief Register submodules with automatic name inference from variable name
*
* Usage:
* @code
* class MyModel : public Module {
* protected:
* INFINICORE_NN_MODULE(Linear, layer1);
* INFINICORE_NN_MODULE(Linear, layer2);
* INFINICORE_NN_MODULE_VEC(Linear, layers);
* INFINICORE_NN_PARAMETER(scaling_factor);
*
* public:
* MyModel() {
* INFINICORE_NN_MODULE_INIT(layer1, 128, 64);
* INFINICORE_NN_MODULE_INIT(layer2, 64, 32);
* INFINICORE_NN_MODULE_VEC_INIT(layers, 3, Linear, 32, 16);
* INFINICORE_NN_PARAMETER_INIT(scaling_factor, ({1}, DataType::F32, Device()));
* }
* };
* @endcode
*/
// Declare a single module member variable
#define INFINICORE_NN_MODULE(ModuleType, name) \
std::shared_ptr<ModuleType> name##_
// Declare a vector of modules member variable
#define INFINICORE_NN_MODULE_VEC(ModuleType, name) \
std::vector<std::shared_ptr<ModuleType>> name##_
// Initialize a module in constructor
#define INFINICORE_NN_MODULE_INIT(name, ...) \
name##_ = this->register_module<std::remove_reference<decltype(*name##_)>::type>(#name, ##__VA_ARGS__)
// Initialize a vector of modules in constructor
// Usage: INFINICORE_NN_MODULE_VEC_INIT(layers, count, ModuleType, ctor_args...)
// Example: INFINICORE_NN_MODULE_VEC_INIT(layers, 3, Linear, 128, 64)
#define INFINICORE_NN_MODULE_VEC_INIT(name, count, ModuleType, ...) \
name##_ = this->register_modules<ModuleType>(count, #name, ##__VA_ARGS__)
// Declare a parameter member variable
#define INFINICORE_NN_PARAMETER(name) \
Parameter name##_
// Initialize a parameter in constructor
// Usage: INFINICORE_NN_PARAMETER_INIT(name, (shape, dtype, device))
// Example: INFINICORE_NN_PARAMETER_INIT(weight, ({out_features, in_features}, DataType::F32, device))
#define INFINICORE_NN_PARAMETER_INIT(name, args) \
name##_ = Parameter args; \
this->register_parameter(#name, name##_)
} // namespace infinicore::nn
......@@ -5,6 +5,8 @@
namespace infinicore::nn {
class Parameter : public Tensor {
public:
Parameter();
Parameter(const Shape &shape,
const DataType &dtype,
const Device &device);
......
#pragma once
#include "module.hpp"
#include "../ops.hpp"
namespace infinicore::nn {
/**
* @brief Root Mean Square Layer Normalization (RMSNorm)
*
* Applies Root Mean Square Layer Normalization over the last dimension.
* Unlike LayerNorm, RMSNorm doesn't subtract mean and doesn't use bias.
*
* Formula: y = (x / RMS(x)) * weight
* where RMS(x) = sqrt(mean(x^2) + eps)
*
* Used in LLaMA, Galactica, and other modern language models as a
* simpler and faster alternative to LayerNorm.
*
* Example:
* @code
* // Create RMSNorm for hidden size 4096
* auto norm = RMSNorm(4096);
*
* // Input: [batch, seq_len, hidden_size]
* auto input = Tensor::randn({2, 10, 4096});
*
* // Output: [batch, seq_len, hidden_size]
* auto output = norm.forward(input);
* @endcode
*/
class RMSNorm : public Module {
public:
/**
* @brief Construct a RMSNorm layer
*
* @param normalized_shape Size of the feature dimension to normalize (typically hidden_size)
* @param eps Small constant for numerical stability (default: 1e-6)
* @param device Device to create the weight on
*/
RMSNorm(size_t normalized_shape,
double eps = 1e-6,
const Device &device = Device());
/**
* @brief Forward pass: apply RMSNorm
*
* @param x Input tensor of shape (*, normalized_shape) where * is any number of dimensions
* @return Normalized tensor with same shape as input
*
* The normalization is applied over the last dimension.
* For example:
* Input: [batch, seq_len, hidden_size] -> normalize over hidden_size
* Input: [batch, hidden_size] -> normalize over hidden_size
*/
Tensor forward(const Tensor &x) const;
// Module information
size_t normalized_shape() const { return normalized_shape_; }
double eps() const { return eps_; }
// String representation
std::string extra_repr() const;
// Accessors for parameters
Tensor weight() const { return weight_; }
protected:
// Parameters
Parameter weight_;
private:
size_t normalized_shape_; // Size of the feature dimension
double eps_; // Epsilon for numerical stability
};
} // namespace infinicore::nn
#include "memory_test.h"
#include "test_nn_module.h"
#include "test_runner.h"
#include "test_tensor_destructor.h"
#include <iostream>
#include <memory>
......@@ -13,6 +15,7 @@ struct ParsedArgs {
bool run_memory_leak = true;
bool run_performance = true;
bool run_stress = true;
bool run_module = false;
int num_threads = 4;
int iterations = 1000;
};
......@@ -23,7 +26,7 @@ void printUsage() {
<< std::endl
<< "Options:" << std::endl
<< " --<device> Specify the device type (default: cpu)" << std::endl
<< " --test <name> Run specific test (basic|concurrency|exception|leak|performance|stress|all)" << std::endl
<< " --test <name> Run specific test (basic|concurrency|exception|leak|performance|stress|module|all)" << std::endl
<< " --threads <num> Number of threads for concurrency tests (default: 4)" << std::endl
<< " --iterations <num> Number of iterations for stress tests (default: 1000)" << std::endl
<< " --help Show this help message" << std::endl
......@@ -46,6 +49,7 @@ void printUsage() {
<< " leak - Memory leak detection tests" << std::endl
<< " performance - Performance and benchmark tests" << std::endl
<< " stress - Stress tests with high load" << std::endl
<< " module - Neural network module tests" << std::endl
<< " all - Run all tests (default)" << std::endl
<< std::endl;
exit(EXIT_SUCCESS);
......@@ -84,7 +88,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
}
std::string test_name = argv[++i];
args.run_basic = args.run_concurrency = args.run_exception_safety = args.run_memory_leak = args.run_performance = args.run_stress = false;
args.run_basic = args.run_concurrency = args.run_exception_safety = args.run_memory_leak = args.run_performance = args.run_stress = args.run_module = false;
if (test_name == "basic") {
args.run_basic = true;
......@@ -98,8 +102,10 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
args.run_performance = true;
} else if (test_name == "stress") {
args.run_stress = true;
} else if (test_name == "module") {
args.run_module = true;
} else if (test_name == "all") {
args.run_basic = args.run_concurrency = args.run_exception_safety = args.run_memory_leak = args.run_performance = args.run_stress = true;
args.run_basic = args.run_concurrency = args.run_exception_safety = args.run_memory_leak = args.run_performance = args.run_stress = args.run_module = true;
} else {
std::cerr << "Error: Unknown test name: " << test_name << std::endl;
exit(EXIT_FAILURE);
......@@ -157,7 +163,7 @@ int main(int argc, char *argv[]) {
spdlog::debug("Creating test runner");
// Create test runner
infinicore::test::MemoryTestRunner runner;
infinicore::test::InfiniCoreTestRunner runner;
spdlog::debug("Test runner created successfully");
// Add tests based on arguments
......@@ -171,6 +177,12 @@ int main(int argc, char *argv[]) {
spdlog::debug("TensorDestructorTest added successfully");
}
if (args.run_module) {
spdlog::debug("Adding NNModuleTest");
runner.addTest(std::make_unique<infinicore::test::NNModuleTest>());
spdlog::debug("NNModuleTest added successfully");
}
if (args.run_concurrency) {
runner.addTest(std::make_unique<infinicore::test::ConcurrencyTest>());
}
......@@ -196,13 +208,29 @@ int main(int argc, char *argv[]) {
auto results = runner.runAllTests();
spdlog::debug("All tests completed");
// Count results
// Count results and collect failed tests
size_t passed = 0, failed = 0;
std::vector<infinicore::test::TestResult> failed_tests;
for (const auto &result : results) {
if (result.passed) {
passed++;
} else {
failed++;
failed_tests.push_back(result);
}
}
// Print list of failed tests if any
if (!failed_tests.empty()) {
std::cout << "\n==============================================\n"
<< "❌ FAILED TESTS\n"
<< "==============================================" << std::endl;
for (const auto &test : failed_tests) {
std::cout << " • " << test.test_name;
if (!test.error_message.empty()) {
std::cout << "\n Error: " << test.error_message;
}
std::cout << "\n Duration: " << test.duration.count() << "μs" << std::endl;
}
}
......@@ -217,7 +245,7 @@ int main(int argc, char *argv[]) {
// Exit with appropriate code
if (failed > 0) {
std::cout << "\n❌ Some tests failed. Please review the output above." << std::endl;
std::cout << "\n❌ Some tests failed. Please review the failed tests list above." << std::endl;
return EXIT_FAILURE;
} else {
std::cout << "\n✅ All tests passed!" << std::endl;
......
......@@ -2,72 +2,17 @@
#define __INFINICORE_MEMORY_TEST_H__
#include "../infinicore/context/allocators/memory_allocator.hpp"
#include "test_runner.h"
#include <atomic>
#include <cassert>
#include <chrono>
#include <exception>
#include <future>
#include <infinicore.hpp>
#include <iostream>
#include <memory>
#include <mutex>
#include <queue>
#include <spdlog/spdlog.h>
#include <thread>
#include <unordered_map>
#include <vector>
namespace infinicore::test {
// Test result structure
struct TestResult {
std::string test_name;
bool passed;
std::string error_message;
std::chrono::microseconds duration;
TestResult(const std::string &name, bool pass, const std::string &error = "",
std::chrono::microseconds dur = std::chrono::microseconds(0))
: test_name(name), passed(pass), error_message(error), duration(dur) {}
};
// Test framework base class
class MemoryTestFramework {
public:
virtual ~MemoryTestFramework() = default;
virtual TestResult run() = 0;
virtual std::string getName() const = 0;
protected:
void logTestStart(const std::string &test_name) {
std::cout << "[TEST] Starting: " << test_name << std::endl;
}
void logTestResult(const TestResult &result) {
std::cout << "[TEST] " << (result.passed ? "PASSED" : "FAILED")
<< ": " << result.test_name;
if (!result.passed && !result.error_message.empty()) {
std::cout << " - " << result.error_message;
}
std::cout << " (Duration: " << result.duration.count() << "μs)" << std::endl;
}
template <typename Func>
TestResult measureTime(const std::string &test_name, Func &&func) {
auto start = std::chrono::high_resolution_clock::now();
try {
bool result = func();
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
return TestResult(test_name, result, "", duration);
} catch (const std::exception &e) {
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
return TestResult(test_name, false, e.what(), duration);
}
}
};
// Mock allocator for testing exception safety
class MockAllocator : public infinicore::MemoryAllocator {
public:
......@@ -149,13 +94,13 @@ private:
};
// Test categories
class BasicMemoryTest : public MemoryTestFramework {
class BasicMemoryTest : public TestFramework {
public:
TestResult run() override;
std::string getName() const override { return "BasicMemoryTest"; }
};
class ConcurrencyTest : public MemoryTestFramework {
class ConcurrencyTest : public TestFramework {
public:
TestResult run() override;
std::string getName() const override { return "ConcurrencyTest"; }
......@@ -166,7 +111,7 @@ private:
TestResult testMemoryAllocationRace();
};
class ExceptionSafetyTest : public MemoryTestFramework {
class ExceptionSafetyTest : public TestFramework {
public:
TestResult run() override;
std::string getName() const override { return "ExceptionSafetyTest"; }
......@@ -177,7 +122,7 @@ private:
TestResult testContextSwitchException();
};
class MemoryLeakTest : public MemoryTestFramework {
class MemoryLeakTest : public TestFramework {
public:
TestResult run() override;
std::string getName() const override { return "MemoryLeakTest"; }
......@@ -188,7 +133,7 @@ private:
TestResult testExceptionLeakDetection();
};
class PerformanceTest : public MemoryTestFramework {
class PerformanceTest : public TestFramework {
public:
TestResult run() override;
std::string getName() const override { return "PerformanceTest"; }
......@@ -199,7 +144,7 @@ private:
TestResult testMemoryCopyPerformance();
};
class StressTest : public MemoryTestFramework {
class StressTest : public TestFramework {
public:
TestResult run() override;
std::string getName() const override { return "StressTest"; }
......@@ -210,67 +155,6 @@ private:
TestResult testCrossDeviceStress();
};
// Test runner
class MemoryTestRunner {
public:
void addTest(std::unique_ptr<MemoryTestFramework> test) {
tests_.push_back(std::move(test));
}
std::vector<TestResult> runAllTests() {
std::vector<TestResult> results;
std::cout << "==============================================\n"
<< "InfiniCore Memory Management Test Suite\n"
<< "==============================================" << std::endl;
for (auto &test : tests_) {
logTestStart(test->getName());
TestResult result = test->run();
logTestResult(result);
results.push_back(result);
}
printSummary(results);
return results;
}
private:
std::vector<std::unique_ptr<MemoryTestFramework>> tests_;
void logTestStart(const std::string &test_name) {
std::cout << "\n[SUITE] Running: " << test_name << std::endl;
}
void logTestResult(const TestResult &result) {
std::cout << "[SUITE] " << (result.passed ? "PASSED" : "FAILED")
<< ": " << result.test_name << std::endl;
}
void printSummary(const std::vector<TestResult> &results) {
size_t passed = 0, failed = 0;
std::chrono::microseconds total_time(0);
for (const auto &result : results) {
if (result.passed) {
passed++;
} else {
failed++;
}
total_time += result.duration;
}
std::cout << "\n==============================================\n"
<< "Test Summary\n"
<< "==============================================\n"
<< "Total Tests: " << results.size() << "\n"
<< "Passed: " << passed << "\n"
<< "Failed: " << failed << "\n"
<< "Total Time: " << total_time.count() << "μs\n"
<< "==============================================" << std::endl;
}
};
} // namespace infinicore::test
#endif // __INFINICORE_MEMORY_TEST_H__
#include "test_nn_module.h"
#include "infinicore/ops.hpp"
namespace infinicore::test {
// Test 1: Basic module operations (creation, parameters, state_dict, load_state_dict)
TestResult NNModuleTest::testBasicModuleCreation() {
return measureTime("BasicModuleOperations", [this]() {
try {
spdlog::info("=== Testing Basic Module Operations ===");
// Test 1a: Module creation and parameter registration
spdlog::info("Test 1a: Module creation and parameter registration");
MockLinearModule module(8, 4, infinicore::Device());
// Verify the module was created successfully
auto state_dict = module.state_dict();
if (state_dict.size() != 2) {
spdlog::error("Expected 2 parameters, got {}", state_dict.size());
return false;
}
// Test weight and bias parameters
const auto &weight = module.get_weight();
const auto &bias = module.get_bias();
// Verify parameter shapes
if (weight->shape() != std::vector<size_t>({4, 8})) {
spdlog::error("Weight shape mismatch. Expected {{4, 8}}");
return false;
}
if (bias->shape() != std::vector<size_t>({4})) {
spdlog::error("Bias shape mismatch. Expected {{4}}");
return false;
}
spdlog::info("✓ Module creation and parameter registration passed");
// Test 1b: State dictionary functionality
spdlog::info("Test 1b: State dictionary functionality");
// Check if both parameters are in state dict
if (state_dict.find("weight") == state_dict.end()) {
spdlog::error("'weight' parameter not found in state dict");
return false;
}
if (state_dict.find("bias") == state_dict.end()) {
spdlog::error("'bias' parameter not found in state dict");
return false;
}
spdlog::debug("State dict contains {} parameters:", state_dict.size());
for (const auto &[name, tensor] : state_dict) {
std::ostringstream shape_str;
shape_str << "[";
for (size_t i = 0; i < tensor->shape().size(); ++i) {
if (i > 0) {
shape_str << ", ";
}
shape_str << tensor->shape()[i];
}
shape_str << "]";
spdlog::debug(" - {} with shape: {}", name, shape_str.str());
}
spdlog::info("✓ State dict functionality passed");
// Test 1c: Load state dict functionality
spdlog::info("Test 1c: Load state dict functionality");
// Create new tensors to load
auto new_weight = infinicore::Tensor::ones({4, 8}, infinicore::DataType::F32, infinicore::Device());
auto new_bias = infinicore::Tensor::zeros({4}, infinicore::DataType::F32, infinicore::Device());
// Load using load_parameter
module.load_parameter("weight", new_weight);
module.load_parameter("bias", new_bias);
// Verify the parameters were updated
auto updated_state_dict = module.state_dict();
if (!tensorsAllClose(updated_state_dict.at("weight"), new_weight, 1e-6, 1e-6)) {
spdlog::error("Weight parameter values do not match after load_parameter");
return false;
}
if (!tensorsAllClose(updated_state_dict.at("bias"), new_bias, 1e-6, 1e-6)) {
spdlog::error("Bias parameter values do not match after load_parameter");
return false;
}
// Test load_state_dict
std::unordered_map<std::string, infinicore::Tensor> new_state_dict;
new_state_dict.emplace("weight", infinicore::Tensor::ones({4, 8}, infinicore::DataType::F32, infinicore::Device()));
new_state_dict.emplace("bias", infinicore::Tensor::ones({4}, infinicore::DataType::F32, infinicore::Device()));
module.load_state_dict(new_state_dict);
auto final_state_dict = module.state_dict();
if (final_state_dict.size() != 2) {
spdlog::error("State dict size mismatch after load_state_dict");
return false;
}
spdlog::info("✓ Load state dict functionality passed");
spdlog::info("=== All Basic Module Operations Passed ===");
return true;
} catch (const std::exception &e) {
spdlog::error("Exception in testBasicModuleOperations: {}", e.what());
return false;
}
});
}
// Test 2: Advanced load state dict functionality (hierarchical modules)
TestResult NNModuleTest::testLoadStateDict() {
return measureTime("AdvancedLoadStateDict", [this]() {
try {
spdlog::info("=== Testing Advanced load_state_dict with Hierarchical Modules ===");
// Test: Deep nesting (2-level hierarchy)
spdlog::info("Test 4: Testing load_state_dict with 2-level deep nesting");
// Create parent -> child -> grandchild hierarchy using proper module definition
class DeepGrandchildModule : public infinicore::nn::Module {
protected:
INFINICORE_NN_MODULE(MockLinearModule, sublayer);
public:
DeepGrandchildModule() {
INFINICORE_NN_MODULE_INIT(sublayer, 6, 4, infinicore::Device());
}
};
class DeepChildModule : public infinicore::nn::Module {
protected:
INFINICORE_NN_MODULE(MockLinearModule, own_layer);
INFINICORE_NN_MODULE(DeepGrandchildModule, sublayer);
public:
DeepChildModule() {
INFINICORE_NN_MODULE_INIT(own_layer, 8, 6, infinicore::Device());
INFINICORE_NN_MODULE_INIT(sublayer);
}
};
class DeepParentModule : public infinicore::nn::Module {
protected:
INFINICORE_NN_MODULE(MockLinearModule, own_layer);
INFINICORE_NN_MODULE(DeepChildModule, layer1);
public:
DeepParentModule() {
INFINICORE_NN_MODULE_INIT(own_layer, 10, 8, infinicore::Device());
INFINICORE_NN_MODULE_INIT(layer1);
}
};
DeepParentModule deep_parent;
// Verify initial state dict includes all 2-level hierarchical parameters
auto deep_initial_state = deep_parent.state_dict();
spdlog::debug("Deep hierarchical state dict has {} parameters", deep_initial_state.size());
// Expected parameters:
// parent: own_layer.weight, own_layer.bias (2)
// layer1: layer1.own_layer.weight, layer1.own_layer.bias (2)
// sublayer: layer1.sublayer.sublayer.weight, layer1.sublayer.sublayer.bias (2)
// Total: 6 parameters
if (deep_initial_state.size() < 6) {
spdlog::error("Deep hierarchy state dict size mismatch. Expected at least 6, got {}",
deep_initial_state.size());
return false;
}
// Verify 2-level parameter names exist
bool has_sublayer_weight = deep_initial_state.find("layer1.sublayer.sublayer.weight") != deep_initial_state.end();
bool has_sublayer_bias = deep_initial_state.find("layer1.sublayer.sublayer.bias") != deep_initial_state.end();
if (!has_sublayer_weight || !has_sublayer_bias) {
spdlog::error("2-level nested parameters missing from state dict");
return false;
}
spdlog::debug("All 2-level hierarchical parameter names verified");
// Create state dict for 2-level hierarchy with all 1.0 values
std::unordered_map<std::string, infinicore::Tensor> deep_state_dict;
deep_state_dict.emplace("own_layer.weight", infinicore::Tensor::ones({8, 10}, infinicore::DataType::F32, infinicore::Device()));
deep_state_dict.emplace("own_layer.bias", infinicore::Tensor::ones({8}, infinicore::DataType::F32, infinicore::Device()));
deep_state_dict.emplace("layer1.own_layer.weight", infinicore::Tensor::ones({6, 8}, infinicore::DataType::F32, infinicore::Device()));
deep_state_dict.emplace("layer1.own_layer.bias", infinicore::Tensor::ones({6}, infinicore::DataType::F32, infinicore::Device()));
deep_state_dict.emplace("layer1.sublayer.sublayer.weight", infinicore::Tensor::ones({4, 6}, infinicore::DataType::F32, infinicore::Device()));
deep_state_dict.emplace("layer1.sublayer.sublayer.bias", infinicore::Tensor::ones({4}, infinicore::DataType::F32, infinicore::Device()));
// Load the deep hierarchical state dict
deep_parent.load_state_dict(deep_state_dict);
spdlog::debug("Successfully loaded 2-level deep hierarchical state dict");
// Verify all parameters were loaded correctly
auto deep_loaded_state = deep_parent.state_dict();
// Verify shapes at all levels
if (deep_loaded_state.at("own_layer.weight")->shape() != std::vector<size_t>({8, 10})) {
spdlog::error("Deep parent weight shape mismatch");
return false;
}
if (deep_loaded_state.at("layer1.own_layer.weight")->shape() != std::vector<size_t>({6, 8})) {
spdlog::error("Deep layer1 weight shape mismatch");
return false;
}
if (deep_loaded_state.at("layer1.sublayer.sublayer.weight")->shape() != std::vector<size_t>({4, 6})) {
spdlog::error("Deep sublayer weight shape mismatch");
return false;
}
spdlog::debug("All 2-level deep parameter shapes verified");
// Verify actual weight loading correctness by checking that loaded parameters
// match what we provided in the state dict (use the original tensors)
spdlog::info("Verifying weight loading correctness by direct comparison");
// Get the tensors we loaded from the state dict
auto loaded_parent_weight = deep_loaded_state.at("own_layer.weight");
auto loaded_layer1_weight = deep_loaded_state.at("layer1.own_layer.weight");
auto loaded_sublayer_weight = deep_loaded_state.at("layer1.sublayer.sublayer.weight");
// Compare with the original tensors we put in the state dict
if (!tensorsAllClose(loaded_parent_weight, deep_state_dict.at("own_layer.weight"), 1e-5, 1e-5)) {
spdlog::error("Deep parent weight not preserved after loading");
return false;
}
if (!tensorsAllClose(loaded_layer1_weight, deep_state_dict.at("layer1.own_layer.weight"), 1e-5, 1e-5)) {
spdlog::error("Deep layer1 weight not preserved after loading");
return false;
}
if (!tensorsAllClose(loaded_sublayer_weight, deep_state_dict.at("layer1.sublayer.sublayer.weight"), 1e-5, 1e-5)) {
spdlog::error("Deep sublayer weight not preserved after loading");
return false;
}
spdlog::info("✓ Weight loading correctness verified - loaded values match input state dict");
spdlog::info("✓ 2-level deep hierarchy load_state_dict verification passed");
spdlog::info("=== All Advanced load_state_dict Tests Passed ===");
return true;
} catch (const std::exception &e) {
spdlog::error("Exception in testLoadStateDict: {}", e.what());
return false;
}
});
}
// Test 3: Module hierarchy (demonstrates proper hierarchical construction pattern)
TestResult NNModuleTest::testModuleHierarchy() {
return measureTime("ModuleHierarchy", [this]() {
try {
// Create a hierarchy using proper module definition: root -> layer1 -> layer2
class Layer2Module : public infinicore::nn::Module {
protected:
INFINICORE_NN_MODULE(MockLinearModule, sublayer);
public:
Layer2Module() {
INFINICORE_NN_MODULE_INIT(sublayer, 8, 4, infinicore::Device());
}
};
class Layer1Module : public infinicore::nn::Module {
protected:
INFINICORE_NN_MODULE(MockLinearModule, sublayer);
INFINICORE_NN_MODULE(Layer2Module, layer2);
public:
Layer1Module() {
INFINICORE_NN_MODULE_INIT(sublayer, 16, 8, infinicore::Device());
INFINICORE_NN_MODULE_INIT(layer2);
}
};
class RootModule : public infinicore::nn::Module {
protected:
INFINICORE_NN_MODULE(MockLinearModule, root_layer);
INFINICORE_NN_MODULE(Layer1Module, layer1);
public:
RootModule() {
INFINICORE_NN_MODULE_INIT(root_layer, 20, 16, infinicore::Device());
INFINICORE_NN_MODULE_INIT(layer1);
}
};
RootModule root_module;
// Check the complete state dict
auto root_state_dict = root_module.state_dict();
// Debug: Print all parameters
spdlog::debug("Found {} parameters:", root_state_dict.size());
for (const auto &pair : root_state_dict) {
spdlog::debug(" - {}", pair.first);
}
// Should have: root_layer.weight, root_layer.bias,
// layer1.sublayer.weight, layer1.sublayer.bias,
// layer1.layer2.sublayer.weight, layer1.layer2.sublayer.bias
if (root_state_dict.size() < 6) {
std::cout << "Error: Expected at least 6 parameters in hierarchy, got "
<< root_state_dict.size() << std::endl;
return false;
}
std::cout << "Module hierarchy test passed. Root state dict has "
<< root_state_dict.size() << " parameters" << std::endl;
// Print the hierarchy
std::cout << "Module hierarchy:" << std::endl;
for (const auto &pair : root_state_dict) {
std::cout << " - " << pair.first << std::endl;
}
// Additional: Test INFINICORE_NN_MODULE_VEC vector registration
spdlog::info("Testing INFINICORE_NN_MODULE_VEC (vector of submodules)");
class VecModule : public infinicore::nn::Module {
protected:
INFINICORE_NN_MODULE_VEC(MockLinearModule, layers);
public:
VecModule() {
INFINICORE_NN_MODULE_VEC_INIT(layers, 3, MockLinearModule, 16, 8, infinicore::Device());
}
};
VecModule vec_mod;
auto vec_state = vec_mod.state_dict();
// Expect parameters for layers.0, layers.1, layers.2 (weight and bias for each)
std::vector<std::string> expected_vec_params = {
"layers.0.weight", "layers.0.bias",
"layers.1.weight", "layers.1.bias",
"layers.2.weight", "layers.2.bias"};
for (const auto &param : expected_vec_params) {
if (vec_state.find(param) == vec_state.end()) {
spdlog::error("INFINICORE_NN_MODULE_VEC: missing '{}' in state_dict", param);
return false;
}
}
spdlog::info("INFINICORE_NN_MODULE_VEC test passed - found all vector layer parameters");
return true;
} catch (const std::exception &e) {
std::cout << "Exception in testModuleHierarchy: " << e.what() << std::endl;
return false;
}
});
}
// Test 4: Parameter loading from blob
TestResult NNModuleTest::testParameterLoading() {
return measureTime("ParameterLoading", [this]() {
try {
MockLinearModule module(3, 2, infinicore::Device());
// Create test data
std::vector<float> weight_data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
std::vector<float> bias_data = {0.1f, 0.2f};
// Load parameters from blob data
module.load_parameter_from_blob("weight", weight_data.data());
module.load_parameter_from_blob("bias", bias_data.data());
std::cout << "Successfully loaded parameters from blob data" << std::endl;
// Verify parameters exist
auto state_dict = module.state_dict();
if (state_dict.find("weight") == state_dict.end() || state_dict.find("bias") == state_dict.end()) {
std::cout << "Error: Parameters not found after loading" << std::endl;
return false;
}
std::cout << "Parameter loading test passed" << std::endl;
return true;
} catch (const std::exception &e) {
std::cout << "Exception in testParameterLoading: " << e.what() << std::endl;
return false;
}
});
}
// Test 5: Linear module implementation and behavior
TestResult NNModuleTest::testModuleLinear() {
return measureTime("ModuleLinear", [this]() {
try {
// Test with bias
spdlog::info("Testing Linear module with bias (8->4 features)");
infinicore::nn::Linear m1(8, 4, true, infinicore::Device());
auto sd1 = m1.state_dict();
if (sd1.find("weight") == sd1.end()) {
spdlog::error("weight missing");
return false;
}
if (sd1.find("bias") == sd1.end()) {
spdlog::error("bias missing when bias=true");
return false;
}
if (sd1.at("weight")->shape() != std::vector<size_t>({4, 8})) {
spdlog::error("weight shape mismatch. Expected {{4, 8}}, got different shape");
return false;
}
if (sd1.at("bias")->shape() != std::vector<size_t>({4})) {
spdlog::error("bias shape mismatch. Expected {{4}}, got different shape");
return false;
}
spdlog::debug("Parameter shapes verified: weight {{4, 8}}, bias {{4}}");
// Test module properties
if (m1.in_features() != 8) {
spdlog::error("in_features mismatch. Expected 8, got {}", m1.in_features());
return false;
}
if (m1.out_features() != 4) {
spdlog::error("out_features mismatch. Expected 4, got {}", m1.out_features());
return false;
}
if (!m1.has_bias()) {
spdlog::error("has_bias should be true");
return false;
}
// Test linear computation with bias
spdlog::info("Testing linear computation with bias");
auto input1 = infinicore::Tensor::ones({2, 8}, infinicore::DataType::F32, infinicore::Device());
auto output1 = m1.forward(input1);
if (output1->shape() != std::vector<size_t>({2, 4})) {
spdlog::error("Linear output shape mismatch with bias. Expected {{2, 4}}, got different shape");
return false;
}
spdlog::debug("Linear computation with bias passed. Input shape: {{2, 8}}, Output shape: {{2, 4}}");
// Test without bias
spdlog::info("Testing Linear module without bias (16->3 features)");
infinicore::nn::Linear m2(16, 3, false, infinicore::Device());
auto sd2 = m2.state_dict();
if (sd2.find("weight") == sd2.end()) {
spdlog::error("weight missing (no-bias)");
return false;
}
if (sd2.find("bias") != sd2.end()) {
spdlog::error("bias should not exist when bias=false");
return false;
}
if (sd2.at("weight")->shape() != std::vector<size_t>({3, 16})) {
spdlog::error("weight shape mismatch (no-bias). Expected {{3, 16}}, got different shape");
return false;
}
spdlog::debug("Parameter shapes verified: weight {{3, 16}}, no bias");
// Test module properties
if (m2.in_features() != 16) {
spdlog::error("in_features mismatch. Expected 16, got {}", m2.in_features());
return false;
}
if (m2.out_features() != 3) {
spdlog::error("out_features mismatch. Expected 3, got {}", m2.out_features());
return false;
}
if (m2.has_bias()) {
spdlog::error("has_bias should be false");
return false;
}
// Test linear computation without bias
spdlog::info("Testing linear computation without bias");
auto input2 = infinicore::Tensor::ones({1, 16}, infinicore::DataType::F32, infinicore::Device());
auto output2 = m2.forward(input2);
if (output2->shape() != std::vector<size_t>({1, 3})) {
spdlog::error("Linear output shape mismatch without bias. Expected {{1, 3}}, got different shape");
return false;
}
spdlog::debug("Linear computation without bias passed. Input shape: {{1, 16}}, Output shape: {{1, 3}}");
// Test load_state_dict for m2 (without bias)
spdlog::info("Testing load_state_dict on Linear without bias");
auto m2_load_weight = infinicore::Tensor::ones({3, 16}, infinicore::DataType::F32, infinicore::Device());
std::unordered_map<std::string, infinicore::Tensor> m2_state_dict;
m2_state_dict.emplace("weight", m2_load_weight);
// Note: no bias parameter
m2.load_state_dict(m2_state_dict);
// Verify via state_dict() and direct access
if (!tensorsAllClose(m2.state_dict().at("weight"), m2_load_weight, 1e-5, 1e-5)) {
spdlog::error("m2 weight not loaded correctly");
return false;
}
if (!tensorsAllClose(m2.weight(), m2_load_weight, 1e-5, 1e-5)) {
spdlog::error("m2 weight field not synchronized");
return false;
}
spdlog::debug("m2 load_state_dict verified - weight loaded correctly (no bias)");
// Test batch processing
spdlog::info("Testing batch linear computation (batch size 3)");
auto input3 = infinicore::Tensor::ones({3, 8}, infinicore::DataType::F32, infinicore::Device());
auto output3 = m1.forward(input3);
if (output3->shape() != std::vector<size_t>({3, 4})) {
spdlog::error("Batch linear output shape mismatch. Expected {{3, 4}}, got different shape");
return false;
}
spdlog::debug("Batch linear computation passed. Input shape: {{3, 8}}, Output shape: {{3, 4}}");
// Test parameter accessors
spdlog::info("Testing parameter accessors");
auto weight_accessor = m1.weight();
auto bias_accessor = m1.bias();
if (weight_accessor->shape() != std::vector<size_t>({4, 8})) {
spdlog::error("Weight accessor shape mismatch");
return false;
}
if (bias_accessor->shape() != std::vector<size_t>({4})) {
spdlog::error("Bias accessor shape mismatch");
return false;
}
// Test load_state_dict for m1 (with bias)
spdlog::info("Testing load_state_dict on Linear with bias");
auto m1_load_weight = infinicore::Tensor::ones({4, 8}, infinicore::DataType::F32, infinicore::Device());
auto m1_load_bias = infinicore::Tensor::ones({4}, infinicore::DataType::F32, infinicore::Device());
std::unordered_map<std::string, infinicore::Tensor> m1_state_dict;
m1_state_dict.emplace("weight", m1_load_weight);
m1_state_dict.emplace("bias", m1_load_bias);
m1.load_state_dict(m1_state_dict);
// Verify via state_dict() and direct access
if (!tensorsAllClose(m1.state_dict().at("weight"), m1_load_weight, 1e-5, 1e-5)) {
spdlog::error("m1 weight not loaded correctly");
return false;
}
if (!tensorsAllClose(m1.weight(), m1_load_weight, 1e-5, 1e-5)) {
spdlog::error("m1 weight field not synchronized");
return false;
}
if (!tensorsAllClose(m1.bias(), m1_load_bias, 1e-5, 1e-5)) {
spdlog::error("m1 bias field not synchronized");
return false;
}
spdlog::debug("m1 load_state_dict verified - parameters and fields synchronized");
// Test extra_repr
std::string repr = m1.extra_repr();
spdlog::debug("Linear module representation: {}", repr);
// Test forward with residual connection
spdlog::info("Testing Linear forward with residual connection");
auto residual = infinicore::Tensor::ones({2, 4}, infinicore::DataType::F32, infinicore::Device());
auto output_with_residual = m1.forward(input1, residual);
if (output_with_residual->shape() != std::vector<size_t>({2, 4})) {
spdlog::error("Linear output with residual shape mismatch. Expected {{2, 4}}, got different shape");
return false;
}
spdlog::debug("Linear forward with residual passed. Input shape: {{2, 8}}, Residual shape: {{2, 4}}, Output shape: {{2, 4}}");
// Test computation correctness: InfiniCore vs Naive implementation
spdlog::info("Testing computation correctness: InfiniCore vs Naive implementation");
// Create test data with known values for verification
auto test_input = infinicore::Tensor::ones({2, 8}, infinicore::DataType::F32, infinicore::Device());
auto test_residual = infinicore::Tensor::ones({2, 4}, infinicore::DataType::F32, infinicore::Device());
// Get InfiniCore result
auto infinicore_output = m1.forward(test_input, test_residual);
// Compute naive result: output = input @ weight.T + bias + residual
auto naive_output = infinicore::Tensor::empty({2, 4}, infinicore::DataType::F32, infinicore::Device());
auto weight_naive = m1.weight();
auto bias_naive = m1.bias();
// Naive computation step by step
auto weight_t = weight_naive->permute({1, 0}); // [4, 8] -> [8, 4]
auto matmul_result = infinicore::op::matmul(test_input, weight_t); // [2, 4]
// Broadcast bias to [2, 4]
size_t ndim_diff = naive_output->ndim() - 1;
std::vector<infinicore::Stride> strides(ndim_diff, 0);
strides.push_back(bias_naive->stride(0));
auto bias_view = bias_naive->as_strided(naive_output->shape(), strides);
// Add bias to matmul result
infinicore::op::add_(naive_output, matmul_result, bias_view);
// Add residual
infinicore::op::add_(naive_output, naive_output, test_residual);
// Compare results with actual value checking
if (infinicore_output->shape() != naive_output->shape()) {
spdlog::error("Shape mismatch between InfiniCore and naive implementation");
return false;
}
// Compare actual tensor values using local checker
if (!tensorsAllClose(infinicore_output, naive_output, 1e-5, 1e-5)) {
spdlog::error("Value mismatch between InfiniCore and naive implementation");
return false;
}
spdlog::debug("Value comparison passed - InfiniCore and naive results match within tolerance");
spdlog::debug("Computation correctness test passed - both implementations produce identical results");
spdlog::debug("InfiniCore output shape: {{2, 4}}, Naive output shape: {{2, 4}}");
// Test computation correctness without bias (using m2)
spdlog::info("Testing computation correctness without bias");
auto test_input_no_bias = infinicore::Tensor::ones({1, 16}, infinicore::DataType::F32, infinicore::Device());
auto test_residual_no_bias = infinicore::Tensor::ones({1, 3}, infinicore::DataType::F32, infinicore::Device());
// Get InfiniCore result (no bias)
auto infinicore_output_no_bias = m2.forward(test_input_no_bias, test_residual_no_bias);
// Compute naive result without bias: output = input @ weight.T + residual
auto naive_output_no_bias = infinicore::Tensor::empty({1, 3}, infinicore::DataType::F32, infinicore::Device());
auto weight_no_bias_naive = m2.weight();
// Naive computation: just matmul + residual
auto weight_t_no_bias = weight_no_bias_naive->permute({1, 0}); // [3, 16] -> [16, 3]
auto matmul_result_no_bias = infinicore::op::matmul(test_input_no_bias, weight_t_no_bias); // [1, 3]
// Add residual
infinicore::op::add_(naive_output_no_bias, matmul_result_no_bias, test_residual_no_bias);
// Compare results with actual value checking
if (infinicore_output_no_bias->shape() != naive_output_no_bias->shape()) {
spdlog::error("Shape mismatch between InfiniCore and naive implementation (no bias)");
return false;
}
// Compare actual tensor values for no-bias case
if (!tensorsAllClose(infinicore_output_no_bias, naive_output_no_bias, 1e-5, 1e-5)) {
spdlog::error("Value mismatch in no-bias computation");
return false;
}
spdlog::debug("No-bias value comparison passed - results match within tolerance");
spdlog::debug("No-bias computation correctness test passed - both implementations produce identical results");
spdlog::debug("InfiniCore no-bias output shape: {{1, 3}}, Naive no-bias output shape: {{1, 3}}");
// Test basic forward (no residual) vs naive
spdlog::info("Testing basic forward vs naive implementation");
auto basic_infinicore = m1.forward(test_input);
auto basic_naive = infinicore::Tensor::empty({2, 4}, infinicore::DataType::F32, infinicore::Device());
// Naive basic computation: input @ weight.T + bias
auto basic_matmul = infinicore::op::matmul(test_input, weight_t);
infinicore::op::add_(basic_naive, basic_matmul, bias_view);
if (basic_infinicore->shape() != basic_naive->shape()) {
spdlog::error("Shape mismatch in basic forward computation");
return false;
}
// Compare actual tensor values for basic forward
if (!tensorsAllClose(basic_infinicore, basic_naive, 1e-5, 1e-5)) {
spdlog::error("Value mismatch in basic forward computation");
return false;
}
spdlog::debug("Basic forward value comparison passed - results match within tolerance");
spdlog::debug("Basic forward computation correctness test passed - both implementations produce identical results");
spdlog::debug("Basic InfiniCore output shape: {{2, 4}}, Basic naive output shape: {{2, 4}}");
spdlog::info("All Linear module tests passed (with/without bias, load_state_dict, computation verification)");
return true;
} catch (const std::exception &e) {
spdlog::error("Exception in testModuleLinear: {}", e.what());
return false;
}
});
}
// Test 6: Embedding module implementation
TestResult NNModuleTest::testModuleEmbedding() {
return measureTime("ModuleEmbedding", [this]() {
try {
spdlog::info("Testing Embedding module implementation");
// Test 1: Basic embedding creation
spdlog::info("Test 1: Basic embedding creation (vocab=100, dim=64)");
infinicore::nn::Embedding emb1(100, 64);
auto state1 = emb1.state_dict();
if (state1.find("weight") == state1.end()) {
spdlog::error("Embedding weight not found in state dict");
return false;
}
if (state1.at("weight")->shape() != std::vector<size_t>({100, 64})) {
spdlog::error("Embedding weight shape mismatch. Expected {{100, 64}}");
return false;
}
if (emb1.num_embeddings() != 100) {
spdlog::error("num_embeddings mismatch. Expected 100, got {}", emb1.num_embeddings());
return false;
}
if (emb1.embedding_dim() != 64) {
spdlog::error("embedding_dim mismatch. Expected 64, got {}", emb1.embedding_dim());
return false;
}
spdlog::debug("Basic embedding creation passed");
// Test 2: Embedding with padding_idx
spdlog::info("Test 2: Embedding with padding_idx=0");
infinicore::nn::Embedding emb2(50, 32, 0, infinicore::DataType::F32, infinicore::Device());
if (!emb2.padding_idx().has_value()) {
spdlog::error("padding_idx should have a value");
return false;
}
if (emb2.padding_idx().value() != 0) {
spdlog::error("padding_idx mismatch. Expected 0, got {}", emb2.padding_idx().value());
return false;
}
spdlog::debug("Embedding with padding_idx passed");
// Test 3: Forward pass - single index
spdlog::info("Test 3: Forward pass with single index");
std::vector<int64_t> single_data = {5};
auto indices_single = infinicore::Tensor::from_blob(single_data.data(), {1}, infinicore::DataType::I64, infinicore::Device());
auto output_single = emb1.forward(indices_single);
if (output_single->shape() != std::vector<size_t>({1, 64})) {
spdlog::error("Single index output shape mismatch. Expected {{1, 64}}");
return false;
}
spdlog::debug("Single index forward pass passed. Output shape: {{1, 64}}");
// Test 4: Forward pass - batch of indices
spdlog::info("Test 4: Forward pass with batch of indices");
std::vector<int64_t> batch_data = {0, 5, 10};
auto indices_batch = infinicore::Tensor::from_blob(batch_data.data(), {3}, infinicore::DataType::I64, infinicore::Device());
auto output_batch = emb1.forward(indices_batch);
if (output_batch->shape() != std::vector<size_t>({3, 64})) {
spdlog::error("Batch output shape mismatch. Expected {{3, 64}}");
return false;
}
spdlog::debug("Batch forward pass passed. Output shape: {{3, 64}}");
// Test 5: Forward pass - 2D indices (batch_size, seq_len)
spdlog::info("Test 5: Forward pass with 2D indices [batch, seq_len]");
std::vector<int64_t> data_2d = {1, 2, 3, 4, 5, 6, 7, 8};
auto indices_2d = infinicore::Tensor::from_blob(data_2d.data(), {2, 4},
infinicore::DataType::I64, infinicore::Device());
auto output_2d = emb1.forward(indices_2d);
if (output_2d->shape() != std::vector<size_t>({2, 4, 64})) {
spdlog::error("2D indices output shape mismatch. Expected {{2, 4, 64}}");
return false;
}
spdlog::debug("2D indices forward pass passed. Output shape: {{2, 4, 64}}");
// Test 6: Embedding lookup consistency
spdlog::info("Test 6: Testing embedding lookup consistency");
std::vector<int64_t> idx_data = {7};
auto idx1 = infinicore::Tensor::from_blob(idx_data.data(), {1}, infinicore::DataType::I64, infinicore::Device());
auto idx2 = infinicore::Tensor::from_blob(idx_data.data(), {1}, infinicore::DataType::I64, infinicore::Device());
auto out1 = emb1.forward(idx1);
auto out2 = emb1.forward(idx2);
// Same index should give same embedding
if (!tensorsAllClose(out1, out2, 1e-7, 1e-7)) {
spdlog::error("Same index should return identical embeddings");
return false;
}
spdlog::debug("Embedding lookup consistency passed");
// Test 7: load_state_dict
spdlog::info("Test 7: Testing load_state_dict for Embedding");
auto new_weight = infinicore::Tensor::ones({100, 64}, infinicore::DataType::F32, infinicore::Device());
std::unordered_map<std::string, infinicore::Tensor> new_state;
new_state.emplace("weight", new_weight);
emb1.load_state_dict(new_state);
if (!tensorsAllClose(emb1.weight(), new_weight, 1e-7, 1e-7)) {
spdlog::error("Embedding weight not loaded correctly");
return false;
}
spdlog::debug("load_state_dict for Embedding passed");
// Test 8: extra_repr
spdlog::info("Test 8: Testing extra_repr");
std::string repr1 = emb1.extra_repr();
std::string repr2 = emb2.extra_repr();
spdlog::debug("Embedding repr (no padding): {}", repr1);
spdlog::debug("Embedding repr (with padding): {}", repr2);
if (repr1.find("num_embeddings=100") == std::string::npos) {
spdlog::error("extra_repr should contain num_embeddings");
return false;
}
if (repr2.find("padding_idx=0") == std::string::npos) {
spdlog::error("extra_repr should contain padding_idx when specified");
return false;
}
spdlog::debug("extra_repr test passed");
spdlog::info("All Embedding module tests passed!");
return true;
} catch (const std::exception &e) {
spdlog::error("Exception in testModuleEmbedding: {}", e.what());
return false;
}
});
}
// Test 7: RMSNorm module implementation
TestResult NNModuleTest::testModuleRMSNorm() {
return measureTime("ModuleRMSNorm", [this]() {
try {
spdlog::info("Testing RMSNorm module implementation");
// Test 1: Basic RMSNorm creation
spdlog::info("Test 1: Basic RMSNorm creation (hidden_size=768)");
infinicore::nn::RMSNorm norm1(768, 1e-6, infinicore::Device());
auto state1 = norm1.state_dict();
if (state1.find("weight") == state1.end()) {
spdlog::error("RMSNorm weight not found in state dict");
return false;
}
if (state1.at("weight")->shape() != std::vector<size_t>({768})) {
spdlog::error("RMSNorm weight shape mismatch. Expected {{768}}");
return false;
}
if (norm1.normalized_shape() != 768) {
spdlog::error("normalized_shape mismatch. Expected 768, got {}", norm1.normalized_shape());
return false;
}
spdlog::debug("Basic RMSNorm creation passed");
// Test 2: Forward pass - 2D input [batch, hidden]
spdlog::info("Test 2: Forward pass with 2D input [batch, hidden]");
auto input_2d = infinicore::Tensor::ones({4, 768}, infinicore::DataType::F32, infinicore::Device());
auto output_2d = norm1.forward(input_2d);
if (output_2d->shape() != std::vector<size_t>({4, 768})) {
spdlog::error("2D output shape mismatch. Expected {{4, 768}}");
return false;
}
spdlog::debug("2D forward pass passed. Output shape: {{4, 768}}");
// Test 3: Forward pass - 3D input [batch, seq_len, hidden]
spdlog::info("Test 3: Forward pass with 3D input [batch, seq_len, hidden]");
auto input_3d = infinicore::Tensor::ones({2, 10, 768}, infinicore::DataType::F32, infinicore::Device());
auto output_3d = norm1.forward(input_3d);
if (output_3d->shape() != std::vector<size_t>({2, 10, 768})) {
spdlog::error("3D output shape mismatch. Expected {{2, 10, 768}}");
return false;
}
spdlog::debug("3D forward pass passed. Output shape: {{2, 10, 768}}");
// Test 4: Test normalization properties
spdlog::info("Test 4: Testing RMSNorm properties");
auto test_input = infinicore::Tensor::ones({1, 768}, infinicore::DataType::F32, infinicore::Device());
auto test_output = norm1.forward(test_input);
// Output should have same shape
if (test_output->shape() != test_input->shape()) {
spdlog::error("Output shape doesn't match input shape");
return false;
}
spdlog::debug("RMSNorm properties test passed");
// Test 5: load_state_dict
spdlog::info("Test 5: Testing load_state_dict for RMSNorm");
auto new_weight = infinicore::Tensor::ones({768}, infinicore::DataType::F32, infinicore::Device());
std::unordered_map<std::string, infinicore::Tensor> new_state;
new_state.emplace("weight", new_weight);
norm1.load_state_dict(new_state);
if (!tensorsAllClose(norm1.weight(), new_weight, 1e-7, 1e-7)) {
spdlog::error("RMSNorm weight not loaded correctly");
return false;
}
spdlog::debug("load_state_dict for RMSNorm passed");
// Test 6: extra_repr
spdlog::info("Test 6: Testing extra_repr");
std::string repr = norm1.extra_repr();
spdlog::debug("RMSNorm repr: {}", repr);
if (repr.find("normalized_shape=768") == std::string::npos) {
spdlog::error("extra_repr should contain normalized_shape");
return false;
}
if (repr.find("eps=") == std::string::npos) {
spdlog::error("extra_repr should contain eps");
return false;
}
spdlog::debug("extra_repr test passed");
// Test 7: Different hidden sizes
spdlog::info("Test 7: Testing different hidden sizes");
infinicore::nn::RMSNorm norm_small(128, 1e-5, infinicore::Device());
infinicore::nn::RMSNorm norm_large(4096, 1e-6, infinicore::Device());
auto input_small = infinicore::Tensor::ones({2, 128}, infinicore::DataType::F32, infinicore::Device());
auto output_small = norm_small.forward(input_small);
auto input_large = infinicore::Tensor::ones({2, 4096}, infinicore::DataType::F32, infinicore::Device());
auto output_large = norm_large.forward(input_large);
if (output_small->shape() != std::vector<size_t>({2, 128})) {
spdlog::error("Small RMSNorm output shape mismatch");
return false;
}
if (output_large->shape() != std::vector<size_t>({2, 4096})) {
spdlog::error("Large RMSNorm output shape mismatch");
return false;
}
spdlog::debug("Different hidden sizes test passed");
spdlog::info("All RMSNorm module tests passed!");
return true;
} catch (const std::exception &e) {
spdlog::error("Exception in testModuleRMSNorm: {}", e.what());
return false;
}
});
}
// Test 8: Comprehensive Tiny-Llama model test (construction + weight loading + validation)
TestResult NNModuleTest::testTinyLlamaConstruction() {
return measureTime("TinyLlamaModelTest", [this]() {
try {
spdlog::info("==========================================");
spdlog::info("Testing Tiny-Llama Model Construction and Weight Loading");
spdlog::info("==========================================");
// Tiny-Llama configuration (actual Tiny-Llama-1.1B-Chat-v1.0 specs)
struct TinyLlamaConfig {
size_t vocab_size = 32000;
size_t hidden_size = 2048;
size_t intermediate_size = 5632;
size_t num_hidden_layers = 22;
size_t num_attention_heads = 32;
size_t num_key_value_heads = 4; // GQA (Grouped Query Attention)
size_t max_position_embeddings = 2048;
double rms_norm_eps = 1e-5;
};
TinyLlamaConfig config;
// ============================================
// Phase 0: Use hard-coded TinyLlama configuration (CI-friendly)
// ============================================
spdlog::info("");
spdlog::info("Phase 0: Using hard-coded TinyLlama configuration (CI)");
spdlog::info("------------------------------------------");
spdlog::info("Using Configuration:");
spdlog::info(" vocab_size: {}", config.vocab_size);
spdlog::info(" hidden_size: {}", config.hidden_size);
spdlog::info(" intermediate_size: {}", config.intermediate_size);
spdlog::info(" num_layers: {}", config.num_hidden_layers);
spdlog::info(" num_attention_heads: {}", config.num_attention_heads);
spdlog::info(" num_key_value_heads: {} (GQA)", config.num_key_value_heads);
spdlog::info(" max_position_embeddings: {}", config.max_position_embeddings);
spdlog::info(" rms_norm_eps: {}", config.rms_norm_eps);
// Create Tiny-Llama model skeleton closely matching HF/TinyLlama naming
class TinyLlamaModel : public infinicore::nn::Module {
protected:
// Inner modules to match naming like: layers.0.self_attn.q_proj.weight, layers.0.mlp.gate_proj.weight
class SelfAttn : public infinicore::nn::Module {
public:
INFINICORE_NN_MODULE(infinicore::nn::Linear, q_proj);
INFINICORE_NN_MODULE(infinicore::nn::Linear, k_proj);
INFINICORE_NN_MODULE(infinicore::nn::Linear, v_proj);
INFINICORE_NN_MODULE(infinicore::nn::Linear, o_proj);
SelfAttn(size_t hidden_size, size_t kv_dim, const infinicore::Device &device) {
INFINICORE_NN_MODULE_INIT(q_proj, hidden_size, hidden_size, false, device);
INFINICORE_NN_MODULE_INIT(k_proj, hidden_size, kv_dim, false, device);
INFINICORE_NN_MODULE_INIT(v_proj, hidden_size, kv_dim, false, device);
INFINICORE_NN_MODULE_INIT(o_proj, hidden_size, hidden_size, false, device);
}
};
class MLP : public infinicore::nn::Module {
public:
INFINICORE_NN_MODULE(infinicore::nn::Linear, gate_proj);
INFINICORE_NN_MODULE(infinicore::nn::Linear, up_proj);
INFINICORE_NN_MODULE(infinicore::nn::Linear, down_proj);
MLP(size_t hidden_size, size_t intermediate_size, const infinicore::Device &device) {
INFINICORE_NN_MODULE_INIT(gate_proj, hidden_size, intermediate_size, false, device);
INFINICORE_NN_MODULE_INIT(up_proj, hidden_size, intermediate_size, false, device);
INFINICORE_NN_MODULE_INIT(down_proj, intermediate_size, hidden_size, false, device);
}
};
class Block : public infinicore::nn::Module {
public:
INFINICORE_NN_MODULE(infinicore::nn::RMSNorm, input_layernorm);
INFINICORE_NN_MODULE(SelfAttn, self_attn);
INFINICORE_NN_MODULE(infinicore::nn::RMSNorm, post_attention_layernorm);
INFINICORE_NN_MODULE(MLP, mlp);
Block(const TinyLlamaConfig &cfg, const infinicore::Device &device) {
size_t kv_dim = cfg.hidden_size * cfg.num_key_value_heads / cfg.num_attention_heads;
INFINICORE_NN_MODULE_INIT(input_layernorm, cfg.hidden_size, cfg.rms_norm_eps, device);
INFINICORE_NN_MODULE_INIT(self_attn, cfg.hidden_size, kv_dim, device);
INFINICORE_NN_MODULE_INIT(post_attention_layernorm, cfg.hidden_size, cfg.rms_norm_eps, device);
INFINICORE_NN_MODULE_INIT(mlp, cfg.hidden_size, cfg.intermediate_size, device);
}
};
public:
INFINICORE_NN_MODULE(infinicore::nn::Embedding, embed_tokens);
INFINICORE_NN_MODULE_VEC(Block, layers);
INFINICORE_NN_MODULE(infinicore::nn::RMSNorm, norm);
TinyLlamaModel(const TinyLlamaConfig &config, const infinicore::Device &device) {
INFINICORE_NN_MODULE_INIT(embed_tokens, config.vocab_size, config.hidden_size, std::nullopt, infinicore::DataType::F32, device);
INFINICORE_NN_MODULE_VEC_INIT(layers, config.num_hidden_layers, Block, config, device);
INFINICORE_NN_MODULE_INIT(norm, config.hidden_size, config.rms_norm_eps, device);
}
};
// ============================================
// Phase 1: Model Construction Verification
// ============================================
spdlog::info("");
spdlog::info("Phase 1: Model Construction Verification");
spdlog::info("------------------------------------------");
// Construct the model
TinyLlamaModel model(config, infinicore::Device());
// Verify all components are created
auto state = model.state_dict();
spdlog::info("✓ Model constructed with {} parameters", state.size());
// Parameter count expectation:
// embed_tokens.weight (1) + norm.weight (1) + per-layer (9 params) * num_layers
size_t expected_param_count = 1 + 1 + config.num_hidden_layers * 9;
if (state.size() != expected_param_count) {
spdlog::error("Parameter count mismatch. Got {}, expected {} (1 + {}*9 + 1)",
state.size(), expected_param_count, config.num_hidden_layers);
// Do not return false here to allow listing and detailed checks below
}
// List all parameters for manual verification
spdlog::info("Listing all Tiny-Llama parameters (name -> shape):");
for (const auto &kv : state) {
const auto &name = kv.first;
const auto &tensor = kv.second;
std::ostringstream shape_ss;
shape_ss << "[";
for (size_t i = 0; i < tensor->shape().size(); ++i) {
if (i) {
shape_ss << ", ";
}
shape_ss << tensor->shape()[i];
}
shape_ss << "]";
spdlog::info(" - {} -> {}", name, shape_ss.str());
}
// Automated verification: check all parameter shapes match hard-coded TinyLlama hierarchy
spdlog::info("Verifying listed parameters against hard-coded TinyLlama hierarchy...");
struct Expect {
std::string name;
std::vector<size_t> shape;
};
const size_t kv_dim = config.hidden_size * config.num_key_value_heads / config.num_attention_heads;
std::vector<Expect> expected;
// embed and final norm
expected.push_back({"embed_tokens.weight", {config.vocab_size, config.hidden_size}});
// per-layer expectations
for (size_t i = 0; i < config.num_hidden_layers; ++i) {
const std::string prefix = std::string("layers.") + std::to_string(i) + ".";
expected.push_back({prefix + "input_layernorm.weight", {config.hidden_size}});
expected.push_back({prefix + "self_attn.q_proj.weight", {config.hidden_size, config.hidden_size}});
expected.push_back({prefix + "self_attn.k_proj.weight", {kv_dim, config.hidden_size}});
expected.push_back({prefix + "self_attn.v_proj.weight", {kv_dim, config.hidden_size}});
expected.push_back({prefix + "self_attn.o_proj.weight", {config.hidden_size, config.hidden_size}});
expected.push_back({prefix + "post_attention_layernorm.weight", {config.hidden_size}});
expected.push_back({prefix + "mlp.gate_proj.weight", {config.intermediate_size, config.hidden_size}});
expected.push_back({prefix + "mlp.up_proj.weight", {config.intermediate_size, config.hidden_size}});
expected.push_back({prefix + "mlp.down_proj.weight", {config.hidden_size, config.intermediate_size}});
}
expected.push_back({"norm.weight", {config.hidden_size}});
bool all_ok = true;
// Check expected ones (existence and shapes)
for (const auto &e : expected) {
auto it = state.find(e.name);
if (it == state.end()) {
spdlog::error("Missing expected parameter: {}", e.name);
all_ok = false;
continue;
}
auto got = it->second->shape();
if (got != e.shape) {
std::ostringstream got_ss, exp_ss;
got_ss << "[";
for (size_t i = 0; i < got.size(); ++i) {
if (i) {
got_ss << ", ";
}
got_ss << got[i];
}
got_ss << "]";
exp_ss << "[";
for (size_t i = 0; i < e.shape.size(); ++i) {
if (i) {
exp_ss << ", ";
}
exp_ss << e.shape[i];
}
exp_ss << "]";
spdlog::error("Shape mismatch for '{}': got {}, expected {}", e.name, got_ss.str(), exp_ss.str());
all_ok = false;
}
}
// Check for unexpected extra parameters
for (const auto &kvp : state) {
const auto &name = kvp.first;
bool is_expected = false;
for (const auto &e : expected) {
if (e.name == name) {
is_expected = true;
break;
}
}
if (!is_expected) {
std::ostringstream got_ss;
auto got = kvp.second->shape();
got_ss << "[";
for (size_t i = 0; i < got.size(); ++i) {
if (i) {
got_ss << ", ";
}
got_ss << got[i];
}
got_ss << "]";
spdlog::warn("Unexpected parameter present: {} with shape {}", name, got_ss.str());
}
}
if (!all_ok) {
spdlog::error("Tiny-Llama parameter verification: FAILED - see errors above");
return false;
}
spdlog::info("Tiny-Llama parameter verification: PASSED");
// Create test weights
std::unordered_map<std::string, infinicore::Tensor> test_state_dict;
for (const auto &[name, tensor] : state) {
// Create a test tensor with ones
test_state_dict.emplace(name, infinicore::Tensor::ones(tensor->shape(),
infinicore::DataType::F32,
infinicore::Device()));
}
// Load the test weights
model.load_state_dict(test_state_dict);
// Verify weights were loaded
auto loaded_state = model.state_dict();
bool load_success = true;
for (const auto &[name, _] : test_state_dict) {
if (loaded_state.find(name) == loaded_state.end()) {
spdlog::error("Parameter '{}' not found after load_state_dict", name);
load_success = false;
}
}
if (!load_success) {
spdlog::error("Weight loading verification failed");
return false;
}
spdlog::info("✓ State dict save/load mechanism verified");
// ============================================
// Summary
// ============================================
spdlog::info("");
spdlog::info("==========================================");
spdlog::info("✅ Tiny-Llama Model Test Summary");
spdlog::info("==========================================");
spdlog::info("✓ Metadata validation: PASSED (config matches actual model)");
spdlog::info("✓ Model construction: PASSED");
spdlog::info("✓ Parameter shapes: PASSED (11 parameters)");
spdlog::info("✓ Forward passes: PASSED");
spdlog::info("✓ Weight loading mechanism: PASSED");
spdlog::info("✓ Architecture compatibility: Tiny-Llama-1.1B-Chat-v1.0");
spdlog::info("✓ GQA support: num_key_value_heads={}", config.num_key_value_heads);
spdlog::info("");
spdlog::info("Model is ready for:");
spdlog::info(" - Full 22-layer implementation");
spdlog::info(" - Safetensors/pickle weight loading");
spdlog::info(" - Inference and fine-tuning");
spdlog::info("==========================================");
return true;
} catch (const std::exception &e) {
spdlog::error("Exception in testTinyLlamaConstruction: {}", e.what());
return false;
}
});
}
// Main test runner
TestResult NNModuleTest::run() {
std::vector<TestResult> results;
std::cout << "==============================================\n"
<< "InfiniCore nn::Module Test Suite\n"
<< "==============================================" << std::endl;
results.push_back(testBasicModuleCreation()); // Merged: creation + parameters + state_dict + load
results.push_back(testLoadStateDict()); // Advanced: hierarchical modules
results.push_back(testModuleHierarchy()); // Demonstrates hierarchical construction
results.push_back(testParameterLoading()); // Blob loading
results.push_back(testModuleLinear()); // Linear module comprehensive test
results.push_back(testModuleEmbedding()); // Embedding module test
results.push_back(testModuleRMSNorm()); // RMSNorm module test
results.push_back(testTinyLlamaConstruction()); // Comprehensive: TinyLlama model test
// Check if all tests passed
bool all_passed = true;
for (const auto &result : results) {
if (!result.passed) {
all_passed = false;
break;
}
}
return TestResult("NNModuleTest", all_passed,
all_passed ? "" : "Some nn::module tests failed");
}
} // namespace infinicore::test
#ifndef __INFINICORE_TEST_NN_MODULE_H__
#define __INFINICORE_TEST_NN_MODULE_H__
#include "infinicore/device.hpp"
#include "infinicore/nn/embedding.hpp"
#include "infinicore/nn/module.hpp"
#include "infinicore/nn/parameter.hpp"
#include "infinicore/nn/rmsnorm.hpp"
#include "test_runner.h"
#include <algorithm>
#include <cmath>
#include <fstream>
#include <iostream>
#include <memory>
#include <sstream>
#include <sys/stat.h>
#include <vector>
namespace infinicore::test {
// Simple test module that mimics torch.nn.Linear
class MockLinearModule : public infinicore::nn::Module {
public:
MockLinearModule(int input_size, int output_size, const infinicore::Device &device)
: input_size_(input_size), output_size_(output_size), device_(device) {
// Initialize weight parameter (similar to torch.nn.Linear.weight)
register_parameter("weight",
infinicore::nn::Parameter({static_cast<size_t>(output_size), static_cast<size_t>(input_size)}, infinicore::DataType::F32, device));
// Initialize bias parameter (similar to torch.nn.Linear.bias)
register_parameter("bias",
infinicore::nn::Parameter({static_cast<size_t>(output_size)}, infinicore::DataType::F32, device));
}
// Simple forward pass (conceptual - would need actual matrix operations)
infinicore::Tensor forward(const infinicore::Tensor &input) {
// This is a placeholder - in a real implementation, you'd do matrix multiplication
// For testing purposes, we'll just return the input
return input;
}
infinicore::Tensor get_weight() const {
auto state_dict = this->state_dict();
auto it = state_dict.find("weight");
if (it != state_dict.end()) {
return it->second;
}
throw std::runtime_error("Weight parameter not found");
}
infinicore::Tensor get_bias() const {
auto state_dict = this->state_dict();
auto it = state_dict.find("bias");
if (it != state_dict.end()) {
return it->second;
}
throw std::runtime_error("Bias parameter not found");
}
private:
int input_size_;
int output_size_;
infinicore::Device device_;
};
class NNModuleTest : public TestFramework {
public:
TestResult run() override;
std::string getName() const override { return "NNModuleTest"; }
private:
TestResult testBasicModuleCreation(); // Merged: creation, parameters, state_dict, load_state_dict
TestResult testLoadStateDict(); // Advanced: hierarchical modules
TestResult testModuleHierarchy(); // Demonstrates proper hierarchical construction pattern
TestResult testParameterLoading(); // Test blob parameter loading
TestResult testModuleLinear(); // Comprehensive Linear module test
TestResult testModuleEmbedding(); // Embedding module test
TestResult testModuleRMSNorm(); // RMSNorm module test
TestResult testTinyLlamaConstruction(); // Comprehensive: construction + weight loading + validation
};
} // namespace infinicore::test
#endif // __INFINICORE_TEST_NN_MODULE_H__
#ifndef __INFINICORE_TEST_RUNNER_H__
#define __INFINICORE_TEST_RUNNER_H__
#include <chrono>
#include <cmath>
#include <exception>
#include <infinicore.hpp>
#include <iostream>
#include <memory>
#include <spdlog/spdlog.h>
#include <sstream>
#include <string>
#include <vector>
namespace infinicore::test {
// ============================================================================
// Common Test Utilities
// ============================================================================
/**
* @brief Compare two InfiniCore tensors elementwise with tolerance
*
* Compares two tensors for approximate equality, useful for testing numerical
* computations where exact equality is not expected due to floating-point arithmetic.
*
* @param actual The actual tensor result
* @param expected The expected tensor result
* @param rtol Relative tolerance (default: 1e-5)
* @param atol Absolute tolerance (default: 1e-5)
* @return true if tensors are approximately equal, false otherwise
*
* @note Currently only supports F32 dtype
* @note Tensors are automatically moved to CPU for comparison
* @note Reports up to 10 mismatches with detailed coordinates
*/
inline bool tensorsAllClose(const infinicore::Tensor &actual,
const infinicore::Tensor &expected,
double rtol = 1e-5,
double atol = 1e-5) {
if (actual->shape() != expected->shape()) {
spdlog::error("Shape mismatch: actual vs expected");
return false;
}
auto cpu = infinicore::Device(infinicore::Device::Type::CPU, 0);
auto a_cpu = actual->to(cpu);
a_cpu = a_cpu->contiguous();
auto b_cpu = expected->to(cpu);
b_cpu = b_cpu->contiguous();
if (a_cpu->dtype() != b_cpu->dtype()) {
spdlog::error("DType mismatch");
return false;
}
// Only support F32 in this test
if (a_cpu->dtype() != infinicore::DataType::F32) {
spdlog::error("Unsupported dtype for comparison; only F32 supported in test");
return false;
}
size_t n = a_cpu->numel();
const auto &shape = a_cpu->shape();
// Precompute strides for index -> coords mapping
std::vector<size_t> stride(shape.size(), 1);
for (int i = static_cast<int>(shape.size()) - 2; i >= 0; --i) {
stride[i] = stride[i + 1] * shape[i + 1];
}
const float *ap = reinterpret_cast<const float *>(a_cpu->data());
const float *bp = reinterpret_cast<const float *>(b_cpu->data());
size_t max_diff_index = 0;
float max_diff = 0.0f;
size_t num_fail_reported = 0;
for (size_t i = 0; i < n; ++i) {
float av = ap[i];
float bv = bp[i];
float diff = std::fabs(av - bv);
if (diff > static_cast<float>(atol + rtol * std::fabs(bv))) {
if (diff > max_diff) {
max_diff = diff;
max_diff_index = i;
}
if (num_fail_reported < 10) {
// Convert linear index to coordinates
std::vector<size_t> coords(shape.size(), 0);
size_t t = i;
for (size_t d = 0; d < shape.size(); ++d) {
coords[d] = t / stride[d];
t -= coords[d] * stride[d];
}
std::stringstream ss;
ss << "[";
for (size_t d = 0; d < coords.size(); ++d) {
ss << coords[d] << (d + 1 < coords.size() ? "," : "]");
}
double tol = atol + rtol * std::fabs(bv);
spdlog::error("Mismatch at index {} coords {}: actual={} expected={} diff={} tol={}",
i, ss.str(), av, bv, diff, tol);
num_fail_reported++;
}
}
}
if (num_fail_reported > 0) {
// Report summary with max diff
std::vector<size_t> coords(shape.size(), 0);
size_t t = max_diff_index;
for (size_t d = 0; d < shape.size(); ++d) {
coords[d] = t / stride[d];
t -= coords[d] * stride[d];
}
std::stringstream ss;
ss << "[";
for (size_t d = 0; d < coords.size(); ++d) {
ss << coords[d] << (d + 1 < coords.size() ? "," : "]");
}
spdlog::error("Max diff {} at linear index {} coords {}", max_diff, max_diff_index, ss.str());
return false;
}
return true;
}
// ============================================================================
// Test Framework Classes
// ============================================================================
// Test result structure
struct TestResult {
std::string test_name;
bool passed;
std::string error_message;
std::chrono::microseconds duration;
TestResult(const std::string &name, bool pass, const std::string &error = "",
std::chrono::microseconds dur = std::chrono::microseconds(0))
: test_name(name), passed(pass), error_message(error), duration(dur) {}
};
// Test framework base class
class TestFramework {
public:
virtual ~TestFramework() = default;
virtual TestResult run() = 0;
virtual std::string getName() const = 0;
protected:
void logTestStart(const std::string &test_name) {
std::cout << "[TEST] Starting: " << test_name << std::endl;
}
void logTestResult(const TestResult &result) {
std::cout << "[TEST] " << (result.passed ? "PASSED" : "FAILED")
<< ": " << result.test_name;
if (!result.passed && !result.error_message.empty()) {
std::cout << " - " << result.error_message;
}
std::cout << " (Duration: " << result.duration.count() << "μs)" << std::endl;
}
template <typename Func>
TestResult measureTime(const std::string &test_name, Func &&func) {
auto start = std::chrono::high_resolution_clock::now();
try {
bool result = func();
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
return TestResult(test_name, result, "", duration);
} catch (const std::exception &e) {
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
return TestResult(test_name, false, e.what(), duration);
}
}
};
// Test runner
class InfiniCoreTestRunner {
public:
void addTest(std::unique_ptr<TestFramework> test) {
tests_.push_back(std::move(test));
}
std::vector<TestResult> runAllTests() {
std::vector<TestResult> results;
std::cout << "==============================================\n"
<< "InfiniCore Test Suite\n"
<< "==============================================" << std::endl;
for (auto &test : tests_) {
logTestStart(test->getName());
TestResult result = test->run();
logTestResult(result);
results.push_back(result);
}
printSummary(results);
return results;
}
private:
std::vector<std::unique_ptr<TestFramework>> tests_;
void logTestStart(const std::string &test_name) {
std::cout << "\n[SUITE] Running: " << test_name << std::endl;
}
void logTestResult(const TestResult &result) {
std::cout << "[SUITE] " << (result.passed ? "PASSED" : "FAILED")
<< ": " << result.test_name << std::endl;
}
void printSummary(const std::vector<TestResult> &results) {
size_t passed = 0, failed = 0;
std::chrono::microseconds total_time(0);
std::vector<TestResult> failed_tests;
for (const auto &result : results) {
if (result.passed) {
passed++;
} else {
failed++;
failed_tests.push_back(result);
}
total_time += result.duration;
}
// Print list of failed tests if any
if (!failed_tests.empty()) {
std::cout << "\n==============================================\n"
<< "❌ FAILED TESTS\n"
<< "==============================================" << std::endl;
for (const auto &test : failed_tests) {
std::cout << " • " << test.test_name;
if (!test.error_message.empty()) {
std::cout << "\n Error: " << test.error_message;
}
std::cout << "\n Duration: " << test.duration.count() << "μs" << std::endl;
}
}
std::cout << "\n==============================================\n"
<< "Test Summary\n"
<< "==============================================\n"
<< "Total Tests: " << results.size() << "\n"
<< "Passed: " << passed << "\n"
<< "Failed: " << failed << "\n"
<< "Total Time: " << total_time.count() << "μs\n"
<< "==============================================" << std::endl;
}
};
} // namespace infinicore::test
#endif // __INFINICORE_TEST_RUNNER_H__
......@@ -4,13 +4,14 @@
#include "infinicore/context/context.hpp"
#include "infinicore/tensor.hpp"
#include "memory_test.h"
#include "test_runner.h"
#include <iostream>
#include <memory>
#include <vector>
namespace infinicore::test {
class TensorDestructorTest : public MemoryTestFramework {
class TensorDestructorTest : public TestFramework {
public:
TestResult run() override;
std::string getName() const override { return "TensorDestructorTest"; }
......
#include "infinicore/nn/embedding.hpp"
#include "infinicore/context/context.hpp"
#include "infinicore/ops.hpp"
#include <spdlog/spdlog.h>
#include <stdexcept>
namespace infinicore::nn {
Embedding::Embedding(size_t num_embeddings,
size_t embedding_dim,
std::optional<int64_t> padding_idx,
const DataType &dtype,
const Device &device)
: num_embeddings_(num_embeddings),
embedding_dim_(embedding_dim),
padding_idx_(padding_idx),
dtype_(dtype) {
device_ = device;
// Validate padding_idx
if (padding_idx_.has_value()) {
int64_t idx = padding_idx_.value();
if (idx < 0 || idx >= static_cast<int64_t>(num_embeddings)) {
throw std::invalid_argument(
"padding_idx must be within num_embeddings range, got " + std::to_string(idx) + " for num_embeddings=" + std::to_string(num_embeddings));
}
}
// Initialize parameter using macro
INFINICORE_NN_PARAMETER_INIT(weight, ({num_embeddings, embedding_dim}, dtype_, device));
// If padding_idx is specified, initialize that row to zeros
if (padding_idx_.has_value()) {
// TODO: Set weight[padding_idx] to zeros
// This would require a slice operation
}
spdlog::debug("Created Embedding module: num_embeddings={}, embedding_dim={}, dtype={}, padding_idx={}",
num_embeddings, embedding_dim, static_cast<int>(dtype_),
padding_idx_.has_value() ? std::to_string(padding_idx_.value()) : "None");
}
Tensor Embedding::forward(const Tensor &indices) const {
// Get the shape of indices
auto indices_shape = indices->shape();
// Output shape: indices_shape + [embedding_dim]
std::vector<size_t> output_shape = indices_shape;
output_shape.push_back(embedding_dim_);
// Create output tensor on the same device as weight
auto out = Tensor::empty(output_shape, weight_->dtype(), weight_->device());
// Flatten indices for sequential row copies
auto cpu_device = Device(Device::Type::CPU, 0);
auto indices_cpu = indices->to(cpu_device)->contiguous();
const auto *indices_data = reinterpret_cast<const int64_t *>(indices_cpu->data());
// Calculate total number of lookups
size_t num_lookups = 1;
for (auto dim : indices_shape) {
num_lookups *= dim;
}
const size_t row_bytes = embedding_dim_ * (weight_->dtype() == DataType::F32 ? sizeof(float) : weight_->dtype() == DataType::BF16 ? sizeof(uint16_t)
: sizeof(float));
// Source and destination base pointers
auto *weight_base = weight_->data();
auto *out_base = out->data();
if (weight_->device().getType() == Device::Type::CPU) {
// CPU path: memcpy row by row
for (size_t i = 0; i < num_lookups; ++i) {
int64_t idx = indices_data[i];
if (idx < 0 || idx >= static_cast<int64_t>(num_embeddings_)) {
throw std::out_of_range(
"Index out of range: " + std::to_string(idx) + " (num_embeddings=" + std::to_string(num_embeddings_) + ")");
}
std::memcpy(out_base + i * row_bytes, weight_base + idx * row_bytes, row_bytes);
}
} else {
// Device path: use stream-ordered D2D copies
for (size_t i = 0; i < num_lookups; ++i) {
int64_t idx = indices_data[i];
if (idx < 0 || idx >= static_cast<int64_t>(num_embeddings_)) {
throw std::out_of_range(
"Index out of range: " + std::to_string(idx) + " (num_embeddings=" + std::to_string(num_embeddings_) + ")");
}
context::memcpyD2D(out_base + i * row_bytes, weight_base + idx * row_bytes, row_bytes);
}
}
return out;
}
std::string Embedding::extra_repr() const {
std::string repr = "Embedding(num_embeddings=" + std::to_string(num_embeddings_) + ", embedding_dim=" + std::to_string(embedding_dim_) + ", dtype=" + std::to_string(static_cast<int>(dtype_));
if (padding_idx_.has_value()) {
repr += ", padding_idx=" + std::to_string(padding_idx_.value());
}
repr += ")";
return repr;
}
} // namespace infinicore::nn
#include "infinicore/nn/linear.hpp"
#include "infinicore/ops.hpp"
#include <spdlog/spdlog.h>
namespace infinicore::nn {
Linear::Linear(size_t in_features, size_t out_features, bool bias, const Device &device)
: in_features_(in_features),
out_features_(out_features),
has_bias_(bias) {
device_ = device;
// Initialize parameters using macro
INFINICORE_NN_PARAMETER_INIT(weight, ({out_features, in_features}, DataType::F32, device));
// Register bias parameter if requested
if (bias) {
INFINICORE_NN_PARAMETER_INIT(bias, ({out_features}, DataType::F32, device));
} else {
bias_ = Parameter(); // Default constructed empty parameter
}
spdlog::debug("Created Linear module: in_features={}, out_features={}, bias={}",
in_features, out_features, bias);
}
Tensor Linear::compute_linear(Tensor &input) const {
// Create output tensor with shape [batch_size, out_features]
auto output_shape = input->shape();
output_shape[output_shape.size() - 1] = out_features_;
auto output = Tensor::empty(output_shape, input->dtype(), input->device());
// Transpose weight: [out_features, in_features] -> [in_features, out_features]
auto weight_t = weight_->permute({1, 0});
if (has_bias_) {
// Broadcast bias to output shape
size_t ndim_diff = output->ndim() - 1;
std::vector<Stride> strides(ndim_diff, 0);
strides.push_back(bias_->stride(0));
auto bias_view = bias_->as_strided(output->shape(), strides);
// First set output to bias (broadcasted)
infinicore::op::rearrange_(output, bias_view);
// Compute matmul result separately, then add to output
auto matmul_result = infinicore::op::matmul(input, weight_t);
infinicore::op::add_(output, output, matmul_result);
} else {
// No bias: just compute output = input @ weight_t
infinicore::op::matmul_(output, input, weight_t);
}
return output;
}
Tensor Linear::forward(Tensor &input) const {
return compute_linear(input);
}
Tensor Linear::forward(Tensor &input, Tensor &residual) const {
auto output = compute_linear(input);
// Add residual: output = output + residual
infinicore::op::add_(output, output, residual);
return output;
}
std::string Linear::extra_repr() const {
return "Linear(in_features=" + std::to_string(in_features_) + ", out_features=" + std::to_string(out_features_) + ", bias=" + (has_bias_ ? "true" : "false") + ")";
}
} // namespace infinicore::nn
......@@ -2,12 +2,26 @@
namespace infinicore::nn {
const std::unordered_map<std::string, Parameter> &Module::state_dict() const {
return parameters_;
static std::unordered_map<std::string, Parameter> result;
result.clear();
collect_all_parameters(result, "");
return result;
}
void Module::load_state_dict(const std::unordered_map<std::string, Tensor> &_state_dict) {
for (auto &p : parameters_) {
load_parameter(p.first, p.second);
// Collect all parameters from this module and its submodules with their full hierarchical names
std::unordered_map<std::string, Parameter> all_params;
collect_all_parameters(all_params, "");
// For each parameter in this module hierarchy, load from the state dict
for (auto &[param_full_name, param] : all_params) {
// Look up the corresponding tensor in the input state dict using the full name
auto it = _state_dict.find(param_full_name);
if (it != _state_dict.end()) {
param->copy_from(it->second);
}
}
}
......@@ -25,4 +39,18 @@ Tensor Module::register_parameter(const std::string &name, Parameter param) {
return param;
}
void Module::collect_all_parameters(std::unordered_map<std::string, Parameter> &all_params, const std::string &prefix) const {
// Add direct parameters with the given prefix
for (const auto &[param_name, param] : parameters_) {
std::string full_name = prefix.empty() ? param_name : prefix + "." + param_name;
all_params[full_name] = param;
}
// Recursively collect parameters from submodules with extended prefix
for (const auto &[sub_name, submodule] : submodules_) {
std::string sub_prefix = prefix.empty() ? sub_name : prefix + "." + sub_name;
submodule->collect_all_parameters(all_params, sub_prefix);
}
}
} // namespace infinicore::nn
......@@ -5,6 +5,10 @@
#include <cstring>
namespace infinicore::nn {
Parameter::Parameter()
: Tensor(Tensor::empty({}, DataType::F32, Device(Device::Type::CPU, 0), false)) {
}
Parameter::Parameter(
const Shape &shape,
const DataType &dtype,
......
#include "infinicore/nn/rmsnorm.hpp"
#include "infinicore/ops.hpp"
#include <cmath>
#include <spdlog/spdlog.h>
#include <stdexcept>
namespace infinicore::nn {
RMSNorm::RMSNorm(size_t normalized_shape, double eps, const Device &device)
: normalized_shape_(normalized_shape),
eps_(eps) {
device_ = device;
// Initialize parameter using macro
INFINICORE_NN_PARAMETER_INIT(weight, ({normalized_shape}, DataType::F32, device));
// Initialize weight to ones (standard practice for RMSNorm)
auto ones_tensor = Tensor::ones({normalized_shape}, DataType::F32, device);
weight_->copy_from(ones_tensor);
spdlog::debug("Created RMSNorm module: normalized_shape={}, eps={}",
normalized_shape, eps);
}
Tensor RMSNorm::forward(const Tensor &x) const {
// Validate input shape - last dimension should match normalized_shape
auto input_shape = x->shape();
if (input_shape.empty() || input_shape.back() != normalized_shape_) {
throw std::invalid_argument(
"Input last dimension " + std::to_string(input_shape.back()) + " doesn't match normalized_shape " + std::to_string(normalized_shape_));
}
// Delegate to InfiniCore op (backed by InfiniRT/InfiniOP)
// y = RMSNorm(x, weight, eps)
return op::rms_norm(x, weight_, static_cast<float>(eps_));
}
std::string RMSNorm::extra_repr() const {
return "RMSNorm(normalized_shape=" + std::to_string(normalized_shape_) + ", eps=" + std::to_string(eps_) + ")";
}
} // namespace infinicore::nn
......@@ -86,6 +86,7 @@ target("infinicore-test")
add_files(os.projectdir().."/src/infinicore/context/*/*.cc")
add_files(os.projectdir().."/src/infinicore/tensor/*.cc")
add_files(os.projectdir().."/src/infinicore/ops/*/*.cc")
add_files(os.projectdir().."/src/infinicore/nn/*.cc")
add_files(os.projectdir().."/src/infinicore-test/*.cc")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment