Issue/506 实现TensorMetaData析构方法

e6dae2e3 · Ceng · GitHub · df8d4408 · e6dae2e3 · e6dae2e3
Unverified Commit e6dae2e3 authored Oct 16, 2025 by Ceng Committed by GitHub Oct 16, 2025
12 changed files
--- a/include/infinicore/tensor.hpp
+++ b/include/infinicore/tensor.hpp
@@ -27,6 +27,7 @@ struct TensorMetaData {
    infiniopTensorDescriptor_t desc;
    TensorMetaData(const Shape &shape, const Strides &strides, const DataType &dtype);
+    ~TensorMetaData();
 };
 struct TensorData {

--- a/src/infinicore-test/README.md
+++ b/src/infinicore-test/README.md
+# InfiniCore Memory Management Test Suite
+This test suite provides comprehensive testing for the InfiniCore memory management system, focusing on the critical issues identified in the memory management architecture analysis.
+## Overview
+The test suite includes six main test categories:
+1. **Basic Memory Tests** - Basic allocation, deallocation, and memory operations
+2. **Concurrency Tests** - Thread safety and concurrent access testing
+3. **Exception Safety Tests** - Exception handling and safety testing
+4. **Memory Leak Tests** - Memory leak detection and prevention
+5. **Performance Tests** - Performance benchmarks and optimization validation
+6. **Stress Tests** - High-load stress testing and edge cases
+## Building
+### Using XMake (if integrated with main build)
+```bash
+# From InfiniCore root directory
+xmake build infinicore-test
+```
+## Running Tests
+### Run All Tests
+```bash
+./infinicore-test
+```
+### Run Specific Test Categories
+```bash
+# Basic memory tests
+./infinicore-test --test basic
+# Concurrency tests
+./infinicore-test --test concurrency
+# Exception safety tests
+./infinicore-test --test exception
+# Memory leak tests
+./infinicore-test --test leak
+# Performance tests
+./infinicore-test --test performance
+# Stress tests
+./infinicore-test --test stress
+```
+### Run with Specific Device
+```bash
+# Run on CPU
+./infinicore-test --cpu
+# Run on NVIDIA GPU
+./infinicore-test --nvidia
+# Run on other devices
+./infinicore-test --cambricon
+./infinicore-test --ascend
+./infinicore-test --metax
+./infinicore-test --moore
+./infinicore-test --iluvatar
+./infinicore-test --kunlun
+./infinicore-test --hygon
+```
+### Customize Test Parameters
+```bash
+# Run with custom thread count
+./infinicore-test --threads 8
+# Run with custom iteration count
+./infinicore-test --iterations 5000
+# Combine options
+./infinicore-test --nvidia --test concurrency --threads 16 --iterations 2000
+```
+## Test Categories
+### 1. Basic Memory Tests
+Tests fundamental memory operations:
+- Memory allocation and deallocation
+- Memory size and device properties
+- Memory read/write operations
+- Pinned memory allocation
+- Memory data integrity
+### 2. Concurrency Tests
+Tests thread safety and concurrent access:
+- **Concurrent Allocations**: Multiple threads allocating memory simultaneously
+- **Concurrent Device Switching**: Multiple threads switching device contexts
+- **Memory Allocation Race**: Race condition testing for memory operations
+### 3. Exception Safety Tests
+Tests exception handling and safety:
+- **Allocation Failure**: Tests behavior when allocation fails
+- **Deallocation Exception**: Tests exception safety during deallocation
+- **Context Switch Exception**: Tests exception handling during device switching
+### 4. Memory Leak Tests
+Tests memory leak detection and prevention:
+- **Basic Leak Detection**: Basic memory leak detection
+- **Cross-Device Leak Detection**: Memory leaks in cross-device scenarios
+- **Exception Leak Detection**: Memory leaks during exception handling
+### 5. Performance Tests
+Tests performance and benchmarks:
+- **Allocation Performance**: Memory allocation speed benchmarks
+- **Concurrent Performance**: Performance under concurrent load
+- **Memory Copy Performance**: Memory copy bandwidth tests
+### 6. Stress Tests
+Tests high-load scenarios and edge cases:
+- **High Frequency Allocations**: Rapid allocation/deallocation cycles
+- **Large Memory Allocations**: Large memory block allocation
+- **Cross-Device Stress**: Stress testing across multiple devices
+## Expected Results
+### Critical Issues to Watch For
+The tests are designed to detect the critical issues identified in the memory management analysis:
+1. **Thread Safety Violations**
+   - Race conditions in concurrent allocations
+   - Inconsistent device context switching
+   - Global state corruption
+2. **Memory Leaks**
+   - Unfreed memory after deallocation
+   - Cross-device memory not properly cleaned up
+   - Exception-related memory leaks
+3. **Exception Safety Issues**
+   - Exceptions during allocation causing resource leaks
+   - Exceptions in destructors causing `std::terminate`
+   - Incomplete cleanup on exceptions
+4. **Performance Issues**
+   - Slow allocation/deallocation performance
+   - Poor concurrent performance
+   - Inefficient memory copy operations
+### Performance Thresholds
+The tests include performance thresholds:
+- **Allocation Performance**: < 100μs per allocation
+- **Concurrent Performance**: < 200μs per allocation under load
+- **Memory Bandwidth**: > 100 MB/s for memory copies
+## Test Output
+### Successful Test Run
+```
+==============================================
+InfiniCore Memory Management Test Suite
+==============================================
+Device: 0
+Threads: 4
+Iterations: 1000
+==============================================
+[SUITE] Running: BasicMemoryTest
+[TEST] Starting: BasicMemoryTest
+[TEST] PASSED: BasicMemoryTest (Duration: 1234μs)
+[SUITE] Running: ConcurrencyTest
+[TEST] Starting: ConcurrencyTest
+[TEST] PASSED: ConcurrencyTest (Duration: 5678μs)
+...
+==============================================
+Test Summary
+==============================================
+Total Tests: 6
+Passed: 6
+Failed: 0
+Total Time: 12345μs
+==============================================
+✅ All tests passed!
+```
+### Failed Test Run
+```
+[TEST] FAILED: ConcurrencyTest - Concurrent allocation test failed: expected 8000 successes, got 7995 successes and 5 failures
+==============================================
+Final Results
+==============================================
+Total Tests: 6
+Passed: 5
+Failed: 1
+==============================================
+❌ Some tests failed. Please review the output above.
+```
+## Debugging Failed Tests
+### Common Issues and Solutions
+1. **Thread Safety Failures**
+   - Check for race conditions in global state access
+   - Verify proper synchronization in allocators
+   - Review device context switching logic
+2. **Memory Leak Failures**
+   - Check deallocation logic in allocators
+   - Verify cross-device cleanup mechanisms
+   - Review exception safety in destructors
+3. **Performance Failures**
+   - Profile allocation/deallocation paths
+   - Check for unnecessary context switching
+   - Review memory copy implementations
+4. **Exception Safety Failures**
+   - Verify no-throw guarantees in destructors
+   - Check exception handling in allocation paths
+   - Review resource cleanup on exceptions
+## Integration with CI/CD
+### GitHub Actions Example
+```yaml
+- name: Run Memory Tests
+  run: |
+    cd src/infinicore-test
+    mkdir build && cd build
+    cmake ..
+    make
+    ./infinicore-test --test all
+```
+### Custom Test Targets
+```bash
+# Run specific test categories
+make test-memory-basic
+make test-memory-concurrency
+make test-memory-exception
+make test-memory-leak
+make test-memory-performance
+make test-memory-stress
+make test-memory-all
+```
+## Contributing
+When adding new tests:
+1. Follow the existing test framework pattern
+2. Add appropriate error messages and logging
+3. Include performance thresholds where applicable
+4. Test both success and failure scenarios
+5. Update this README with new test descriptions
+## Dependencies
+- InfiniCore library (infinicore, infiniop, infinirt, infiniccl)
+- C++17 compatible compiler
+- Threading library (pthread on Linux)
+- CMake 3.16+ (for CMake build)
+## Notes
+- Tests are designed to be deterministic where possible
+- Some tests may have timing dependencies
+- Performance tests may vary based on system load
+- Memory leak detection is basic and may not catch all leaks
+- Tests assume proper InfiniCore initialization
--- a/src/infinicore-test/main.cc
+++ b/src/infinicore-test/main.cc
+#include "memory_test.h"
+#include "test_tensor_destructor.h"
+#include <iostream>
+#include <memory>
+#include <spdlog/spdlog.h>
+#include <vector>
+struct ParsedArgs {
+    infiniDevice_t device_type = INFINI_DEVICE_CPU;
+    bool run_basic = true;
+    bool run_concurrency = true;
+    bool run_exception_safety = true;
+    bool run_memory_leak = true;
+    bool run_performance = true;
+    bool run_stress = true;
+    int num_threads = 4;
+    int iterations = 1000;
+};
+void printUsage() {
+    std::cout << "Usage:" << std::endl
+              << "  infinicore-test [--<device>] [--test <test_name>] [--threads <num>] [--iterations <num>]" << std::endl
+              << std::endl
+              << "Options:" << std::endl
+              << "  --<device>        Specify the device type (default: cpu)" << std::endl
+              << "  --test <name>     Run specific test (basic|concurrency|exception|leak|performance|stress|all)" << std::endl
+              << "  --threads <num>   Number of threads for concurrency tests (default: 4)" << std::endl
+              << "  --iterations <num> Number of iterations for stress tests (default: 1000)" << std::endl
+              << "  --help            Show this help message" << std::endl
+              << std::endl
+              << "Available devices:" << std::endl
+              << "  cpu         - Default" << std::endl
+              << "  nvidia" << std::endl
+              << "  cambricon" << std::endl
+              << "  ascend" << std::endl
+              << "  metax" << std::endl
+              << "  moore" << std::endl
+              << "  iluvatar" << std::endl
+              << "  kunlun" << std::endl
+              << "  hygon" << std::endl
+              << std::endl
+              << "Available tests:" << std::endl
+              << "  basic       - Basic memory allocation and deallocation tests" << std::endl
+              << "  concurrency - Thread safety and concurrent access tests" << std::endl
+              << "  exception   - Exception safety tests" << std::endl
+              << "  leak        - Memory leak detection tests" << std::endl
+              << "  performance - Performance and benchmark tests" << std::endl
+              << "  stress      - Stress tests with high load" << std::endl
+              << "  all         - Run all tests (default)" << std::endl
+              << std::endl;
+    exit(EXIT_SUCCESS);
+}
+ParsedArgs parseArgs(int argc, char *argv[]) {
+    ParsedArgs args;
+    for (int i = 1; i < argc; ++i) {
+        std::string arg = argv[i];
+        if (arg == "--help" || arg == "-h") {
+            printUsage();
+        } else if (arg == "--cpu") {
+            args.device_type = INFINI_DEVICE_CPU;
+        } else if (arg == "--nvidia") {
+            args.device_type = INFINI_DEVICE_NVIDIA;
+        } else if (arg == "--cambricon") {
+            args.device_type = INFINI_DEVICE_CAMBRICON;
+        } else if (arg == "--ascend") {
+            args.device_type = INFINI_DEVICE_ASCEND;
+        } else if (arg == "--metax") {
+            args.device_type = INFINI_DEVICE_METAX;
+        } else if (arg == "--moore") {
+            args.device_type = INFINI_DEVICE_MOORE;
+        } else if (arg == "--iluvatar") {
+            args.device_type = INFINI_DEVICE_ILUVATAR;
+        } else if (arg == "--kunlun") {
+            args.device_type = INFINI_DEVICE_KUNLUN;
+        } else if (arg == "--hygon") {
+            args.device_type = INFINI_DEVICE_HYGON;
+        } else if (arg == "--test") {
+            if (i + 1 >= argc) {
+                std::cerr << "Error: --test requires a test name" << std::endl;
+                exit(EXIT_FAILURE);
+            }
+            std::string test_name = argv[++i];
+            args.run_basic = args.run_concurrency = args.run_exception_safety = args.run_memory_leak = args.run_performance = args.run_stress = false;
+            if (test_name == "basic") {
+                args.run_basic = true;
+            } else if (test_name == "concurrency") {
+                args.run_concurrency = true;
+            } else if (test_name == "exception") {
+                args.run_exception_safety = true;
+            } else if (test_name == "leak") {
+                args.run_memory_leak = true;
+            } else if (test_name == "performance") {
+                args.run_performance = true;
+            } else if (test_name == "stress") {
+                args.run_stress = true;
+            } else if (test_name == "all") {
+                args.run_basic = args.run_concurrency = args.run_exception_safety = args.run_memory_leak = args.run_performance = args.run_stress = true;
+            } else {
+                std::cerr << "Error: Unknown test name: " << test_name << std::endl;
+                exit(EXIT_FAILURE);
+            }
+        } else if (arg == "--threads") {
+            if (i + 1 >= argc) {
+                std::cerr << "Error: --threads requires a number" << std::endl;
+                exit(EXIT_FAILURE);
+            }
+            args.num_threads = std::stoi(argv[++i]);
+            if (args.num_threads <= 0) {
+                std::cerr << "Error: Number of threads must be positive" << std::endl;
+                exit(EXIT_FAILURE);
+            }
+        } else if (arg == "--iterations") {
+            if (i + 1 >= argc) {
+                std::cerr << "Error: --iterations requires a number" << std::endl;
+                exit(EXIT_FAILURE);
+            }
+            args.iterations = std::stoi(argv[++i]);
+            if (args.iterations <= 0) {
+                std::cerr << "Error: Number of iterations must be positive" << std::endl;
+                exit(EXIT_FAILURE);
+            }
+        } else {
+            std::cerr << "Error: Unknown argument: " << arg << std::endl;
+            exit(EXIT_FAILURE);
+        }
+    }
+    return args;
+}
+int main(int argc, char *argv[]) {
+    try {
+        // Initialize spdlog for debugging
+        spdlog::set_level(spdlog::level::debug);
+        spdlog::info("Starting InfiniCore Memory Management Test Suite");
+        ParsedArgs args = parseArgs(argc, argv);
+        spdlog::debug("Arguments parsed successfully");
+        std::cout << "==============================================\n"
+                  << "InfiniCore Memory Management Test Suite\n"
+                  << "==============================================\n"
+                  << "Device: " << static_cast<int>(args.device_type) << "\n"
+                  << "Threads: " << args.num_threads << "\n"
+                  << "Iterations: " << args.iterations << "\n"
+                  << "==============================================" << std::endl;
+        spdlog::debug("About to initialize InfiniCore context");
+        // Initialize InfiniCore context
+        infinicore::context::setDevice(infinicore::Device(static_cast<infinicore::Device::Type>(args.device_type), 0));
+        spdlog::debug("InfiniCore context initialized successfully");
+        spdlog::debug("Creating test runner");
+        // Create test runner
+        infinicore::test::MemoryTestRunner runner;
+        spdlog::debug("Test runner created successfully");
+        // Add tests based on arguments
+        if (args.run_basic) {
+            spdlog::debug("Adding BasicMemoryTest");
+            runner.addTest(std::make_unique<infinicore::test::BasicMemoryTest>());
+            spdlog::debug("BasicMemoryTest added successfully");
+            spdlog::debug("Adding TensorDestructorTest");
+            runner.addTest(std::make_unique<infinicore::test::TensorDestructorTest>());
+            spdlog::debug("TensorDestructorTest added successfully");
+        }
+        if (args.run_concurrency) {
+            runner.addTest(std::make_unique<infinicore::test::ConcurrencyTest>());
+        }
+        if (args.run_exception_safety) {
+            // runner.addTest(std::make_unique<infinicore::test::ExceptionSafetyTest>());
+        }
+        if (args.run_memory_leak) {
+            runner.addTest(std::make_unique<infinicore::test::MemoryLeakTest>());
+        }
+        if (args.run_performance) {
+            runner.addTest(std::make_unique<infinicore::test::PerformanceTest>());
+        }
+        if (args.run_stress) {
+            runner.addTest(std::make_unique<infinicore::test::StressTest>());
+        }
+        spdlog::debug("About to run all tests");
+        // Run all tests
+        auto results = runner.runAllTests();
+        spdlog::debug("All tests completed");
+        // Count results
+        size_t passed = 0, failed = 0;
+        for (const auto &result : results) {
+            if (result.passed) {
+                passed++;
+            } else {
+                failed++;
+            }
+        }
+        // Print final summary
+        std::cout << "\n==============================================\n"
+                  << "Final Results\n"
+                  << "==============================================\n"
+                  << "Total Tests: " << results.size() << "\n"
+                  << "Passed: " << passed << "\n"
+                  << "Failed: " << failed << "\n"
+                  << "==============================================" << std::endl;
+        // Exit with appropriate code
+        if (failed > 0) {
+            std::cout << "\n❌ Some tests failed. Please review the output above." << std::endl;
+            return EXIT_FAILURE;
+        } else {
+            std::cout << "\n✅ All tests passed!" << std::endl;
+            return EXIT_SUCCESS;
+        }
+    } catch (const std::exception &e) {
+        std::cerr << "Fatal error: " << e.what() << std::endl;
+        return EXIT_FAILURE;
+    } catch (...) {
+        std::cerr << "Fatal error: Unknown exception" << std::endl;
+        return EXIT_FAILURE;
+    }
+}
--- a/src/infinicore-test/memory_test.cc
+++ b/src/infinicore-test/memory_test.cc
+#include "memory_test.h"
+#include <algorithm>
+#include <cstring>
+#include <random>
+namespace infinicore::test {
+// Basic Memory Test Implementation
+TestResult BasicMemoryTest::run() {
+    return measureTime("BasicMemoryTest", [this]() -> bool {
+        try {
+            spdlog::debug("BasicMemoryTest: Starting test");
+            // Test basic memory allocation
+            spdlog::debug("BasicMemoryTest: About to allocate memory");
+            auto memory = context::allocateMemory(1024);
+            spdlog::debug("BasicMemoryTest: Memory allocated successfully");
+            if (!memory) {
+                std::cerr << "Failed to allocate memory" << std::endl;
+                return false;
+            }
+            spdlog::debug("BasicMemoryTest: Testing memory properties");
+            // Test memory properties
+            if (memory->size() != 1024) {
+                std::cerr << "Memory size mismatch: expected 1024, got " << memory->size() << std::endl;
+                return false;
+            }
+            spdlog::debug("BasicMemoryTest: Memory size check passed");
+            spdlog::debug("BasicMemoryTest: Testing memory access");
+            // Test memory access
+            std::byte *data = memory->data();
+            spdlog::debug("BasicMemoryTest: Got memory data pointer: {}", static_cast<void *>(data));
+            if (!data) {
+                std::cerr << "Memory data pointer is null" << std::endl;
+                return false;
+            }
+            spdlog::debug("BasicMemoryTest: Memory data pointer is valid");
+            // Check if this is GPU memory that can't be accessed directly
+            Device current_device = context::getDevice();
+            spdlog::debug("BasicMemoryTest: Current device type: {}", static_cast<int>(current_device.getType()));
+            spdlog::debug("BasicMemoryTest: Memory is pinned: {}", memory->is_pinned());
+            // For GPU memory, we shouldn't try to access it directly with memset
+            if (current_device.getType() != Device::Type::CPU) {
+                spdlog::debug("BasicMemoryTest: Skipping direct memory access for GPU device");
+                spdlog::debug("BasicMemoryTest: GPU memory access test completed (skipped)");
+            } else {
+                spdlog::debug("BasicMemoryTest: Testing memory write/read");
+                // Test memory write/read
+                std::memset(data, 0xAB, 1024);
+                spdlog::debug("BasicMemoryTest: Memory memset completed");
+                for (size_t i = 0; i < 1024; ++i) {
+                    if (data[i] != static_cast<std::byte>(0xAB)) {
+                        std::cerr << "Memory write/read test failed at index " << i << std::endl;
+                        return false;
+                    }
+                }
+                spdlog::debug("BasicMemoryTest: Memory write/read test completed");
+            }
+            spdlog::debug("BasicMemoryTest: Testing pinned memory allocation");
+            // Test pinned memory allocation
+            auto pinned_memory = context::allocatePinnedHostMemory(512);
+            spdlog::debug("BasicMemoryTest: Pinned memory allocated");
+            if (!pinned_memory) {
+                std::cerr << "Failed to allocate pinned memory" << std::endl;
+                return false;
+            }
+            spdlog::debug("BasicMemoryTest: Checking pinned memory properties");
+            // For CPU devices, pinned memory falls back to regular memory, so it may not be marked as pinned
+            Device pinned_device = context::getDevice();
+            if (pinned_device.getType() != Device::Type::CPU && !pinned_memory->is_pinned()) {
+                std::cerr << "Pinned memory not marked as pinned" << std::endl;
+                return false;
+            }
+            spdlog::debug("BasicMemoryTest: Pinned memory test completed");
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "BasicMemoryTest failed with exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+// Concurrency Test Implementation
+TestResult ConcurrencyTest::run() {
+    return measureTime("ConcurrencyTest", [this]() -> bool {
+        try {
+            // Run all concurrency subtests
+            auto result1 = testConcurrentAllocations();
+            if (!result1.passed) {
+                std::cerr << "Concurrent allocations test failed: " << result1.error_message << std::endl;
+                return false;
+            }
+            auto result2 = testConcurrentDeviceSwitching();
+            if (!result2.passed) {
+                std::cerr << "Concurrent device switching test failed: " << result2.error_message << std::endl;
+                return false;
+            }
+            auto result3 = testMemoryAllocationRace();
+            if (!result3.passed) {
+                std::cerr << "Memory allocation race test failed: " << result3.error_message << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "ConcurrencyTest failed with exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult ConcurrencyTest::testConcurrentAllocations() {
+    return measureTime("ConcurrentAllocations", [this]() -> bool {
+        const int num_threads = 8;
+        const int allocations_per_thread = 100;
+        std::vector<std::thread> threads;
+        std::atomic<int> success_count{0};
+        std::atomic<int> failure_count{0};
+        for (int i = 0; i < num_threads; ++i) {
+            threads.emplace_back([&, i]() {
+                try {
+                    for (int j = 0; j < allocations_per_thread; ++j) {
+                        // Allocate memory of random size
+                        size_t size = 64 + (j % 1024);
+                        auto memory = context::allocateMemory(size);
+                        if (memory && memory->size() == size) {
+                            success_count++;
+                        } else {
+                            failure_count++;
+                        }
+                        // Small delay to increase chance of race conditions
+                        std::this_thread::sleep_for(std::chrono::microseconds(1));
+                    }
+                } catch (const std::exception &e) {
+                    failure_count++;
+                    std::cerr << "Thread " << i << " failed: " << e.what() << std::endl;
+                }
+            });
+        }
+        for (auto &thread : threads) {
+            thread.join();
+        }
+        int total_expected = num_threads * allocations_per_thread;
+        if (success_count.load() != total_expected) {
+            std::cerr << "Concurrent allocation test failed: expected " << total_expected
+                      << " successes, got " << success_count.load()
+                      << " successes and " << failure_count.load() << " failures" << std::endl;
+            return false;
+        }
+        return true;
+    });
+}
+TestResult ConcurrencyTest::testConcurrentDeviceSwitching() {
+    return measureTime("ConcurrentDeviceSwitching", [this]() -> bool {
+        const int num_threads = 4;
+        std::vector<std::thread> threads;
+        std::atomic<int> success_count{0};
+        std::atomic<int> failure_count{0};
+        // Get available devices
+        std::vector<Device> devices;
+        for (int type = 0; type < static_cast<int>(Device::Type::COUNT); ++type) {
+            size_t count = context::getDeviceCount(static_cast<Device::Type>(type));
+            for (size_t i = 0; i < count; ++i) {
+                devices.emplace_back(static_cast<Device::Type>(type), i);
+            }
+        }
+        if (devices.size() < 2) {
+            std::cout << "Skipping device switching test - need at least 2 devices" << std::endl;
+            return true;
+        }
+        for (int i = 0; i < num_threads; ++i) {
+            threads.emplace_back([&, i, devices]() {
+                try {
+                    for (int j = 0; j < 50; ++j) {
+                        // Switch to random device
+                        Device target_device = devices[j % devices.size()];
+                        context::setDevice(target_device);
+                        // Verify device was set correctly
+                        Device current_device = context::getDevice();
+                        if (current_device == target_device) {
+                            success_count++;
+                        } else {
+                            failure_count++;
+                            std::cerr << "Device switching failed: expected "
+                                      << static_cast<int>(target_device.getType())
+                                      << ", got " << static_cast<int>(current_device.getType()) << std::endl;
+                        }
+                        // Allocate memory to test device context
+                        auto memory = context::allocateMemory(256);
+                        if (memory && memory->device() == target_device) {
+                            success_count++;
+                        } else {
+                            failure_count++;
+                        }
+                        std::this_thread::sleep_for(std::chrono::microseconds(10));
+                    }
+                } catch (const std::exception &e) {
+                    failure_count++;
+                    std::cerr << "Thread " << i << " failed: " << e.what() << std::endl;
+                }
+            });
+        }
+        for (auto &thread : threads) {
+            thread.join();
+        }
+        if (failure_count.load() > 0) {
+            std::cerr << "Concurrent device switching test failed: "
+                      << failure_count.load() << " failures out of "
+                      << (success_count.load() + failure_count.load()) << " operations" << std::endl;
+            return false;
+        }
+        return true;
+    });
+}
+TestResult ConcurrencyTest::testMemoryAllocationRace() {
+    return measureTime("MemoryAllocationRace", [this]() -> bool {
+        const int num_threads = 16;
+        const int allocations_per_thread = 1000;
+        std::vector<std::thread> threads;
+        std::atomic<int> success_count{0};
+        std::atomic<int> failure_count{0};
+        std::vector<std::shared_ptr<Memory>> all_allocations;
+        std::mutex allocations_mutex;
+        for (int i = 0; i < num_threads; ++i) {
+            threads.emplace_back([&, i]() {
+                std::vector<std::shared_ptr<Memory>> thread_allocations;
+                try {
+                    for (int j = 0; j < allocations_per_thread; ++j) {
+                        size_t size = 64 + (j % 1024);
+                        auto memory = context::allocateMemory(size);
+                        if (memory) {
+                            thread_allocations.push_back(memory);
+                            success_count++;
+                        } else {
+                            failure_count++;
+                        }
+                        // Occasionally deallocate some memory to test concurrent alloc/dealloc
+                        if (j % 10 == 0 && !thread_allocations.empty()) {
+                            thread_allocations.pop_back();
+                        }
+                    }
+                    // Store remaining allocations
+                    std::lock_guard<std::mutex> lock(allocations_mutex);
+                    all_allocations.insert(all_allocations.end(),
+                                           thread_allocations.begin(),
+                                           thread_allocations.end());
+                } catch (const std::exception &e) {
+                    failure_count++;
+                    std::cerr << "Thread " << i << " failed: " << e.what() << std::endl;
+                }
+            });
+        }
+        for (auto &thread : threads) {
+            thread.join();
+        }
+        // Verify all allocations are valid
+        for (const auto &memory : all_allocations) {
+            if (!memory || !memory->data()) {
+                std::cerr << "Invalid memory allocation found" << std::endl;
+                return false;
+            }
+        }
+        int total_expected = num_threads * allocations_per_thread;
+        if (success_count.load() < total_expected * 0.9) { // Allow 10% failure rate
+            std::cerr << "Memory allocation race test failed: expected at least "
+                      << total_expected * 0.9 << " successes, got " << success_count.load() << std::endl;
+            return false;
+        }
+        return true;
+    });
+}
+// Exception Safety Test Implementation
+TestResult ExceptionSafetyTest::run() {
+    return measureTime("ExceptionSafetyTest", [this]() -> bool {
+        try {
+            auto result1 = testAllocationFailure();
+            if (!result1.passed) {
+                std::cerr << "Allocation failure test failed: " << result1.error_message << std::endl;
+                return false;
+            }
+            auto result2 = testDeallocationException();
+            if (!result2.passed) {
+                std::cerr << "Deallocation exception test failed: " << result2.error_message << std::endl;
+                return false;
+            }
+            auto result3 = testContextSwitchException();
+            if (!result3.passed) {
+                std::cerr << "Context switch exception test failed: " << result3.error_message << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "ExceptionSafetyTest failed with exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult ExceptionSafetyTest::testAllocationFailure() {
+    return measureTime("AllocationFailure", [this]() -> bool {
+        try {
+            // Test allocation with extremely large size (should fail)
+            try {
+                auto memory = context::allocateMemory(SIZE_MAX);
+                std::cerr << "Expected allocation to fail with SIZE_MAX" << std::endl;
+                return false;
+            } catch (const std::exception &e) {
+                // Expected to fail
+                std::cout << "Allocation correctly failed with SIZE_MAX: " << e.what() << std::endl;
+            }
+            // Test allocation with zero size
+            try {
+                auto memory = context::allocateMemory(0);
+                if (memory) {
+                    std::cerr << "Zero-size allocation should return null or throw" << std::endl;
+                    return false;
+                }
+            } catch (const std::exception &e) {
+                // Also acceptable
+                std::cout << "Zero-size allocation correctly failed: " << e.what() << std::endl;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Allocation failure test failed with unexpected exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult ExceptionSafetyTest::testDeallocationException() {
+    return measureTime("DeallocationException", [this]() -> bool {
+        try {
+            // Test that deallocation doesn't throw exceptions
+            std::vector<std::shared_ptr<Memory>> memories;
+            // Allocate some memory
+            for (int i = 0; i < 10; ++i) {
+                auto memory = context::allocateMemory(1024);
+                if (memory) {
+                    memories.push_back(memory);
+                }
+            }
+            // Test that destruction doesn't throw
+            try {
+                memories.clear(); // This should trigger deallocation
+            } catch (const std::exception &e) {
+                std::cerr << "Memory deallocation threw exception: " << e.what() << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Deallocation exception test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult ExceptionSafetyTest::testContextSwitchException() {
+    return measureTime("ContextSwitchException", [this]() -> bool {
+        try {
+            // Test context switching with invalid device
+            Device original_device = context::getDevice();
+            try {
+                // Try to switch to a device that might not exist
+                Device invalid_device(Device::Type::COUNT, 999);
+                context::setDevice(invalid_device);
+                std::cerr << "Expected device switching to fail with invalid device" << std::endl;
+                return false;
+            } catch (const std::exception &e) {
+                // Expected to fail
+                std::cout << "Device switching correctly failed with invalid device: " << e.what() << std::endl;
+            }
+            // Verify original device is still set
+            Device current_device = context::getDevice();
+            if (current_device != original_device) {
+                std::cerr << "Device context not restored after exception" << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Context switch exception test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+// Memory Leak Test Implementation
+TestResult MemoryLeakTest::run() {
+    return measureTime("MemoryLeakTest", [this]() -> bool {
+        try {
+            auto result1 = testBasicLeakDetection();
+            if (!result1.passed) {
+                std::cerr << "Basic leak detection test failed: " << result1.error_message << std::endl;
+                return false;
+            }
+            auto result2 = testCrossDeviceLeakDetection();
+            if (!result2.passed) {
+                std::cerr << "Cross-device leak detection test failed: " << result2.error_message << std::endl;
+                return false;
+            }
+            auto result3 = testExceptionLeakDetection();
+            if (!result3.passed) {
+                std::cerr << "Exception leak detection test failed: " << result3.error_message << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "MemoryLeakTest failed with exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult MemoryLeakTest::testBasicLeakDetection() {
+    return measureTime("BasicLeakDetection", [this]() -> bool {
+        try {
+            // Reset leak detector
+            MemoryLeakDetector::instance().reset();
+            // Allocate and deallocate memory
+            std::vector<std::shared_ptr<Memory>> memories;
+            for (int i = 0; i < 100; ++i) {
+                auto memory = context::allocateMemory(1024);
+                if (memory) {
+                    memories.push_back(memory);
+                }
+            }
+            // Clear memories to trigger deallocation
+            memories.clear();
+            // Force garbage collection if available
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+            // Check for leaks (this is a basic test - real leak detection would need more sophisticated tools)
+            size_t leaked_memory = MemoryLeakDetector::instance().getLeakedMemory();
+            if (leaked_memory > 0) {
+                std::cerr << "Potential memory leak detected: " << leaked_memory << " bytes" << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Basic leak detection test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult MemoryLeakTest::testCrossDeviceLeakDetection() {
+    return measureTime("CrossDeviceLeakDetection", [this]() -> bool {
+        try {
+            // Get available devices
+            std::vector<Device> devices;
+            for (int type = 0; type < static_cast<int>(Device::Type::COUNT); ++type) {
+                size_t count = context::getDeviceCount(static_cast<Device::Type>(type));
+                for (size_t i = 0; i < count; ++i) {
+                    devices.emplace_back(static_cast<Device::Type>(type), i);
+                }
+            }
+            if (devices.size() < 2) {
+                std::cout << "Skipping cross-device leak test - need at least 2 devices" << std::endl;
+                return true;
+            }
+            // Allocate pinned memory on one device
+            context::setDevice(devices[0]);
+            auto pinned_memory = context::allocatePinnedHostMemory(1024);
+            if (!pinned_memory) {
+                std::cerr << "Failed to allocate pinned memory" << std::endl;
+                return false;
+            }
+            // Switch to another device and deallocate
+            context::setDevice(devices[1]);
+            pinned_memory.reset(); // This should trigger cross-device deallocation
+            // Force garbage collection
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+            // Check for leaks
+            size_t leaked_memory = MemoryLeakDetector::instance().getLeakedMemory();
+            if (leaked_memory > 0) {
+                std::cerr << "Potential cross-device memory leak detected: " << leaked_memory << " bytes" << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Cross-device leak detection test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult MemoryLeakTest::testExceptionLeakDetection() {
+    return measureTime("ExceptionLeakDetection", [this]() -> bool {
+        try {
+            // Test that exceptions don't cause memory leaks
+            std::vector<std::shared_ptr<Memory>> memories;
+            try {
+                // Allocate some memory
+                for (int i = 0; i < 10; ++i) {
+                    auto memory = context::allocateMemory(1024);
+                    if (memory) {
+                        memories.push_back(memory);
+                    }
+                }
+                // Simulate an exception
+                throw std::runtime_error("Simulated exception");
+            } catch (const std::exception &e) {
+                // Memory should still be properly cleaned up
+                memories.clear();
+            }
+            // Force garbage collection
+            std::this_thread::sleep_for(std::chrono::milliseconds(100));
+            // Check for leaks
+            size_t leaked_memory = MemoryLeakDetector::instance().getLeakedMemory();
+            if (leaked_memory > 0) {
+                std::cerr << "Potential exception-related memory leak detected: " << leaked_memory << " bytes" << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Exception leak detection test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+// Performance Test Implementation
+TestResult PerformanceTest::run() {
+    return measureTime("PerformanceTest", [this]() -> bool {
+        try {
+            auto result1 = testAllocationPerformance();
+            if (!result1.passed) {
+                std::cerr << "Allocation performance test failed: " << result1.error_message << std::endl;
+                return false;
+            }
+            auto result2 = testConcurrentPerformance();
+            if (!result2.passed) {
+                std::cerr << "Concurrent performance test failed: " << result2.error_message << std::endl;
+                return false;
+            }
+            auto result3 = testMemoryCopyPerformance();
+            if (!result3.passed) {
+                std::cerr << "Memory copy performance test failed: " << result3.error_message << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "PerformanceTest failed with exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult PerformanceTest::testAllocationPerformance() {
+    return measureTime("AllocationPerformance", [this]() -> bool {
+        try {
+            const int num_allocations = 10000;
+            const size_t allocation_size = 1024;
+            auto start = std::chrono::high_resolution_clock::now();
+            std::vector<std::shared_ptr<Memory>> memories;
+            for (int i = 0; i < num_allocations; ++i) {
+                auto memory = context::allocateMemory(allocation_size);
+                if (memory) {
+                    memories.push_back(memory);
+                }
+            }
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            double avg_time_per_allocation = static_cast<double>(duration.count()) / num_allocations;
+            std::cout << "Average allocation time: " << avg_time_per_allocation << "μs" << std::endl;
+            // Performance threshold: should be under 100μs per allocation
+            if (avg_time_per_allocation > 100.0) {
+                std::cerr << "Allocation performance too slow: " << avg_time_per_allocation << "μs per allocation" << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Allocation performance test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult PerformanceTest::testConcurrentPerformance() {
+    return measureTime("ConcurrentPerformance", [this]() -> bool {
+        try {
+            const int num_threads = 4;
+            const int allocations_per_thread = 1000;
+            auto start = std::chrono::high_resolution_clock::now();
+            std::vector<std::thread> threads;
+            std::atomic<int> success_count{0};
+            for (int i = 0; i < num_threads; ++i) {
+                threads.emplace_back([&]() {
+                    for (int j = 0; j < allocations_per_thread; ++j) {
+                        auto memory = context::allocateMemory(512);
+                        if (memory) {
+                            success_count++;
+                        }
+                    }
+                });
+            }
+            for (auto &thread : threads) {
+                thread.join();
+            }
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            double total_allocations = num_threads * allocations_per_thread;
+            double avg_time_per_allocation = static_cast<double>(duration.count()) / total_allocations;
+            std::cout << "Concurrent allocation time: " << avg_time_per_allocation << "μs per allocation" << std::endl;
+            // Performance threshold: should be under 200μs per allocation under concurrent load
+            if (avg_time_per_allocation > 200.0) {
+                std::cerr << "Concurrent allocation performance too slow: " << avg_time_per_allocation << "μs per allocation" << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Concurrent performance test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult PerformanceTest::testMemoryCopyPerformance() {
+    return measureTime("MemoryCopyPerformance", [this]() -> bool {
+        try {
+            const size_t data_size = 1024 * 1024; // 1MB
+            const int num_copies = 100;
+            // Allocate source and destination memory
+            auto src_memory = context::allocateMemory(data_size);
+            auto dst_memory = context::allocateMemory(data_size);
+            if (!src_memory || !dst_memory) {
+                std::cerr << "Failed to allocate memory for copy test" << std::endl;
+                return false;
+            }
+            // Initialize source data
+            std::memset(src_memory->data(), 0xAB, data_size);
+            auto start = std::chrono::high_resolution_clock::now();
+            // Perform memory copies
+            for (int i = 0; i < num_copies; ++i) {
+                context::memcpyD2D(dst_memory->data(), src_memory->data(), data_size);
+            }
+            // Synchronize to ensure copies complete
+            context::syncDevice();
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            double avg_time_per_copy = static_cast<double>(duration.count()) / num_copies;
+            double bandwidth = (data_size * num_copies) / (duration.count() / 1e6) / (1024 * 1024); // MB/s
+            std::cout << "Average copy time: " << avg_time_per_copy << "μs" << std::endl;
+            std::cout << "Memory bandwidth: " << bandwidth << " MB/s" << std::endl;
+            // Performance threshold: should achieve at least 100 MB/s
+            if (bandwidth < 100.0) {
+                std::cerr << "Memory copy performance too slow: " << bandwidth << " MB/s" << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Memory copy performance test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+// Stress Test Implementation
+TestResult StressTest::run() {
+    return measureTime("StressTest", [this]() -> bool {
+        try {
+            auto result1 = testHighFrequencyAllocations();
+            if (!result1.passed) {
+                std::cerr << "High frequency allocations test failed: " << result1.error_message << std::endl;
+                return false;
+            }
+            auto result2 = testLargeMemoryAllocations();
+            if (!result2.passed) {
+                std::cerr << "Large memory allocations test failed: " << result2.error_message << std::endl;
+                return false;
+            }
+            auto result3 = testCrossDeviceStress();
+            if (!result3.passed) {
+                std::cerr << "Cross-device stress test failed: " << result3.error_message << std::endl;
+                return false;
+            }
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "StressTest failed with exception: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult StressTest::testHighFrequencyAllocations() {
+    return measureTime("HighFrequencyAllocations", [this]() -> bool {
+        try {
+            const int num_allocations = 100000;
+            std::vector<std::shared_ptr<Memory>> memories;
+            memories.reserve(num_allocations);
+            auto start = std::chrono::high_resolution_clock::now();
+            for (int i = 0; i < num_allocations; ++i) {
+                size_t size = 64 + (i % 1024);
+                auto memory = context::allocateMemory(size);
+                if (memory) {
+                    memories.push_back(memory);
+                }
+                // Periodically deallocate some memory to test alloc/dealloc stress
+                if (i % 1000 == 0 && !memories.empty()) {
+                    memories.erase(memories.begin(), memories.begin() + std::min(100, static_cast<int>(memories.size())));
+                }
+            }
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
+            std::cout << "High frequency allocations completed: " << num_allocations
+                      << " allocations in " << duration.count() << "ms" << std::endl;
+            // Clear remaining memory
+            memories.clear();
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "High frequency allocations test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult StressTest::testLargeMemoryAllocations() {
+    return measureTime("LargeMemoryAllocations", [this]() -> bool {
+        try {
+            const size_t large_size = 100 * 1024 * 1024; // 100MB
+            const int num_allocations = 10;
+            std::vector<std::shared_ptr<Memory>> memories;
+            for (int i = 0; i < num_allocations; ++i) {
+                try {
+                    auto memory = context::allocateMemory(large_size);
+                    if (memory) {
+                        memories.push_back(memory);
+                        std::cout << "Allocated " << large_size / (1024 * 1024) << "MB memory block " << i + 1 << std::endl;
+                    }
+                } catch (const std::exception &e) {
+                    std::cout << "Large allocation " << i + 1 << " failed (expected): " << e.what() << std::endl;
+                    break; // Expected to fail at some point due to memory limits
+                }
+            }
+            std::cout << "Successfully allocated " << memories.size() << " large memory blocks" << std::endl;
+            // Clear memory
+            memories.clear();
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Large memory allocations test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+TestResult StressTest::testCrossDeviceStress() {
+    return measureTime("CrossDeviceStress", [this]() -> bool {
+        try {
+            // Get available devices
+            std::vector<Device> devices;
+            for (int type = 0; type < static_cast<int>(Device::Type::COUNT); ++type) {
+                size_t count = context::getDeviceCount(static_cast<Device::Type>(type));
+                for (size_t i = 0; i < count; ++i) {
+                    devices.emplace_back(static_cast<Device::Type>(type), i);
+                }
+            }
+            if (devices.size() < 2) {
+                std::cout << "Skipping cross-device stress test - need at least 2 devices" << std::endl;
+                return true;
+            }
+            const int num_operations = 1000;
+            std::vector<std::shared_ptr<Memory>> pinned_memories;
+            for (int i = 0; i < num_operations; ++i) {
+                // Switch to random device
+                Device target_device = devices[i % devices.size()];
+                context::setDevice(target_device);
+                // Allocate pinned memory
+                auto pinned_memory = context::allocatePinnedHostMemory(1024);
+                if (pinned_memory) {
+                    pinned_memories.push_back(pinned_memory);
+                }
+                // Periodically deallocate some memory
+                if (i % 100 == 0 && !pinned_memories.empty()) {
+                    pinned_memories.erase(pinned_memories.begin(),
+                                          pinned_memories.begin() + std::min(10, static_cast<int>(pinned_memories.size())));
+                }
+            }
+            std::cout << "Cross-device stress test completed: " << num_operations
+                      << " operations across " << devices.size() << " devices" << std::endl;
+            // Clear remaining memory
+            pinned_memories.clear();
+            return true;
+        } catch (const std::exception &e) {
+            std::cerr << "Cross-device stress test failed: " << e.what() << std::endl;
+            return false;
+        }
+    });
+}
+} // namespace infinicore::test
--- a/src/infinicore-test/memory_test.h
+++ b/src/infinicore-test/memory_test.h
+#ifndef __INFINICORE_MEMORY_TEST_H__
+#define __INFINICORE_MEMORY_TEST_H__
+#include "../infinicore/context/allocators/memory_allocator.hpp"
+#include <atomic>
+#include <cassert>
+#include <chrono>
+#include <exception>
+#include <future>
+#include <infinicore.hpp>
+#include <iostream>
+#include <memory>
+#include <mutex>
+#include <queue>
+#include <spdlog/spdlog.h>
+#include <thread>
+#include <unordered_map>
+#include <vector>
+namespace infinicore::test {
+// Test result structure
+struct TestResult {
+    std::string test_name;
+    bool passed;
+    std::string error_message;
+    std::chrono::microseconds duration;
+    TestResult(const std::string &name, bool pass, const std::string &error = "",
+               std::chrono::microseconds dur = std::chrono::microseconds(0))
+        : test_name(name), passed(pass), error_message(error), duration(dur) {}
+};
+// Test framework base class
+class MemoryTestFramework {
+public:
+    virtual ~MemoryTestFramework() = default;
+    virtual TestResult run() = 0;
+    virtual std::string getName() const = 0;
+protected:
+    void logTestStart(const std::string &test_name) {
+        std::cout << "[TEST] Starting: " << test_name << std::endl;
+    }
+    void logTestResult(const TestResult &result) {
+        std::cout << "[TEST] " << (result.passed ? "PASSED" : "FAILED")
+                  << ": " << result.test_name;
+        if (!result.passed && !result.error_message.empty()) {
+            std::cout << " - " << result.error_message;
+        }
+        std::cout << " (Duration: " << result.duration.count() << "μs)" << std::endl;
+    }
+    template <typename Func>
+    TestResult measureTime(const std::string &test_name, Func &&func) {
+        auto start = std::chrono::high_resolution_clock::now();
+        try {
+            bool result = func();
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            return TestResult(test_name, result, "", duration);
+        } catch (const std::exception &e) {
+            auto end = std::chrono::high_resolution_clock::now();
+            auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
+            return TestResult(test_name, false, e.what(), duration);
+        }
+    }
+};
+// Mock allocator for testing exception safety
+class MockAllocator : public infinicore::MemoryAllocator {
+public:
+    MockAllocator(bool should_throw = false, size_t max_allocations = SIZE_MAX)
+        : should_throw_(should_throw), max_allocations_(max_allocations),
+          allocation_count_(0), total_allocated_(0) {}
+    std::byte *allocate(size_t size) override {
+        if (should_throw_) {
+            throw std::runtime_error("Mock allocation failure");
+        }
+        if (allocation_count_ >= max_allocations_) {
+            throw std::runtime_error("Mock allocation limit exceeded");
+        }
+        allocation_count_++;
+        total_allocated_ += size;
+        return static_cast<std::byte *>(std::malloc(size));
+    }
+    void deallocate(std::byte *ptr) override {
+        if (ptr) {
+            std::free(ptr);
+        }
+    }
+    size_t getAllocationCount() const { return allocation_count_; }
+    size_t getTotalAllocated() const { return total_allocated_; }
+private:
+    bool should_throw_;
+    size_t max_allocations_;
+    std::atomic<size_t> allocation_count_;
+    std::atomic<size_t> total_allocated_;
+};
+// Memory leak detector
+class MemoryLeakDetector {
+public:
+    static MemoryLeakDetector &instance() {
+        static MemoryLeakDetector detector;
+        return detector;
+    }
+    void recordAllocation(void *ptr, size_t size) {
+        std::lock_guard<std::mutex> lock(mutex_);
+        allocations_[ptr] = size;
+        total_allocated_ += size;
+    }
+    void recordDeallocation(void *ptr) {
+        std::lock_guard<std::mutex> lock(mutex_);
+        auto it = allocations_.find(ptr);
+        if (it != allocations_.end()) {
+            total_allocated_ -= it->second;
+            allocations_.erase(it);
+        }
+    }
+    size_t getLeakedMemory() const {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return total_allocated_;
+    }
+    size_t getLeakCount() const {
+        std::lock_guard<std::mutex> lock(mutex_);
+        return allocations_.size();
+    }
+    void reset() {
+        std::lock_guard<std::mutex> lock(mutex_);
+        allocations_.clear();
+        total_allocated_ = 0;
+    }
+private:
+    mutable std::mutex mutex_;
+    std::unordered_map<void *, size_t> allocations_;
+    size_t total_allocated_ = 0;
+};
+// Test categories
+class BasicMemoryTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "BasicMemoryTest"; }
+};
+class ConcurrencyTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "ConcurrencyTest"; }
+private:
+    TestResult testConcurrentAllocations();
+    TestResult testConcurrentDeviceSwitching();
+    TestResult testMemoryAllocationRace();
+};
+class ExceptionSafetyTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "ExceptionSafetyTest"; }
+private:
+    TestResult testAllocationFailure();
+    TestResult testDeallocationException();
+    TestResult testContextSwitchException();
+};
+class MemoryLeakTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "MemoryLeakTest"; }
+private:
+    TestResult testBasicLeakDetection();
+    TestResult testCrossDeviceLeakDetection();
+    TestResult testExceptionLeakDetection();
+};
+class PerformanceTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "PerformanceTest"; }
+private:
+    TestResult testAllocationPerformance();
+    TestResult testConcurrentPerformance();
+    TestResult testMemoryCopyPerformance();
+};
+class StressTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "StressTest"; }
+private:
+    TestResult testHighFrequencyAllocations();
+    TestResult testLargeMemoryAllocations();
+    TestResult testCrossDeviceStress();
+};
+// Test runner
+class MemoryTestRunner {
+public:
+    void addTest(std::unique_ptr<MemoryTestFramework> test) {
+        tests_.push_back(std::move(test));
+    }
+    std::vector<TestResult> runAllTests() {
+        std::vector<TestResult> results;
+        std::cout << "==============================================\n"
+                  << "InfiniCore Memory Management Test Suite\n"
+                  << "==============================================" << std::endl;
+        for (auto &test : tests_) {
+            logTestStart(test->getName());
+            TestResult result = test->run();
+            logTestResult(result);
+            results.push_back(result);
+        }
+        printSummary(results);
+        return results;
+    }
+private:
+    std::vector<std::unique_ptr<MemoryTestFramework>> tests_;
+    void logTestStart(const std::string &test_name) {
+        std::cout << "\n[SUITE] Running: " << test_name << std::endl;
+    }
+    void logTestResult(const TestResult &result) {
+        std::cout << "[SUITE] " << (result.passed ? "PASSED" : "FAILED")
+                  << ": " << result.test_name << std::endl;
+    }
+    void printSummary(const std::vector<TestResult> &results) {
+        size_t passed = 0, failed = 0;
+        std::chrono::microseconds total_time(0);
+        for (const auto &result : results) {
+            if (result.passed) {
+                passed++;
+            } else {
+                failed++;
+            }
+            total_time += result.duration;
+        }
+        std::cout << "\n==============================================\n"
+                  << "Test Summary\n"
+                  << "==============================================\n"
+                  << "Total Tests: " << results.size() << "\n"
+                  << "Passed: " << passed << "\n"
+                  << "Failed: " << failed << "\n"
+                  << "Total Time: " << total_time.count() << "μs\n"
+                  << "==============================================" << std::endl;
+    }
+};
+} // namespace infinicore::test
+#endif // __INFINICORE_MEMORY_TEST_H__
--- a/src/infinicore-test/test_tensor_destructor.cc
+++ b/src/infinicore-test/test_tensor_destructor.cc
+#include "test_tensor_destructor.h"
+namespace infinicore::test {
+// Test 1: Basic tensor creation and destruction
+TestResult TensorDestructorTest::testBasicTensorDestruction() {
+    return measureTime("BasicTensorDestruction", [this]() {
+        {
+            // Create a tensor in a scope to test automatic destruction
+            auto tensor = Tensor::empty({2, 3}, DataType::F32, Device::Type::CPU);
+            // Verify tensor was created successfully
+            if (!tensor.operator->()) {
+                return false;
+            }
+            if (tensor->shape().size() != 2) {
+                return false;
+            }
+            if (tensor->shape()[0] != 2) {
+                return false;
+            }
+            if (tensor->shape()[1] != 3) {
+                return false;
+            }
+            if (tensor->dtype() != DataType::F32) {
+                return false;
+            }
+            std::cout << "Tensor created successfully with shape: ";
+            for (auto dim : tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+        // Tensor should be destroyed when it goes out of scope
+        // This should trigger the TensorMetaData destructor
+        std::cout << "Tensor destroyed successfully - destructor called" << std::endl;
+        return true;
+    });
+}
+// Test 2: Multiple tensor creation and destruction
+TestResult TensorDestructorTest::testMultipleTensorDestruction() {
+    return measureTime("MultipleTensorDestruction", [this]() {
+        std::vector<Tensor> tensors;
+        // Create multiple tensors with different shapes and types
+        tensors.push_back(Tensor::empty({1, 2, 3}, DataType::F32, Device::Type::CPU));
+        tensors.push_back(Tensor::empty({4, 5}, DataType::F64, Device::Type::CPU));
+        tensors.push_back(Tensor::zeros({2, 2, 2}, DataType::I32, Device::Type::CPU));
+        tensors.push_back(Tensor::ones({3, 4}, DataType::F16, Device::Type::CPU));
+        // Verify all tensors were created
+        if (tensors.size() != 4) {
+            return false;
+        }
+        for (size_t i = 0; i < tensors.size(); ++i) {
+            if (!tensors[i].operator->()) {
+                return false;
+            }
+            std::cout << "Tensor " << i << " created with shape: ";
+            for (auto dim : tensors[i]->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "All " << tensors.size() << " tensors created successfully" << std::endl;
+        // All tensors will be destroyed when the vector goes out of scope
+        // This should trigger all TensorMetaData destructors
+        return true;
+    });
+}
+// Test 3: Different data types
+TestResult TensorDestructorTest::testDifferentDataTypes() {
+    return measureTime("DifferentDataTypes", [this]() {
+        std::vector<std::pair<DataType, std::string>> data_types = {
+            {DataType::F32, "F32"},
+            {DataType::F64, "F64"},
+            {DataType::F16, "F16"},
+            {DataType::I32, "I32"},
+            {DataType::I64, "I64"},
+            {DataType::I8, "I8"},
+            {DataType::U8, "U8"},
+            {DataType::BOOL, "BOOL"}};
+        for (const auto &[dtype, name] : data_types) {
+            {
+                auto tensor = Tensor::empty({2, 2}, dtype, Device::Type::CPU);
+                if (!tensor.operator->()) {
+                    return false;
+                }
+                if (tensor->dtype() != dtype) {
+                    return false;
+                }
+                std::cout << "Created tensor with data type: " << name << std::endl;
+            }
+            std::cout << "Destroyed tensor with data type: " << name << std::endl;
+        }
+        return true;
+    });
+}
+// Test 4: Different shapes
+TestResult TensorDestructorTest::testDifferentShapes() {
+    return measureTime("DifferentShapes", [this]() {
+        std::vector<Shape> shapes = {
+            {1},             // 1D
+            {2, 3},          // 2D
+            {4, 5, 6},       // 3D
+            {1, 2, 3, 4},    // 4D
+            {2, 3, 4, 5, 6}, // 5D
+            {1000},          // Large 1D
+            {100, 100},      // Large 2D
+            {10, 10, 10, 10} // Large 4D
+        };
+        for (const auto &shape : shapes) {
+            {
+                auto tensor = Tensor::empty(shape, DataType::F32, Device::Type::CPU);
+                if (!tensor.operator->()) {
+                    return false;
+                }
+                if (tensor->shape() != shape) {
+                    return false;
+                }
+                std::cout << "Created tensor with shape: ";
+                for (auto dim : shape) {
+                    std::cout << dim << " ";
+                }
+                std::cout << std::endl;
+            }
+            std::cout << "Destroyed tensor with shape: ";
+            for (auto dim : shape) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+        return true;
+    });
+}
+// Test 5: Tensor from blob
+TestResult TensorDestructorTest::testTensorFromBlob() {
+    return measureTime("TensorFromBlob", [this]() {
+        // Create a blob of data
+        std::vector<float> data = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
+        {
+            // Create tensor from blob
+            auto tensor = Tensor::from_blob(data.data(), {2, 3}, DataType::F32, Device::Type::CPU);
+            if (!tensor.operator->()) {
+                return false;
+            }
+            if (tensor->shape() != Shape({2, 3})) {
+                return false;
+            }
+            if (tensor->dtype() != DataType::F32) {
+                return false;
+            }
+            std::cout << "Created tensor from blob with shape: ";
+            for (auto dim : tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "Destroyed tensor from blob successfully" << std::endl;
+        return true;
+    });
+}
+// Test 6: Strided tensor
+TestResult TensorDestructorTest::testStridedTensor() {
+    return measureTime("StridedTensor", [this]() {
+        {
+            // Create a strided tensor
+            auto tensor = Tensor::empty({4, 4}, DataType::F32, Device::Type::CPU);
+            if (!tensor.operator->()) {
+                return false;
+            }
+            // Create a narrowed view
+            std::vector<TensorSliceParams> slices = {
+                {0, 0, 2}, // dimension 0: start at 0, length 2
+                {1, 0, 2}  // dimension 1: start at 0, length 2
+            };
+            auto strided_tensor = tensor->narrow(slices);
+            if (!strided_tensor.operator->()) {
+                return false;
+            }
+            std::cout << "Created strided tensor with shape: ";
+            for (auto dim : strided_tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "Destroyed strided tensor successfully" << std::endl;
+        return true;
+    });
+}
+// Test 7: Memory leak detection
+TestResult TensorDestructorTest::testMemoryLeakDetection() {
+    return measureTime("MemoryLeakDetection", [this]() {
+        // Reset memory leak detector
+        MemoryLeakDetector::instance().reset();
+        size_t initial_leaks = MemoryLeakDetector::instance().getLeakCount();
+        // Create and destroy many tensors
+        for (int i = 0; i < 100; ++i) {
+            {
+                auto tensor = Tensor::empty({10, 10}, DataType::F32, Device::Type::CPU);
+                if (!tensor.operator->()) {
+                    return false;
+                }
+            }
+        }
+        size_t final_leaks = MemoryLeakDetector::instance().getLeakCount();
+        std::cout << "Initial leaks: " << initial_leaks << std::endl;
+        std::cout << "Final leaks: " << final_leaks << std::endl;
+        // Should not have more leaks than we started with
+        return final_leaks <= initial_leaks;
+    });
+}
+// Test 8: Tensor copy destruction
+TestResult TensorDestructorTest::testTensorCopyDestruction() {
+    return measureTime("TensorCopyDestruction", [this]() {
+        {
+            auto original_tensor = Tensor::empty({3, 3}, DataType::F32, Device::Type::CPU);
+            if (!original_tensor.operator->()) {
+                return false;
+            }
+            // Create a copy (using assignment operator)
+            auto copied_tensor = original_tensor;
+            if (!copied_tensor.operator->()) {
+                return false;
+            }
+            std::cout << "Created original and copied tensors" << std::endl;
+            std::cout << "Original tensor shape: ";
+            for (auto dim : original_tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+            std::cout << "Copied tensor shape: ";
+            for (auto dim : copied_tensor->shape()) {
+                std::cout << dim << " ";
+            }
+            std::cout << std::endl;
+        }
+        std::cout << "Destroyed original and copied tensors successfully" << std::endl;
+        return true;
+    });
+}
+// Main test runner
+TestResult TensorDestructorTest::run() {
+    std::vector<TestResult> results;
+    std::cout << "==============================================\n"
+              << "Tensor Destructor Test Suite\n"
+              << "==============================================" << std::endl;
+    // Run all tests
+    results.push_back(testBasicTensorDestruction());
+    results.push_back(testMultipleTensorDestruction());
+    results.push_back(testDifferentDataTypes());
+    results.push_back(testDifferentShapes());
+    results.push_back(testTensorFromBlob());
+    results.push_back(testStridedTensor());
+    results.push_back(testMemoryLeakDetection());
+    results.push_back(testTensorCopyDestruction());
+    // Check if all tests passed
+    bool all_passed = true;
+    for (const auto &result : results) {
+        if (!result.passed) {
+            all_passed = false;
+            break;
+        }
+    }
+    return TestResult("TensorDestructorTest", all_passed,
+                      all_passed ? "" : "Some tensor destructor tests failed");
+}
+} // namespace infinicore::test
--- a/src/infinicore-test/test_tensor_destructor.h
+++ b/src/infinicore-test/test_tensor_destructor.h
+#ifndef __INFINICORE_TEST_TENSOR_DESTRUCTOR_H__
+#define __INFINICORE_TEST_TENSOR_DESTRUCTOR_H__
+#include "infinicore/context/context.hpp"
+#include "infinicore/tensor.hpp"
+#include "memory_test.h"
+#include <iostream>
+#include <memory>
+#include <vector>
+namespace infinicore::test {
+class TensorDestructorTest : public MemoryTestFramework {
+public:
+    TestResult run() override;
+    std::string getName() const override { return "TensorDestructorTest"; }
+private:
+    TestResult testBasicTensorDestruction();
+    TestResult testMultipleTensorDestruction();
+    TestResult testDifferentDataTypes();
+    TestResult testDifferentShapes();
+    TestResult testTensorFromBlob();
+    TestResult testStridedTensor();
+    TestResult testMemoryLeakDetection();
+    TestResult testTensorCopyDestruction();
+};
+} // namespace infinicore::test
+#endif // __INFINICORE_TEST_TENSOR_DESTRUCTOR_H__
--- a/src/infinicore/context/runtime/runtime.cc
+++ b/src/infinicore/context/runtime/runtime.cc
@@ -63,6 +63,10 @@ std::shared_ptr<Memory> Runtime::allocateMemory(size_t size) {
 }
 std::shared_ptr<Memory> Runtime::allocatePinnedHostMemory(size_t size) {
+    if (!pinned_host_memory_allocator_) {
+        spdlog::warn("For CPU devices, pinned memory is not supported, falling back to regular host memory");
+        return allocateMemory(size);
+    }
    std::byte *data_ptr = pinned_host_memory_allocator_->allocate(size);
    return std::make_shared<Memory>(
        data_ptr, size, device_,

--- a/src/infinicore/device.cc
+++ b/src/infinicore/device.cc
@@ -39,10 +39,11 @@ std::string Device::toString(const Type &type) {
        return "KUNLUN";
    case Type::HYGON:
        return "HYGON";
+    case Type::COUNT:
+        return "COUNT";
+    default:
+        return "UNKNOWN";
    }
-    // TODO: Add error handling.
-    return "";
 }
 bool Device::operator==(const Device &other) const {

--- a/src/infinicore/tensor/tensor.cc
+++ b/src/infinicore/tensor/tensor.cc
@@ -65,6 +65,13 @@ TensorMetaData::TensorMetaData(const Shape &_shape, const Strides &_strides, con
    INFINICORE_CHECK_ERROR(infiniopCreateTensorDescriptor(&desc, shape.size(), shape.data(), strides.data(), (infiniDtype_t)dtype));
 }
+TensorMetaData::~TensorMetaData() {
+    if (desc) {
+        infiniopDestroyTensorDescriptor(desc);
+        desc = nullptr;
+    }
+}
 TensorImpl::TensorImpl(const Shape &shape, const DataType &dtype)
    : meta_(TensorMetaData(shape, calculate_contiguous_strides(shape), dtype)) {}

--- a/xmake.lua
+++ b/xmake.lua
@@ -298,13 +298,14 @@ target("infiniccl")
    if has_config("moore-gpu") then
        add_deps("infiniccl-moore")
    end
    if has_config("kunlun-xpu") then
        add_deps("infiniccl-kunlun")
    end
    if has_config("hygon-dcu") then
        add_deps("infiniccl-hygon")
    end
    set_languages("cxx17")
    add_files("src/infiniccl/*.cc")

--- a/xmake/test.lua
+++ b/xmake/test.lua
@@ -4,7 +4,7 @@ target("infiniutils-test")
    set_warnings("all", "error")
    set_languages("cxx17")
    add_files(os.projectdir().."/src/utils-test/*.cc")
    set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
 target_end()
@@ -18,7 +18,7 @@ target("infiniop-test")
    set_languages("cxx17")
    set_warnings("all", "error")
    add_includedirs(INFINI_ROOT.."/include")
    add_linkdirs(INFINI_ROOT.."/lib")
    add_links("infiniop", "infinirt")
@@ -27,7 +27,7 @@ target("infiniop-test")
        add_cxflags("-fopenmp")
        add_ldflags("-fopenmp")
    end
    add_includedirs(os.projectdir().."/src/infiniop-test/include")
    add_files(os.projectdir().."/src/infiniop-test/src/*.cpp")
    add_files(os.projectdir().."/src/infiniop-test/src/ops/*.cpp")
@@ -63,3 +63,31 @@ target("infinirt-test")
    add_files(os.projectdir().."/src/infinirt-test/*.cc")
    set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
 target_end()
+target("infinicore-test")
+    set_kind("binary")
+    add_deps("infiniop", "infinirt", "infiniccl")
+    set_default(false)
+    set_languages("cxx17")
+    set_warnings("all", "error")
+    local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
+    add_includedirs(INFINI_ROOT.."/include")
+    add_linkdirs(INFINI_ROOT.."/lib")
+    add_links("infiniop", "infinirt", "infiniccl")
+    -- Add spdlog support
+    add_includedirs("third_party/spdlog/include")
+    add_defines("SPDLOG_ACTIVE_LEVEL=0")  -- Enable all log levels
+    add_files(os.projectdir().."/src/infinicore/*.cc")
+    add_files(os.projectdir().."/src/infinicore/context/*.cc")
+    add_files(os.projectdir().."/src/infinicore/context/*/*.cc")
+    add_files(os.projectdir().."/src/infinicore/tensor/*.cc")
+    add_files(os.projectdir().."/src/infinicore/op/*/*.cc")
+    add_files(os.projectdir().."/src/infinicore-test/*.cc")
+    set_installdir(INFINI_ROOT)
+target_end()