Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
144ba492
Commit
144ba492
authored
Jan 30, 2026
by
PanZezhong
Committed by
wooway777
Feb 10, 2026
Browse files
issue/143 fix bench script, worker cleanup, compiler initial input
parent
69f18760
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
58 additions
and
14 deletions
+58
-14
csrc/engine/compiler/paged_compiler.cpp
csrc/engine/compiler/paged_compiler.cpp
+14
-0
csrc/engine/rank_worker.cpp
csrc/engine/rank_worker.cpp
+23
-14
examples/bench.py
examples/bench.py
+21
-0
No files found.
csrc/engine/compiler/paged_compiler.cpp
View file @
144ba492
#include "paged_compiler.hpp"

namespace {

/// @brief Zero-fill a device tensor by staging a host-side buffer of zeros
///        and copying it to the device.
/// @param tensor Tensor whose entire byte range (`tensor->nbytes()`) is
///        overwritten with zeros. Passed by reference; the handle itself is
///        not reassigned, only the device memory it points at is mutated.
/// @note The final `false` argument to memcpyH2D presumably selects a
///       synchronous (non-async) copy — TODO confirm against the
///       infinicore::context API.
// Todo: replace with Tensor::zeros when it is available
inline void set_zeros(infinicore::Tensor &tensor) {
    // Host staging buffer, value-initialized to zero by the fill constructor.
    std::vector<uint8_t> zeros(tensor->nbytes(), 0);
    infinicore::context::memcpyH2D(tensor->data(), zeros.data(), tensor->nbytes(), false);
}

} // namespace
namespace
infinilm
::
engine
{
namespace
infinilm
::
engine
{
PagedCompiler
::
PagedCompiler
(
const
std
::
shared_ptr
<
InfinilmModel
>
&
model
,
RankBarrier
*
barrier
)
PagedCompiler
::
PagedCompiler
(
const
std
::
shared_ptr
<
InfinilmModel
>
&
model
,
RankBarrier
*
barrier
)
:
GraphCompiler
(
model
,
barrier
)
{
:
GraphCompiler
(
model
,
barrier
)
{
...
@@ -27,15 +35,20 @@ void PagedCompiler::compile() {
...
@@ -27,15 +35,20 @@ void PagedCompiler::compile() {
compiled_map_decode_
.
clear
();
compiled_map_decode_
.
clear
();
block_tables_holder_
=
infinicore
::
Tensor
::
empty
(
block_tables_holder_
=
infinicore
::
Tensor
::
empty
(
{
nblocks
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
{
nblocks
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
set_zeros
(
block_tables_holder_
);
for
(
size_t
b
:
decode_batch_sizes_
)
{
for
(
size_t
b
:
decode_batch_sizes_
)
{
size_t
block_per_req
=
nblocks
/
b
;
size_t
block_per_req
=
nblocks
/
b
;
InfinilmModel
::
Input
input
;
InfinilmModel
::
Input
input
;
input
.
input_ids
=
infinicore
::
Tensor
::
empty
({
1
,
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
input_ids
=
infinicore
::
Tensor
::
empty
({
1
,
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
position_ids
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
position_ids
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
total_sequence_lengths
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
total_sequence_lengths
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
set_zeros
(
input
.
input_ids
.
value
());
set_zeros
(
input
.
position_ids
.
value
());
set_zeros
(
input
.
total_sequence_lengths
.
value
());
std
::
vector
<
int64_t
>
total_sequence_lengths_vec
(
b
,
1
);
std
::
vector
<
int64_t
>
total_sequence_lengths_vec
(
b
,
1
);
infinicore
::
context
::
memcpyH2D
(
input
.
total_sequence_lengths
.
value
()
->
data
(),
total_sequence_lengths_vec
.
data
(),
b
*
sizeof
(
int64_t
),
false
);
infinicore
::
context
::
memcpyH2D
(
input
.
total_sequence_lengths
.
value
()
->
data
(),
total_sequence_lengths_vec
.
data
(),
b
*
sizeof
(
int64_t
),
false
);
input
.
input_offsets
=
infinicore
::
Tensor
::
empty
({
b
+
1
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
input_offsets
=
infinicore
::
Tensor
::
empty
({
b
+
1
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
set_zeros
(
input
.
input_offsets
.
value
());
std
::
vector
<
int64_t
>
input_offsets_vec
(
b
+
1
,
0
);
std
::
vector
<
int64_t
>
input_offsets_vec
(
b
+
1
,
0
);
for
(
size_t
i
=
0
;
i
<=
b
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<=
b
;
i
++
)
{
input_offsets_vec
[
i
]
=
i
;
input_offsets_vec
[
i
]
=
i
;
...
@@ -43,6 +56,7 @@ void PagedCompiler::compile() {
...
@@ -43,6 +56,7 @@ void PagedCompiler::compile() {
infinicore
::
context
::
memcpyH2D
(
input
.
input_offsets
.
value
()
->
data
(),
input_offsets_vec
.
data
(),
(
b
+
1
)
*
sizeof
(
int64_t
),
false
);
infinicore
::
context
::
memcpyH2D
(
input
.
input_offsets
.
value
()
->
data
(),
input_offsets_vec
.
data
(),
(
b
+
1
)
*
sizeof
(
int64_t
),
false
);
input
.
block_tables
=
block_tables_holder_
->
as_strided
({
b
,
block_per_req
},
{(
ptrdiff_t
)
block_per_req
,
1
});
input
.
block_tables
=
block_tables_holder_
->
as_strided
({
b
,
block_per_req
},
{(
ptrdiff_t
)
block_per_req
,
1
});
input
.
slot_mapping
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
slot_mapping
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
set_zeros
(
input
.
slot_mapping
.
value
());
barrier_
->
wait
();
barrier_
->
wait
();
infinicore
::
context
::
startGraphRecording
();
infinicore
::
context
::
startGraphRecording
();
...
...
csrc/engine/rank_worker.cpp
View file @
144ba492
...
@@ -246,12 +246,12 @@ void RankWorker::thread_loop() {
...
@@ -246,12 +246,12 @@ void RankWorker::thread_loop() {
try
{
try
{
model_
->
load_parameter
(
local_param_name
,
local_param
);
model_
->
load_parameter
(
local_param_name
,
local_param
);
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
// convert exceptions to a safe behavior: set should_exit_ and notify caller
{
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
should_exit_
=
true
;
should_exit_
=
true
;
job_done_
=
true
;
job_done_
=
true
;
}
cv_
.
notify_all
();
cv_
.
notify_all
();
// rethrow so the thread can be joined and caller sees an error if desired (optional)
spdlog
::
error
(
"[{}] exception during load_parameter_: {}
\n
"
,
info
(),
e
.
what
());
spdlog
::
error
(
"[{}] exception during load_parameter_: {}
\n
"
,
info
(),
e
.
what
());
break
;
break
;
}
}
...
@@ -321,9 +321,11 @@ void RankWorker::thread_loop() {
...
@@ -321,9 +321,11 @@ void RankWorker::thread_loop() {
cv_
.
notify_all
();
cv_
.
notify_all
();
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
{
should_exit_
=
true
;
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
job_done_
=
true
;
should_exit_
=
true
;
job_done_
=
true
;
}
cv_
.
notify_all
();
cv_
.
notify_all
();
spdlog
::
error
(
"[{}] exception during forward: {}
\n
"
,
info
(),
e
.
what
());
spdlog
::
error
(
"[{}] exception during forward: {}
\n
"
,
info
(),
e
.
what
());
break
;
break
;
...
@@ -338,9 +340,11 @@ void RankWorker::thread_loop() {
...
@@ -338,9 +340,11 @@ void RankWorker::thread_loop() {
cv_
.
notify_all
();
cv_
.
notify_all
();
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
{
should_exit_
=
true
;
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
job_done_
=
true
;
should_exit_
=
true
;
job_done_
=
true
;
}
cv_
.
notify_all
();
cv_
.
notify_all
();
spdlog
::
error
(
"[{}] exception during reset_cache: {}
\n
"
,
info
(),
e
.
what
());
spdlog
::
error
(
"[{}] exception during reset_cache: {}
\n
"
,
info
(),
e
.
what
());
break
;
break
;
...
@@ -357,9 +361,11 @@ void RankWorker::thread_loop() {
...
@@ -357,9 +361,11 @@ void RankWorker::thread_loop() {
cv_
.
notify_all
();
cv_
.
notify_all
();
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
{
should_exit_
=
true
;
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
job_done_
=
true
;
should_exit_
=
true
;
job_done_
=
true
;
}
cv_
.
notify_all
();
cv_
.
notify_all
();
spdlog
::
error
(
"[{}] exception during compile: {}
\n
"
,
info
(),
e
.
what
());
spdlog
::
error
(
"[{}] exception during compile: {}
\n
"
,
info
(),
e
.
what
());
break
;
break
;
...
@@ -369,6 +375,9 @@ void RankWorker::thread_loop() {
...
@@ -369,6 +375,9 @@ void RankWorker::thread_loop() {
// Shouldn't reach here (no-op)
// Shouldn't reach here (no-op)
}
}
}
// while
}
// while
// Some clean up should be done before exiting the thread
compiler_
.
reset
();
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
// Top-level exception: ensure any waiters are woken and the thread exits cleanly.
// Top-level exception: ensure any waiters are woken and the thread exits cleanly.
{
{
...
...
examples/bench.py
View file @
144ba492
...
@@ -137,6 +137,21 @@ def get_args():
...
@@ -137,6 +137,21 @@ def get_args():
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"Run nvidia test"
,
help
=
"Run nvidia test"
,
)
)
parser
.
add_argument
(
"--metax"
,
action
=
"store_true"
,
help
=
"Run metax test"
,
)
parser
.
add_argument
(
"--moore"
,
action
=
"store_true"
,
help
=
"Run moore test"
,
)
parser
.
add_argument
(
"--iluvatar"
,
action
=
"store_true"
,
help
=
"Run iluvatar test"
,
)
parser
.
add_argument
(
parser
.
add_argument
(
"--cambricon"
,
"--cambricon"
,
action
=
"store_true"
,
action
=
"store_true"
,
...
@@ -328,6 +343,12 @@ if __name__ == "__main__":
...
@@ -328,6 +343,12 @@ if __name__ == "__main__":
device_str
=
"cpu"
device_str
=
"cpu"
elif
args
.
nvidia
:
elif
args
.
nvidia
:
device_str
=
"cuda"
device_str
=
"cuda"
elif
args
.
metax
:
device_str
=
"cuda"
elif
args
.
moore
:
device_str
=
"musa"
elif
args
.
iluvatar
:
device_str
=
"cuda"
elif
args
.
cambricon
:
elif
args
.
cambricon
:
device_str
=
"mlu"
device_str
=
"mlu"
else
:
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment