Unverified Commit 01a4a0c8 authored by Haojie Wang's avatar Haojie Wang Committed by GitHub
Browse files

Merge pull request #882 from InfiniTensor/issue/810

issue/810 static compute graph infra
parents 3883f32f 39f9c349
#include "infinicore/ops/linear.hpp" #include "infinicore/ops/linear.hpp"
#include "infinicore/ops/add.hpp" #include "infinicore/ops/gemm.hpp"
#include "infinicore/ops/matmul.hpp" #include "infinicore/ops/rearrange.hpp"
namespace infinicore::op { namespace infinicore::op {
...@@ -42,16 +42,18 @@ void linear_(Tensor out, ...@@ -42,16 +42,18 @@ void linear_(Tensor out,
// linear transformation // linear transformation
Tensor out_view = out->view({N, out_features}); Tensor out_view = out->view({N, out_features});
matmul_(out_view,
input->view({N, in_features}),
weight->permute({1, 0}));
// Add bias // Add bias
float alpha = 1.0f;
float beta = 0.0f;
if (bias.has_value()) { if (bias.has_value()) {
add_(out_view, rearrange_(out_view,
out_view, bias.value()->as_strided({N, out_features}, {0, 1}));
bias.value()->as_strided({N, out_features}, {0, 1})); beta = 1.0f;
} }
gemm_(out_view,
input->view({N, in_features}),
weight->permute({1, 0}), alpha, beta);
} }
} // namespace infinicore::op } // namespace infinicore::op
...@@ -24,6 +24,11 @@ inline void bind(py::module &m) { ...@@ -24,6 +24,11 @@ inline void bind(py::module &m) {
// Synchronization // Synchronization
m.def("sync_stream", &syncStream, "Synchronize the current stream"); m.def("sync_stream", &syncStream, "Synchronize the current stream");
m.def("sync_device", &syncDevice, "Synchronize the current device"); m.def("sync_device", &syncDevice, "Synchronize the current device");
// Graph
m.def("is_graph_recording", &isGraphRecording, "Check if graph recording is turned on");
m.def("start_graph_recording", &startGraphRecording, "Start graph recording");
m.def("stop_graph_recording", &stopGraphRecording, "Stop graph recording and return the graph");
} }
} // namespace infinicore::context } // namespace infinicore::context
#pragma once
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include "infinicore.hpp"
namespace py = pybind11;
namespace infinicore::graph {
// Registers the `Graph` class on the given Python module so a recorded
// compute graph can be held and replayed from Python. Exposed as
// `_infinicore.Graph`, managed through a shared_ptr holder.
inline void bind(py::module_ &m) {
    py::class_<infinicore::graph::Graph,
               std::shared_ptr<infinicore::graph::Graph>>(m, "Graph")
        .def(py::init<>()) // allow construction
        // Replays the operations captured in this graph.
        .def("run", &infinicore::graph::Graph::run);
}
} // namespace infinicore::graph
...@@ -6,6 +6,7 @@ ...@@ -6,6 +6,7 @@
#include "device.hpp" #include "device.hpp"
#include "device_event.hpp" #include "device_event.hpp"
#include "dtype.hpp" #include "dtype.hpp"
#include "graph.hpp"
#include "ops.hpp" #include "ops.hpp"
#include "tensor.hpp" #include "tensor.hpp"
...@@ -18,6 +19,7 @@ PYBIND11_MODULE(_infinicore, m) { ...@@ -18,6 +19,7 @@ PYBIND11_MODULE(_infinicore, m) {
dtype::bind(m); dtype::bind(m);
ops::bind(m); ops::bind(m);
tensor::bind(m); tensor::bind(m);
graph::bind(m);
} }
} // namespace infinicore } // namespace infinicore
...@@ -275,4 +275,12 @@ std::shared_ptr<TensorImpl> TensorImpl::strided_from_blob( ...@@ -275,4 +275,12 @@ std::shared_ptr<TensorImpl> TensorImpl::strided_from_blob(
return t; return t;
} }
// Creates a tensor that aliases this tensor's storage without owning it:
// same shape/strides/dtype/offset, but the new Memory wraps the existing
// raw pointer with a null deleter — presumably a no-op on destruction, so
// the returned tensor must not outlive the original's allocation.
// NOTE(review): lifetime contract is not visible here — confirm the
// Memory(null-deleter) semantics against the Memory class.
Tensor TensorImpl::to_blob() const {
    auto t = std::shared_ptr<TensorImpl>(new TensorImpl(shape(), strides(), dtype()));
    t->data_.offset = this->data_.offset;
    t->data_.memory = std::make_shared<Memory>(this->data_.memory->data(), this->data_.memory->size(), this->data_.memory->device(), nullptr);
    return Tensor{t};
}
} // namespace infinicore } // namespace infinicore
...@@ -47,3 +47,14 @@ inline struct SpdlogInitializer { ...@@ -47,3 +47,14 @@ inline struct SpdlogInitializer {
} \ } \
} \ } \
} while (0) } while (0)
// INFINICORE_ASSERT(cond): always-active runtime assertion (not compiled out
// like <cassert>'s assert). On failure it logs the stringized condition with
// function/file/line through spdlog, then throws std::runtime_error carrying
// the same message so callers can catch and surface it. The condition is
// evaluated exactly once; the do-while(0) wrapper makes the macro behave as a
// single statement (safe in un-braced if/else).
#define INFINICORE_ASSERT(CONDITION__) \
    do { \
        if (!(CONDITION__)) { \
            SPDLOG_ERROR( \
                "Assertion `{}` failed from {} at {}:{}", \
                #CONDITION__, __func__, __FILE__, __LINE__); \
            throw std::runtime_error( \
                std::string("Assertion `") + #CONDITION__ + "` failed from " + __func__ + " at " + __FILE__ + ":" + std::to_string(__LINE__)); \
        } \
    } while (0)
...@@ -60,7 +60,12 @@ class TensorInitializer: ...@@ -60,7 +60,12 @@ class TensorInitializer:
# Handle real floating-point types # Handle real floating-point types
if mode == TensorInitializer.RANDOM: if mode == TensorInitializer.RANDOM:
return torch.rand(shape, dtype=torch_dtype, device=torch_device_str) scale = kwargs.get("scale", 1.0)
bias = kwargs.get("bias", 0.0)
return (
torch.rand(shape, dtype=torch_dtype, device=torch_device_str) * scale
+ bias
)
elif mode == TensorInitializer.ZEROS: elif mode == TensorInitializer.ZEROS:
return torch.zeros(shape, dtype=torch_dtype, device=torch_device_str) return torch.zeros(shape, dtype=torch_dtype, device=torch_device_str)
elif mode == TensorInitializer.ONES: elif mode == TensorInitializer.ONES:
......
import sys
import os
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
import torch
import infinicore
from framework import BaseOperatorTest, TensorSpec, TestCase, GenericTestRunner
# Test cases format: (in_shape, proj_w_shape)
_TEST_CASES_DATA = [
    ((32, 4096), (4096, 4096)),
]

# Per-dtype comparison tolerances used when checking infinicore output
# against the PyTorch reference.
_TOLERANCE_MAP = {
    infinicore.float16: {"atol": 0, "rtol": 1e-2},
    infinicore.float32: {"atol": 1e-4, "rtol": 1e-3},
    infinicore.bfloat16: {"atol": 0, "rtol": 5e-2},
}

# Every test case is instantiated once per dtype listed here.
_TENSOR_DTYPES = [infinicore.float16, infinicore.float32, infinicore.bfloat16]
def parse_test_cases():
    """Build one TestCase per (shape pair, dtype) combination.

    Each case carries three tensor specs: the input, the projection
    weight, and a staging tensor with the input's shape that the graph
    test copies data into before replay.
    """

    def _build(in_shape, w_shape, dtype):
        # One spec per tensor handed to the operator under test.
        spec_in = TensorSpec.from_tensor(in_shape, dtype=dtype)
        spec_w = TensorSpec.from_tensor(w_shape, dtype=dtype)
        spec_tmp = TensorSpec.from_tensor(in_shape, dtype=dtype)
        # Out-of-place
        return TestCase(
            inputs=[spec_in, spec_w, spec_tmp],
            kwargs={},
            output_spec=None,
            comparison_target=None,
            tolerance=_TOLERANCE_MAP[dtype],
            description="Graph",
        )

    return [
        _build(in_shape, w_shape, dtype)
        for in_shape, w_shape in _TEST_CASES_DATA
        for dtype in _TENSOR_DTYPES
    ]
class OpTest(BaseOperatorTest):
    """Operator test exercising the static compute-graph infrastructure."""

    def __init__(self):
        super().__init__("Graph")

    def get_test_cases(self):
        return parse_test_cases()

    def torch_operator(self, *args, **kwargs):
        # Eager PyTorch reference for the recorded computation.
        lhs, rhs = args[0], args[1]
        return torch.matmul(lhs, rhs)

    def infinicore_operator(self, *args, **kwargs):
        """Record graph and run"""
        lhs, rhs, staging = args[0], args[1], args[2]
        # Record a matmul that reads from the (still unfilled) staging tensor.
        infinicore.start_graph_recording()
        result = infinicore.matmul(staging, rhs)
        graph = infinicore.stop_graph_recording()
        # Fill the staging tensor with real data, then replay the graph;
        # `result` is updated by the replay.
        staging.copy_(lhs)
        graph.run()
        return result
def main():
    """Main entry point"""
    # Instantiate the generic runner with this operator test and hand it
    # control of the process exit code.
    GenericTestRunner(OpTest).run_and_exit()


if __name__ == "__main__":
    main()
...@@ -268,6 +268,9 @@ target("infinirt") ...@@ -268,6 +268,9 @@ target("infinirt")
add_deps("infinirt-hygon") add_deps("infinirt-hygon")
end end
set_languages("cxx17") set_languages("cxx17")
if not is_plat("windows") then
add_cxflags("-fPIC")
end
set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")) set_installdir(os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini"))
add_files("src/infinirt/*.cc") add_files("src/infinirt/*.cc")
add_installfiles("include/infinirt.h", {prefixdir = "include"}) add_installfiles("include/infinirt.h", {prefixdir = "include"})
...@@ -390,6 +393,7 @@ target("infinicore_cpp_api") ...@@ -390,6 +393,7 @@ target("infinicore_cpp_api")
add_files("src/infinicore/context/*.cc") add_files("src/infinicore/context/*.cc")
add_files("src/infinicore/context/*/*.cc") add_files("src/infinicore/context/*/*.cc")
add_files("src/infinicore/tensor/*.cc") add_files("src/infinicore/tensor/*.cc")
add_files("src/infinicore/graph/*.cc")
add_files("src/infinicore/nn/*.cc") add_files("src/infinicore/nn/*.cc")
add_files("src/infinicore/ops/*/*.cc") add_files("src/infinicore/ops/*/*.cc")
add_files("src/utils/*.cc") add_files("src/utils/*.cc")
...@@ -418,6 +422,8 @@ target("_infinicore") ...@@ -418,6 +422,8 @@ target("_infinicore")
add_packages("pybind11") add_packages("pybind11")
set_languages("cxx17") set_languages("cxx17")
add_deps("infinicore_cpp_api")
set_kind("shared") set_kind("shared")
local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini") local INFINI_ROOT = os.getenv("INFINI_ROOT") or (os.getenv(is_host("windows") and "HOMEPATH" or "HOME") .. "/.infini")
add_includedirs(INFINI_ROOT.."/include", { public = true }) add_includedirs(INFINI_ROOT.."/include", { public = true })
...@@ -425,14 +431,7 @@ target("_infinicore") ...@@ -425,14 +431,7 @@ target("_infinicore")
add_linkdirs(INFINI_ROOT.."/lib") add_linkdirs(INFINI_ROOT.."/lib")
add_links("infiniop", "infinirt", "infiniccl") add_links("infiniop", "infinirt", "infiniccl")
add_files("src/infinicore/*.cc")
add_files("src/infinicore/context/*.cc")
add_files("src/infinicore/context/*/*.cc")
add_files("src/infinicore/tensor/*.cc")
add_files("src/infinicore/nn/*.cc")
add_files("src/infinicore/ops/*/*.cc")
add_files("src/infinicore/pybind11/**.cc") add_files("src/infinicore/pybind11/**.cc")
add_files("src/utils/*.cc")
set_installdir("python/infinicore") set_installdir("python/infinicore")
target_end() target_end()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment