Unverified Commit a0abcb2c authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #285 from InfiniTensor/issue/137_new

issue/137: 添加causal_softmax测例,更新readme(合并)
parents 7eb94082 be01afcf
...@@ -20,9 +20,12 @@ def run_tests(args): ...@@ -20,9 +20,12 @@ def run_tests(args):
"sub.py", "sub.py",
"swiglu.py", "swiglu.py",
"attention.py", "attention.py",
"causal_softmax.py",
"rearrange.py",
"mul.py"
]: ]:
result = subprocess.run( result = subprocess.run(
f"python {test} {args}", text=True, encoding="utf-8", shell=True f"python {test} {args} --debug", text=True, encoding="utf-8", shell=True
) )
if result.returncode != 0: if result.returncode != 0:
failed.append(test) failed.append(test)
......
...@@ -5,16 +5,17 @@ ...@@ -5,16 +5,17 @@
/* /*
* Declare all the tests here * Declare all the tests here
*/ */
DECLARE_INFINIOP_TEST(add)
DECLARE_INFINIOP_TEST(clip)
DECLARE_INFINIOP_TEST(gemm) DECLARE_INFINIOP_TEST(gemm)
DECLARE_INFINIOP_TEST(mul)
DECLARE_INFINIOP_TEST(random_sample) DECLARE_INFINIOP_TEST(random_sample)
DECLARE_INFINIOP_TEST(rearrange)
DECLARE_INFINIOP_TEST(rms_norm) DECLARE_INFINIOP_TEST(rms_norm)
DECLARE_INFINIOP_TEST(mul)
DECLARE_INFINIOP_TEST(rope) DECLARE_INFINIOP_TEST(rope)
DECLARE_INFINIOP_TEST(sub) DECLARE_INFINIOP_TEST(clip)
DECLARE_INFINIOP_TEST(swiglu) DECLARE_INFINIOP_TEST(swiglu)
DECLARE_INFINIOP_TEST(add)
DECLARE_INFINIOP_TEST(causal_softmax)
DECLARE_INFINIOP_TEST(rearrange)
DECLARE_INFINIOP_TEST(sub)
#define REGISTER_INFINIOP_TEST(name) \ #define REGISTER_INFINIOP_TEST(name) \
{ \ { \
...@@ -31,16 +32,17 @@ DECLARE_INFINIOP_TEST(swiglu) ...@@ -31,16 +32,17 @@ DECLARE_INFINIOP_TEST(swiglu)
*/ */
#define TEST_BUILDER_MAPPINGS \ #define TEST_BUILDER_MAPPINGS \
{ \ { \
REGISTER_INFINIOP_TEST(add) \
REGISTER_INFINIOP_TEST(clip) \
REGISTER_INFINIOP_TEST(gemm) \ REGISTER_INFINIOP_TEST(gemm) \
REGISTER_INFINIOP_TEST(mul) \
REGISTER_INFINIOP_TEST(random_sample) \ REGISTER_INFINIOP_TEST(random_sample) \
REGISTER_INFINIOP_TEST(rearrange) \ REGISTER_INFINIOP_TEST(add) \
REGISTER_INFINIOP_TEST(rms_norm) \ REGISTER_INFINIOP_TEST(mul) \
REGISTER_INFINIOP_TEST(clip) \
REGISTER_INFINIOP_TEST(swiglu) \
REGISTER_INFINIOP_TEST(rope) \ REGISTER_INFINIOP_TEST(rope) \
REGISTER_INFINIOP_TEST(rms_norm) \
REGISTER_INFINIOP_TEST(causal_softmax) \
REGISTER_INFINIOP_TEST(rearrange) \
REGISTER_INFINIOP_TEST(sub) \ REGISTER_INFINIOP_TEST(sub) \
REGISTER_INFINIOP_TEST(swiglu) \
} }
namespace infiniop_test { namespace infiniop_test {
......
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::causal_softmax {

// Tensors attached to one causal_softmax test case, loaded from the GGUF
// test file in Test::build and consumed by Test::run.
struct Test::Attributes {
    std::shared_ptr<Tensor> x;   // input tensor (second arg to the operator)
    std::shared_ptr<Tensor> y;   // output buffer written by the operator
    std::shared_ptr<Tensor> ans; // reference result that y is compared against
};
// Construct a causal_softmax test case from the tensors parsed out of the
// GGUF file. Throws std::runtime_error if any required tensor is missing.
// `attributes` is accepted for interface uniformity; this op has none.
std::shared_ptr<Test> Test::build(
    std::unordered_map<std::string, std::vector<uint8_t>> attributes,
    std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
    double rtol, double atol) {
    auto test = std::shared_ptr<Test>(new Test(rtol, atol));
    test->_attributes = new Attributes();
    // x (input), y (output buffer) and ans (reference) are all mandatory.
    for (const auto *required : {"x", "y", "ans"}) {
        if (tensors.find(required) == tensors.end()) {
            throw std::runtime_error("Invalid Test");
        }
    }
    test->_attributes->x = tensors["x"];
    test->_attributes->y = tensors["y"];
    test->_attributes->ans = tensors["ans"];
    return test;
}
// Run one causal_softmax test: build the op descriptor, execute once and
// verify against the reference answer, then benchmark.
// Fix: the original leaked both `workspace` (infinirtMalloc) and `op_desc`
// on every exit path; all paths now release what they acquired.
std::shared_ptr<infiniop_test::Result> Test::run(
    infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
    infiniopCausalSoftmaxDescriptor_t op_desc;
    auto y = _attributes->y->to(device, device_id);
    auto x = _attributes->x->to(device, device_id);
    // NOTE(review): this device copy appears unused — allClose below compares
    // against the host-side _attributes->ans. Kept for behavior parity; confirm intent.
    auto ans = _attributes->ans->to(device, device_id);
    CHECK_OR(infiniopCreateCausalSoftmaxDescriptor(handle, &op_desc,
                                                   y->desc(),
                                                   x->desc()),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to create op descriptor."));
    size_t workspace_size;
    CHECK_OR(infiniopGetCausalSoftmaxWorkspaceSize(op_desc, &workspace_size),
             infiniopDestroyCausalSoftmaxDescriptor(op_desc);
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));
    void *workspace = nullptr;
    CHECK_OR(infinirtMalloc(&workspace, workspace_size),
             infiniopDestroyCausalSoftmaxDescriptor(op_desc);
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
    // Release device-side resources on every exit path below.
    auto release = [&]() {
        infinirtFree(workspace);
        infiniopDestroyCausalSoftmaxDescriptor(op_desc);
    };
    CHECK_OR(infiniopCausalSoftmax(op_desc, workspace, workspace_size,
                                   y->data(),
                                   x->data(),
                                   nullptr),
             release();
             return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));
    // Correctness check: y (written on device) vs. the F64 reference answer.
    try {
        allClose(y, _attributes->ans, _rtol, _atol);
    } catch (const std::exception &e) {
        release();
        return TEST_FAILED(RESULT_INCORRECT, e.what());
    }
    // Timing: re-run the already-validated op; result correctness was checked above.
    double elapsed_time = benchmark(
        [=]() {
            infiniopCausalSoftmax(
                op_desc, workspace, workspace_size,
                y->data(),
                x->data(),
                nullptr);
        },
        warm_ups, iterations);
    release();
    return TEST_PASSED(elapsed_time);
}
// causal_softmax test cases carry no scalar attributes in the GGUF file.
std::vector<std::string> Test::attribute_names() {
    return {};
}

// Tensors every test case must provide (enforced in Test::build).
std::vector<std::string> Test::tensor_names() {
    return {"x", "y", "ans"};
}

// Tensors produced by the operator; run() checks "y" against "ans".
std::vector<std::string> Test::output_names() {
    return {"y"};
}
// Human-readable summary of the test case: op name, tensor infos, tolerances.
std::string Test::toString() const {
    std::ostringstream repr;
    repr << op_name() << std::endl
         << "- x: " << _attributes->x->info() << std::endl
         << "- y: " << _attributes->y->info() << std::endl
         << "- ans: " << _attributes->ans->info() << std::endl;
    // Tolerances are printed in scientific notation with two decimals.
    repr << std::scientific << std::setprecision(2)
         << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
    return repr.str();
}
// _attributes is heap-allocated in Test::build(); release it here.
Test::~Test() {
    delete _attributes;
}

} // namespace infiniop_test::causal_softmax
...@@ -65,3 +65,11 @@ Name: test.0.ans, NDims: 2, Shape: [6, 4], DataType: F64, DataOffset: 320 ...@@ -65,3 +65,11 @@ Name: test.0.ans, NDims: 2, Shape: [6, 4], DataType: F64, DataOffset: 320
- `Meta` 中必须包含 `test_count` ,表示测例数量。 - `Meta` 中必须包含 `test_count` ,表示测例数量。
- 每个测例的 `Meta``Tensor` 名字以 `test.[id].` 开头,后接具体信息名称。数字 `[id]` 表示测例编号。编号必须为 0 到 test_count-1. - 每个测例的 `Meta``Tensor` 名字以 `test.[id].` 开头,后接具体信息名称。数字 `[id]` 表示测例编号。编号必须为 0 到 test_count-1.
- `Tensor` 名字接 `.strides` 表示步长,若没有则默认为连续。 - `Tensor` 名字接 `.strides` 表示步长,若没有则默认为连续。
### GGUF测例构建要求
不参与计算的 `Tensor` 不应存储数据,避免 `GGUF` 文件中出现冗余内容。
此类 `Tensor` 应使用 `np.empty(tuple(0 for _ in shape), dtype=dtype)` 构造其数据字段, 且 `GGUF` 需存储此张量的形状数据 `.shape`、步长数据 `.strides`,否则无法成功构建,可使用 `contiguous_gguf_strides(shape)` 计算步长数据。
对于 `Elementwise` 算子,需包含零步长(zero-stride)测试。对于步长为 0 的张量,`GGUF` 不应存储冗余广播数据,可使用 `process_zero_stride_tensor` 进行冗余数据移除,同时必须在 `GGUF` 中提供此张量的实际形状数据 `.shape`,否则无法成功构建。
\ No newline at end of file
import numpy as np
import gguf
# Fix: removed `from ast import List` — it imported the AST node class and
# was immediately shadowed by the typing.List import below.
from typing import List
from enum import Enum, auto
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
def causal_softmax(x):
    """Reference causal softmax over the last axis of `x`.

    Positions above the (bottom-right-aligned) causal diagonal are masked to
    -inf before a numerically-stable softmax. Works on any array whose last
    two axes are (rows, cols); leading axes are treated as batch dims.

    Raises:
        TypeError: if `x` is not a numpy array.
    """
    if not isinstance(x, np.ndarray):
        raise TypeError("Input must be a NumPy array.")
    # Strictly-lower triangle flipped on the last two axes yields the
    # bottom-right-aligned causal mask (1 marks a masked position).
    causal_mask = np.flip(np.tril(np.ones_like(x), k=-1), axis=(-2, -1))
    logits = np.where(causal_mask == 1, -np.inf, x)
    # Subtract the row max for numerical stability before exponentiating.
    weights = np.exp(logits - logits.max(axis=-1, keepdims=True))
    return weights / weights.sum(axis=-1, keepdims=True)
def random_tensor(shape, dtype):
    """Uniform random tensor of `shape`/`dtype` with values in [-5e-4, 5e-4)."""
    scale = 1e-3
    offset = 0.5 * scale  # shifts the [0, scale) range to be centered on zero
    return scale * np.random.rand(*shape).astype(dtype) - offset
class CausalSoftmaxTestCase(InfiniopTestCase):
    """One causal_softmax test case serialized into the GGUF test file.

    Holds the input tensor `x`, the (possibly data-less) output placeholder
    `y`, and optional explicit shapes/strides for both. Shapes are carried
    separately because `y` may be a zero-size buffer (see the __main__ block),
    so its logical shape cannot be recovered from the ndarray itself.
    """

    def __init__(
        self,
        x: np.ndarray,
        y: np.ndarray,
        shape_x: List[int] | None,
        shape_y: List[int] | None,
        stride_x: List[int] | None,
        stride_y: List[int] | None,
    ):
        super().__init__("causal_softmax")
        self.x = x
        self.y = y
        self.shape_x=shape_x
        self.shape_y=shape_y
        # None strides mean "contiguous"; x then gets no .strides entry at all,
        # while y's strides are always written (computed from shape_y if None).
        self.stride_x = stride_x
        self.stride_y = stride_y

    def write_test(self, test_writer: "InfiniopTestWriter"):
        """Write this case's metadata, tensors and F64 reference answer."""
        super().write_test(test_writer)
        # Explicit shape entries are only emitted when provided.
        if self.shape_x is not None:
            test_writer.add_array(test_writer.gguf_key("x.shape"), self.shape_x)
        if self.shape_y is not None:
            test_writer.add_array(test_writer.gguf_key("y.shape"), self.shape_y)
        if self.stride_x is not None:
            test_writer.add_array(test_writer.gguf_key("x.strides"), gguf_strides(*self.stride_x))
        # y.strides is mandatory: explicit strides, or contiguous strides
        # derived from shape_y (required because y stores no data).
        test_writer.add_array(
            test_writer.gguf_key("y.strides"),
            gguf_strides(*self.stride_y if self.stride_y is not None else contiguous_gguf_strides(self.shape_y))
        )
        test_writer.add_tensor(
            test_writer.gguf_key("x"),
            self.x,
            raw_dtype=np_dtype_to_ggml(self.x.dtype),
        )
        test_writer.add_tensor(
            test_writer.gguf_key("y"),
            self.y,
            raw_dtype=np_dtype_to_ggml(self.y.dtype),
        )
        # Reference answer computed in float64 for accuracy, stored as F64.
        ans = causal_softmax(
            self.x.astype(np.float64),
        )
        test_writer.add_tensor(
            test_writer.gguf_key("ans"), ans, raw_dtype=gguf.GGMLQuantizationType.F64
        )
if __name__ == "__main__":
    test_writer = InfiniopTestWriter("causal_softmax.gguf")
    # ==========================================================================
    #  Configuration — not meant to be imported from other modules.
    #  Each entry is (shape, stride_x, stride_y); None means contiguous.
    # ==========================================================================
    _TEST_CASES_ = [
        ((3, 3), None, None),
        ((32, 512), None, None),
        ((32, 512), (1024, 1), (1024, 1)),
        ((32, 5, 5), None, None),
        ((32, 20, 512), None, None),
        ((32, 20, 512), (20480, 512, 1), None),
    ]
    _TENSOR_DTYPES_ = [np.float16, np.float32]
    # y carries no payload (zero-size buffer): its real shape/strides are
    # written as separate GGUF entries by CausalSoftmaxTestCase.write_test.
    test_cases = [
        CausalSoftmaxTestCase(
            random_tensor(shape, dtype),
            np.empty(tuple(0 for _ in shape), dtype=dtype),
            shape,
            shape,
            stride_x,
            stride_y,
        )
        for dtype in _TENSOR_DTYPES_
        for shape, stride_x, stride_y in _TEST_CASES_
    ]
    test_writer.add_tests(test_cases)
    test_writer.save()
...@@ -34,7 +34,7 @@ _TEST_CASES_ = [ ...@@ -34,7 +34,7 @@ _TEST_CASES_ = [
] ]
# Data types used for testing # Data types used for testing
_TENSOR_DTYPES = [torch.float16] _TENSOR_DTYPES = [torch.float16, torch.float32]
# Tolerance map for different data types # Tolerance map for different data types
_TOLERANCE_MAP = { _TOLERANCE_MAP = {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment