Unverified Commit c203635b authored by PanZezhong1725, committed by GitHub

Merge pull request #231 from InfiniTensor/issue/204

Issue/228: Support zero strides in the infiniop-test framework
parents 6ca0e313 ce2c4813
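A zero stride along an axis means every index on that axis maps to the same stored element, which is how broadcast inputs and placeholder outputs are described in the new test cases. A minimal NumPy sketch of that layout (illustrative only, not code from this merge request):

```python
import numpy as np
from numpy.lib.stride_tricks import as_strided

row = np.arange(4, dtype=np.float32)  # only 4 elements are actually stored
# View the row as a (3, 4) tensor whose first axis has stride 0:
# all three "rows" alias the same memory, no extra data is needed.
view = as_strided(row, shape=(3, 4), strides=(0, row.itemsize))
print(view)        # three identical rows
print(view + 1.0)  # elementwise ops see a full (3, 4) operand
```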
......@@ -10,6 +10,7 @@ DECLARE_INFINIOP_TEST(random_sample)
DECLARE_INFINIOP_TEST(mul)
DECLARE_INFINIOP_TEST(clip)
DECLARE_INFINIOP_TEST(swiglu)
DECLARE_INFINIOP_TEST(add)
#define REGISTER_INFINIOP_TEST(name) \
{ \
......@@ -18,6 +19,7 @@ DECLARE_INFINIOP_TEST(swiglu)
infiniop_test::name::Test::build, \
infiniop_test::name::Test::attribute_names(), \
infiniop_test::name::Test::tensor_names(), \
infiniop_test::name::Test::output_names(), \
}},
/*
......@@ -27,6 +29,7 @@ DECLARE_INFINIOP_TEST(swiglu)
{ \
REGISTER_INFINIOP_TEST(gemm) \
REGISTER_INFINIOP_TEST(random_sample) \
REGISTER_INFINIOP_TEST(add) \
REGISTER_INFINIOP_TEST(mul) \
REGISTER_INFINIOP_TEST(clip) \
REGISTER_INFINIOP_TEST(swiglu) \
......
......@@ -58,7 +58,9 @@ private:
public:
Tensor(const GGUFTensorInfo *info,
const void *ggml_ptr,
const GGUFKeyValue *strides_meta = nullptr);
const GGUFKeyValue *shape_meta = nullptr,
const GGUFKeyValue *strides_meta = nullptr,
bool isOutput = false);
Tensor(std::shared_ptr<Memory> memory, size_t offset,
const std::vector<size_t> &shape,
const std::vector<ptrdiff_t> &strides,
......
......@@ -92,6 +92,7 @@ public:
\
static std::vector<std::string> attribute_names(); \
static std::vector<std::string> tensor_names(); \
static std::vector<std::string> output_names(); \
\
std::shared_ptr<infiniop_test::Result> run( \
infiniopHandle_t handle, infiniDevice_t device, int device_id, \
......@@ -121,6 +122,7 @@ struct TestBuilder {
BuilderFunc build;
std::vector<std::string> attribute_names;
std::vector<std::string> tensor_names;
std::vector<std::string> output_names;
};
} // namespace infiniop_test
......
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::add {
struct Test::Attributes {
std::shared_ptr<Tensor> a;
std::shared_ptr<Tensor> b;
std::shared_ptr<Tensor> c;
std::shared_ptr<Tensor> ans;
};
std::shared_ptr<Test> Test::build(
std::unordered_map<std::string, std::vector<uint8_t>> attributes,
std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
double rtol, double atol) {
auto test = std::shared_ptr<Test>(new Test(rtol, atol));
test->_attributes = new Attributes();
if (tensors.find("a") == tensors.end()
|| tensors.find("b") == tensors.end()
|| tensors.find("c") == tensors.end()
|| tensors.find("ans") == tensors.end()) {
throw std::runtime_error("Invalid Test");
}
test->_attributes->a = tensors["a"];
test->_attributes->b = tensors["b"];
test->_attributes->c = tensors["c"];
test->_attributes->ans = tensors["ans"];
return test;
}
std::shared_ptr<infiniop_test::Result> Test::run(
infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
infiniopAddDescriptor_t op_desc;
auto a = _attributes->a->to(device, device_id);
auto b = _attributes->b->to(device, device_id);
auto c = _attributes->c->to(device, device_id);
CHECK_OR(infiniopCreateAddDescriptor(handle, &op_desc,
c->desc(),
a->desc(),
b->desc()),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to create op descriptor."));
size_t workspace_size;
CHECK_OR(infiniopGetAddWorkspaceSize(op_desc, &workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));
void *workspace;
CHECK_OR(infinirtMalloc(&workspace, workspace_size),
return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
CHECK_OR(infiniopAdd(op_desc, workspace, workspace_size,
c->data(),
a->data(),
b->data(),
nullptr),
return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));
try {
allClose(c, _attributes->ans, _rtol, _atol);
} catch (const std::exception &e) {
return TEST_FAILED(RESULT_INCORRECT, e.what());
}
double elapsed_time = 0.;
elapsed_time = benchmark(
[=]() {
infiniopAdd(
op_desc, workspace, workspace_size,
c->data(),
a->data(),
b->data(),
nullptr);
},
warm_ups, iterations);
return TEST_PASSED(elapsed_time);
}
std::vector<std::string> Test::attribute_names() {
return {};
}
std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"c"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
oss << "- a: " << _attributes->a->info() << std::endl;
oss << "- b: " << _attributes->b->info() << std::endl;
oss << "- c: " << _attributes->c->info() << std::endl;
oss << std::scientific << std::setprecision(2);
oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
return oss.str();
}
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::add
......@@ -97,6 +97,10 @@ std::vector<std::string> Test::tensor_names() {
return {"x", "min_val", "max_val", "y", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"y"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -113,6 +113,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
std::vector<std::string> Test::output_names() {
return {};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -87,6 +87,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"c"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -109,6 +109,10 @@ std::vector<std::string> Test::tensor_names() {
return {"data", "ans", "result"};
}
std::vector<std::string> Test::output_names() {
return {"result"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -82,6 +82,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
std::vector<std::string> Test::output_names() {
return {"c"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -98,20 +98,28 @@ void *Tensor::data() const {
Tensor::Tensor(const GGUFTensorInfo *info,
const void *ggml_ptr,
const GGUFKeyValue *strides_meta) {
const GGUFKeyValue *shape_meta,
const GGUFKeyValue *strides_meta,
bool isOutput) {
_ggml_type = info->ggml_type;
_offset = 0;
size_t ndim = static_cast<size_t>(info->ndim);
// `_shape` stores the real tensor shape (from shape_meta); `temp_shape` stores the shape used for rearrange and for computing the memory size
_shape = std::vector<size_t>(ndim);
std::vector<size_t> temp_shape(ndim);
_strides = std::vector<ptrdiff_t>(ndim);
std::vector<ptrdiff_t> contiguous_strides(ndim);
for (size_t i = 0; i < ndim; i++) {
_shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]);
temp_shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]);
if (i == 0) {
contiguous_strides[ndim - 1] = (ptrdiff_t)1;
} else {
contiguous_strides[ndim - 1 - i] = (ptrdiff_t)info->shape[i - 1] * contiguous_strides[ndim - i];
}
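// Output tensors are stored in the GGUF file only as placeholders, so zero the source strides
// (every destination element is read from the same location); the real output shape is taken
// from shape_meta further below.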
if (isOutput) {
contiguous_strides[i] = (ptrdiff_t)0;
}
}
if (strides_meta == nullptr) {
......@@ -120,7 +128,6 @@ Tensor::Tensor(const GGUFTensorInfo *info,
}
} else {
for (size_t i = 0; i < ndim; i++) {
_shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]);
if (strides_meta->gguf_type == GGUF_TYPE_INT64) {
_strides[i] = (ptrdiff_t)(reinterpret_cast<const int64_t *>(
strides_meta->value.data())[ndim - 1 - i]);
......@@ -133,18 +140,62 @@ Tensor::Tensor(const GGUFTensorInfo *info,
}
}
infiniopCreateTensorDescriptor(&_desc, ndim, _shape.data(), _strides.data(), ggmlTypeToInfiniType(_ggml_type));
if (isOutput) {
if (shape_meta == nullptr) {
throw std::runtime_error("Error Creating Tensor: shape_meta cannot be null for output tensor");
}
for (size_t i = 0; i < ndim; i++) {
if (shape_meta->gguf_type == GGUF_TYPE_INT64) {
int64_t val = reinterpret_cast<const int64_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
temp_shape[i] = static_cast<size_t>(val);
} else if (shape_meta->gguf_type == GGUF_TYPE_INT32) {
int32_t val = reinterpret_cast<const int32_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
temp_shape[i] = static_cast<size_t>(val);
} else {
throw std::runtime_error("Error Creating Tensor: Unsupported shape type");
}
}
}
infiniopCreateTensorDescriptor(&_desc, ndim, temp_shape.data(), _strides.data(), ggmlTypeToInfiniType(_ggml_type));
size_t size;
calculateTensorMemory(size, _offset, _shape, _strides, ggmlTypeSize(_ggml_type));
calculateTensorMemory(size, _offset, temp_shape, _strides, ggmlTypeSize(_ggml_type));
_memory = std::make_shared<Memory>(size, INFINI_DEVICE_CPU, 0);
utils::rearrange(
(char *)_memory->ptr() + _offset,
(char *)ggml_ptr + info->data_offset,
_shape.data(),
temp_shape.data(),
_strides.data(),
contiguous_strides.data(),
ndim,
ggmlTypeSize(_ggml_type));
if (shape_meta == nullptr) {
_shape = temp_shape;
} else {
for (size_t i = 0; i < ndim; i++) {
if (shape_meta->gguf_type == GGUF_TYPE_INT64) {
int64_t val = reinterpret_cast<const int64_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
_shape[i] = static_cast<size_t>(val);
} else if (shape_meta->gguf_type == GGUF_TYPE_INT32) {
int32_t val = reinterpret_cast<const int32_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
_shape[i] = static_cast<size_t>(val);
} else {
throw std::runtime_error("Error Creating Tensor: Unsupported shape type");
}
}
}
}
Tensor::Tensor(std::shared_ptr<Memory> memory, size_t offset,
......
......@@ -90,14 +90,19 @@ std::shared_ptr<Result> runTest(const GGUFFileReader &gguf_reader,
attrs[attr_name] = attr->second->value;
}
}
for (auto tensor_name : builder.tensor_names) {
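// Each tensor may carry optional shape/strides metadata; tensors named in output_names are
// built as outputs (placeholder payload, shape taken from shape_meta).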
auto info = tensor_info.find("test." + std::to_string(test_id) + "." + tensor_name);
if (info != tensor_info.end()) {
auto shape = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".shape");
auto strides = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".strides");
bool is_output = std::find(builder.output_names.begin(), builder.output_names.end(), tensor_name) != builder.output_names.end();
tensors[tensor_name] = std::make_shared<Tensor>(
info->second.get(),
gguf_reader.getGgmlStart(),
strides != meta.end() ? strides->second.get() : nullptr);
shape != meta.end() ? shape->second.get() : nullptr,
strides != meta.end() ? strides->second.get() : nullptr,
is_output);
}
}
std::shared_ptr<infiniop_test::base::Test> test;
......
from .infiniop_test import InfiniopTestCase, InfiniopTestWriter, np_dtype_to_ggml, gguf_strides
from .infiniop_test import InfiniopTestCase, InfiniopTestWriter, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
......@@ -29,6 +29,14 @@ def gguf_strides(*args: int) -> list[int] | None:
return list(args)[::-1] if args else None
def contiguous_gguf_strides(shape: tuple[int, ...]) -> list[int]:
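# Row-major (C-contiguous) strides in element units, e.g. shape (2, 3, 4) -> [12, 4, 1].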
strides = []
acc = 1
for size in reversed(shape):
strides.append(acc)
acc *= size
return strides[::-1]
class InfiniopTestCase:
op_name: str
......
import numpy as np
import gguf
from typing import List
from numpy.lib.stride_tricks import as_strided
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
def add(
a: np.ndarray,
b: np.ndarray,
):
return a + b
def process_tensor(a, b, stride_a=None, stride_b=None):
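# Where a requested stride is 0, keep a single element along that axis so the stored
# payload matches a zero-stride (broadcast) view of the tensor.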
def normalize_stride(tensor, stride):
if stride:
slices = tuple(slice(0, 1) if s == 0 else slice(None) for s in stride)
return tensor[slices]
else:
return tensor
a_unique = normalize_stride(a, stride_a)
b_unique = normalize_stride(b, stride_b)
return a_unique, b_unique
class AddTestCase(InfiniopTestCase):
def __init__(
self,
a: np.ndarray,
shape_a: List[int] | None,
stride_a: List[int] | None,
b: np.ndarray,
shape_b: List[int] | None,
stride_b: List[int] | None,
c: np.ndarray,
shape_c: List[int] | None,
stride_c: List[int] | None,
):
super().__init__("add")
self.a = a
self.shape_a = shape_a
self.stride_a = stride_a
self.b = b
self.shape_b = shape_b
self.stride_b = stride_b
self.c = c
self.shape_c = shape_c
self.stride_c = stride_c
def write_test(self, test_writer: "InfiniopTestWriter"):
super().write_test(test_writer)
if self.shape_a is not None:
test_writer.add_array(test_writer.gguf_key("a.shape"), self.shape_a)
if self.shape_b is not None:
test_writer.add_array(test_writer.gguf_key("b.shape"), self.shape_b)
if self.shape_c is not None:
test_writer.add_array(test_writer.gguf_key("c.shape"), self.shape_c)
if self.stride_a is not None:
test_writer.add_array(test_writer.gguf_key("a.strides"), gguf_strides(*self.stride_a))
if self.stride_b is not None:
test_writer.add_array(test_writer.gguf_key("b.strides"), gguf_strides(*self.stride_b))
test_writer.add_array(
test_writer.gguf_key("c.strides"),
gguf_strides(*self.stride_c if self.stride_c is not None else contiguous_gguf_strides(self.shape_c))
)
test_writer.add_tensor(
test_writer.gguf_key("a"), self.a, raw_dtype=np_dtype_to_ggml(self.a.dtype)
)
test_writer.add_tensor(
test_writer.gguf_key("b"), self.b, raw_dtype=np_dtype_to_ggml(self.b.dtype)
)
test_writer.add_tensor(
test_writer.gguf_key("c"), self.c, raw_dtype=np_dtype_to_ggml(self.c.dtype)
)
ans = add(
self.a.astype(np.float64),
self.b.astype(np.float64),
)
test_writer.add_tensor(
test_writer.gguf_key("ans"), ans, raw_dtype=gguf.GGMLQuantizationType.F64
)
if __name__ == "__main__":
test_writer = InfiniopTestWriter("add.gguf")
test_cases = []
# ==============================================================================
# Configuration (Internal Use Only)
# ==============================================================================
# These are not meant to be imported from other modules
_TEST_CASES_ = [
# shape, a_stride, b_stride, c_stride
((13, 4), None, None, None),
((13, 4), (10, 1), (10, 1), (10, 1)),
((13, 4), (0, 1), None, None),
((13, 4, 4), None, None, None),
((13, 4, 4), (20, 4, 1), (20, 4, 1), (20, 4, 1)),
((13, 4, 4), (4, 0, 1), (0, 4, 1), None),
((16, 5632), None, None, None),
((16, 5632), (13312, 1), (13312, 1), (13312, 1)),
((4, 4, 5632), None, None, None),
((4, 4, 5632), (45056, 5632, 1), (45056, 5632, 1), (45056, 5632, 1)),
]
_TENSOR_DTYPES_ = [np.float32, np.float16]
for dtype in _TENSOR_DTYPES_:
for shape, stride_a, stride_b, stride_c in _TEST_CASES_:
a = np.random.rand(*shape).astype(dtype)
b = np.random.rand(*shape).astype(dtype)
c = np.empty(tuple(0 for _ in shape), dtype=dtype)
a, b = process_tensor(a, b, stride_a, stride_b)
if stride_c is None:
stride_c = contiguous_gguf_strides(shape)
test_case = AddTestCase(
a=a,
shape_a=shape,
stride_a=stride_a,
b=b,
shape_b=shape,
stride_b=stride_b,
c=c,
shape_c=shape,
stride_c=stride_c,
)
test_cases.append(test_case)
test_writer.add_tests(test_cases)
test_writer.save()
\ No newline at end of file
......@@ -2,7 +2,7 @@ import numpy as np
import gguf
from typing import List
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
def mul(
a: np.ndarray,
......@@ -19,28 +19,44 @@ class MulTestCase(InfiniopTestCase):
def __init__(
self,
a: np.ndarray,
shape_a: List[int] | None,
stride_a: List[int] | None,
b: np.ndarray,
shape_b: List[int] | None,
stride_b: List[int] | None,
c: np.ndarray,
shape_c: List[int] | None,
stride_c: List[int] | None,
):
super().__init__("mul")
self.a = a
self.shape_a = shape_a
self.stride_a = stride_a
self.b = b
self.shape_b = shape_b
self.stride_b = stride_b
self.c = c
self.shape_c = shape_c
self.stride_c = stride_c
def write_test(self, test_writer: "InfiniopTestWriter"):
super().write_test(test_writer)
if self.shape_a is not None:
test_writer.add_array(test_writer.gguf_key("a.shape"), self.shape_a)
if self.shape_b is not None:
test_writer.add_array(test_writer.gguf_key("b.shape"), self.shape_b)
if self.shape_c is not None:
test_writer.add_array(test_writer.gguf_key("c.shape"), self.shape_c)
if self.stride_a is not None:
test_writer.add_array(test_writer.gguf_key("a.strides"), self.stride_a)
test_writer.add_array(test_writer.gguf_key("a.strides"), gguf_strides(*self.stride_a))
if self.stride_b is not None:
test_writer.add_array(test_writer.gguf_key("b.strides"), self.stride_b)
if self.stride_c is not None:
test_writer.add_array(test_writer.gguf_key("c.strides"), self.stride_c)
test_writer.add_array(test_writer.gguf_key("b.strides"), gguf_strides(*self.stride_b))
test_writer.add_array(
test_writer.gguf_key("c.strides"),
gguf_strides(*self.stride_c if self.stride_c is not None else contiguous_gguf_strides(self.shape_c))
)
test_writer.add_tensor(
test_writer.gguf_key("a"), self.a, raw_dtype=np_dtype_to_ggml(self.a.dtype)
)
......@@ -52,6 +68,7 @@ class MulTestCase(InfiniopTestCase):
)
a_fp64 = self.a.astype(np.float64)
b_fp64 = self.b.astype(np.float64)
ans_fp64 = np.multiply(a_fp64, b_fp64)
ans = mul(self.a, self.b)
test_writer.add_tensor(
......@@ -65,95 +82,43 @@ class MulTestCase(InfiniopTestCase):
if __name__ == '__main__':
test_writer = InfiniopTestWriter("mul.gguf")
test_cases = [
MulTestCase(
random_tensor((2, 3), np.float32),
gguf_strides(3, 1),
random_tensor((2, 3), np.float32),
gguf_strides(1, 2),
random_tensor((2, 3), np.float32),
gguf_strides(3, 1),
),
MulTestCase(
random_tensor((2, 3), np.float16),
gguf_strides(1, 2),
random_tensor((2, 3), np.float16),
gguf_strides(3, 1),
random_tensor((2, 3), np.float16),
gguf_strides(1, 2),
),
MulTestCase(
random_tensor((2, 3), np.float64),
gguf_strides(3, 1),
random_tensor((2, 3), np.float64),
gguf_strides(3, 1),
random_tensor((2, 3), np.float64),
gguf_strides(1, 2),
),
MulTestCase(
random_tensor((4, 6), np.float16),
gguf_strides(1, 4),
random_tensor((4, 6), np.float16),
gguf_strides(1, 5),
random_tensor((4, 6), np.float16),
gguf_strides(6, 1),
),
MulTestCase(
random_tensor((1, 2048), np.float16),
gguf_strides(1, 1),
random_tensor((1, 2048), np.float16),
gguf_strides(2048, 1),
random_tensor((1, 2048), np.float16),
gguf_strides(1, 1),
),
MulTestCase(
random_tensor((2048, 2048), np.float32),
None,
random_tensor((2048, 2048), np.float32),
gguf_strides(1, 2048),
random_tensor((2048, 2048), np.float32),
None,
),
MulTestCase(
random_tensor((2, 4, 2048), np.float16),
gguf_strides(4 * 2048, 2048, 1),
random_tensor((2, 4, 2048), np.float16),
gguf_strides(1, 2, 2 * 4),
random_tensor((2, 4, 2048), np.float16),
gguf_strides(4 * 2048, 2048, 1),
),
MulTestCase(
random_tensor((2, 4, 2048), np.float32),
gguf_strides(1, 2, 2 * 4),
random_tensor((2, 4, 2048), np.float32),
None,
random_tensor((2, 4, 2048), np.float32),
gguf_strides(1, 2, 2 * 4),
),
MulTestCase(
random_tensor((2048, 2560), np.float32),
gguf_strides(2560, 1),
random_tensor((2048, 2560), np.float32),
gguf_strides(1, 2048),
random_tensor((2048, 2560), np.float32),
gguf_strides(2560, 1),
),
MulTestCase(
random_tensor((4, 48, 64), np.float16),
gguf_strides(64 * 48, 64, 1),
random_tensor((4, 48, 64), np.float16),
gguf_strides(1, 4, 4 * 48),
random_tensor((4, 48, 64), np.float16),
None
),
MulTestCase(
random_tensor((4, 48, 64), np.float32),
None,
random_tensor((4, 48, 64), np.float32),
gguf_strides(1, 4, 4 * 48),
random_tensor((4, 48, 64), np.float32),
gguf_strides(48 * 64, 64, 1),
)
]
test_cases = []
_TEST_CASES_ = [
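# shape, a_stride, b_stride, c_stride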
((2, 3), (3, 1), (1, 2), (3, 1)),
((2, 3), (1, 2), (3, 1), (1, 2)),
((2, 3), (3, 1), (3, 1), (1, 2)),
((4, 6), (1, 4), (1, 5), (6, 1)),
((1, 2048), (1, 1), (2048, 1), (1, 1)),
((2048, 2048), None, (1, 2048), None),
((2, 4, 2048), (4 * 2048, 2048, 1), (1, 2, 8), (4 * 2048, 2048, 1)),
((2, 4, 2048), (1, 2, 8), None, (1, 2, 8)),
((2048, 2560), (2560, 1), (1, 2048), (2560, 1)),
((4, 48, 64), (64 * 48, 64, 1), (1, 4, 192), None),
((4, 48, 64), None, (1, 4, 192), (48 * 64, 64, 1)),
]
_TENSOR_DTYPES_ = [np.float32, np.float16]
for dtype in _TENSOR_DTYPES_:
for shape, stride_a, stride_b, stride_c in _TEST_CASES_:
a = random_tensor(shape, dtype)
b = random_tensor(shape, dtype)
c = np.empty(tuple(0 for _ in shape), dtype=dtype)
test_cases.append(
MulTestCase(
a=a,
shape_a=shape,
stride_a=stride_a,
b=b,
shape_b=shape,
stride_b=stride_b,
c=c,
shape_c=shape,
stride_c=stride_c,
)
)
test_writer.add_tests(test_cases)
test_writer.save()
......@@ -2,7 +2,7 @@ import numpy as np
import gguf
from typing import List
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
def swiglu(
......@@ -18,28 +18,44 @@ class SwiGLUTestCase(InfiniopTestCase):
def __init__(
self,
a: np.ndarray,
shape_a: List[int] | None,
stride_a: List[int] | None,
b: np.ndarray,
shape_b: List[int] | None,
stride_b: List[int] | None,
c: np.ndarray,
shape_c: List[int] | None,
stride_c: List[int] | None,
):
super().__init__("swiglu")
self.a = a
self.shape_a = shape_a
self.stride_a = stride_a
self.b = b
self.shape_b = shape_b
self.stride_b = stride_b
self.c = c
self.shape_c = shape_c
self.stride_c = stride_c
def write_test(self, test_writer: "InfiniopTestWriter"):
super().write_test(test_writer)
if self.shape_a is not None:
test_writer.add_array(test_writer.gguf_key("a.shape"), self.shape_a)
if self.shape_b is not None:
test_writer.add_array(test_writer.gguf_key("b.shape"), self.shape_b)
if self.shape_c is not None:
test_writer.add_array(test_writer.gguf_key("c.shape"), self.shape_c)
if self.stride_a is not None:
test_writer.add_array(test_writer.gguf_key("a.strides"), self.stride_a)
test_writer.add_array(test_writer.gguf_key("a.strides"), gguf_strides(*self.stride_a))
if self.stride_b is not None:
test_writer.add_array(test_writer.gguf_key("b.strides"), self.stride_b)
if self.stride_c is not None:
test_writer.add_array(test_writer.gguf_key("c.strides"), self.stride_c)
test_writer.add_array(test_writer.gguf_key("b.strides"), gguf_strides(*self.stride_b))
test_writer.add_array(
test_writer.gguf_key("c.strides"),
gguf_strides(*self.stride_c if self.stride_c is not None else contiguous_gguf_strides(self.shape_c))
)
test_writer.add_tensor(
test_writer.gguf_key("a"), self.a, raw_dtype=np_dtype_to_ggml(self.a.dtype)
)
......@@ -60,191 +76,42 @@ class SwiGLUTestCase(InfiniopTestCase):
if __name__ == "__main__":
test_writer = InfiniopTestWriter("swiglu.gguf")
test_cases = [
SwiGLUTestCase(
np.random.rand(64, 128).astype(np.float32),
None,
np.random.rand(64, 128).astype(np.float32),
None,
np.random.rand(64, 128).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(64, 121).astype(np.float32),
None,
np.random.rand(64, 121).astype(np.float32),
None,
np.random.rand(64, 121).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(15, 512).astype(np.float32),
None,
np.random.rand(15, 512).astype(np.float32),
None,
np.random.rand(15, 512).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4).astype(np.float32),
None,
np.random.rand(13, 4).astype(np.float32),
None,
np.random.rand(13, 4).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4).astype(np.float16),
None,
np.random.rand(13, 4).astype(np.float16),
None,
np.random.rand(13, 4).astype(np.float16),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4).astype(np.float32),
gguf_strides(10, 1),
np.random.rand(13, 4).astype(np.float32),
gguf_strides(10, 1),
np.random.rand(13, 4).astype(np.float32),
gguf_strides(10, 1),
),
SwiGLUTestCase(
np.random.rand(13, 4).astype(np.float16),
gguf_strides(10, 1),
np.random.rand(13, 4).astype(np.float16),
gguf_strides(10, 1),
np.random.rand(13, 4).astype(np.float16),
gguf_strides(10, 1),
),
SwiGLUTestCase(
np.random.rand(13, 4, 4).astype(np.float32),
None,
np.random.rand(13, 4, 4).astype(np.float32),
None,
np.random.rand(13, 4, 4).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4, 4).astype(np.float16),
None,
np.random.rand(13, 4, 4).astype(np.float16),
None,
np.random.rand(13, 4, 4).astype(np.float16),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4, 4).astype(np.float32),
gguf_strides(20, 4, 1),
np.random.rand(13, 4, 4).astype(np.float32),
gguf_strides(20, 4, 1),
np.random.rand(13, 4, 4).astype(np.float32),
gguf_strides(20, 4, 1),
),
SwiGLUTestCase(
np.random.rand(13, 4, 4).astype(np.float16),
gguf_strides(20, 4, 1),
np.random.rand(13, 4, 4).astype(np.float16),
gguf_strides(20, 4, 1),
np.random.rand(13, 4, 4).astype(np.float16),
gguf_strides(20, 4, 1),
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float32),
None,
np.random.rand(16, 5632).astype(np.float32),
None,
np.random.rand(16, 5632).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float16),
None,
np.random.rand(16, 5632).astype(np.float16),
None,
np.random.rand(16, 5632).astype(np.float16),
None,
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(13312, 1),
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(13312, 1),
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(13312, 1),
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(13312, 1),
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(13312, 1),
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(13312, 1),
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(5632, 1),
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(5632, 1),
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(1, 16),
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(5632, 1),
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(5632, 1),
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(1, 16),
),
SwiGLUTestCase(
np.random.rand(2, 3, 400).astype(np.float32),
gguf_strides(1200, 400, 1),
np.random.rand(2, 3, 400).astype(np.float32),
gguf_strides(1200, 400, 1),
np.random.rand(2, 3, 400).astype(np.float32),
gguf_strides(1, 2, 6),
),
SwiGLUTestCase(
np.random.rand(2, 3, 400).astype(np.float16),
gguf_strides(1200, 400, 1),
np.random.rand(2, 3, 400).astype(np.float16),
gguf_strides(1200, 400, 1),
np.random.rand(2, 3, 400).astype(np.float16),
gguf_strides(1, 2, 6),
),
SwiGLUTestCase(
np.random.rand(4, 4, 5632).astype(np.float32),
None,
np.random.rand(4, 4, 5632).astype(np.float32),
None,
np.random.rand(4, 4, 5632).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(4, 4, 5632).astype(np.float16),
None,
np.random.rand(4, 4, 5632).astype(np.float16),
None,
np.random.rand(4, 4, 5632).astype(np.float16),
None,
),
SwiGLUTestCase(
np.random.rand(4, 4, 5632).astype(np.float32),
gguf_strides(45056, 5632, 1),
np.random.rand(4, 4, 5632).astype(np.float32),
gguf_strides(45056, 5632, 1),
np.random.rand(4, 4, 5632).astype(np.float32),
gguf_strides(45056, 5632, 1),
),
SwiGLUTestCase(
np.random.rand(4, 4, 5632).astype(np.float16),
gguf_strides(45056, 5632, 1),
np.random.rand(4, 4, 5632).astype(np.float16),
gguf_strides(45056, 5632, 1),
np.random.rand(4, 4, 5632).astype(np.float16),
gguf_strides(45056, 5632, 1),
),
test_cases = []
_TEST_CASES_ = [
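# shape, a_stride, b_stride, c_stride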
((64, 128), None, None, None),
((64, 121), None, None, None),
((15, 512), None, None, None),
((13, 4), None, None, None),
((13, 4), (10, 1), (10, 1), (10, 1)),
((13, 4, 4), None, None, None),
((13, 4, 4), (20, 4, 1), (20, 4, 1), (20, 4, 1)),
((16, 5632), None, None, None),
((16, 5632), (13312, 1), (13312, 1), (13312, 1)),
((16, 5632), (5632, 1), (5632, 1), (1, 16)),
((2, 3, 400), (1200, 400, 1), (1200, 400, 1), (1, 2, 6)),
((4, 4, 5632), None, None, None),
((4, 4, 5632), (45056, 5632, 1), (45056, 5632, 1), (45056, 5632, 1)),
]
_TENSOR_DTYPES_ = [np.float32, np.float16]
for dtype in _TENSOR_DTYPES_:
for shape, stride_a, stride_b, stride_c in _TEST_CASES_:
a = np.random.rand(*shape).astype(dtype)
b = np.random.rand(*shape).astype(dtype)
c = np.empty(tuple(0 for _ in shape), dtype=dtype)
test_case = SwiGLUTestCase(
a=a,
shape_a=list(shape),
stride_a=stride_a,
b=b,
shape_b=list(shape),
stride_b=stride_b,
c=c,
shape_c=list(shape),
stride_c=stride_c,
)
test_cases.append(test_case)
test_writer.add_tests(test_cases)
test_writer.save()