Commit 6bb801f6 authored by Catheriany's avatar Catheriany
Browse files

issue/228: infiniop-test框架支持0步长,gguf冗余移除,算子测例重构

parent 5a22f833
......@@ -18,6 +18,7 @@ DECLARE_INFINIOP_TEST(add)
infiniop_test::name::Test::build, \
infiniop_test::name::Test::attribute_names(), \
infiniop_test::name::Test::tensor_names(), \
infiniop_test::name::Test::output_names(), \
}},
/*
......
......@@ -58,8 +58,9 @@ private:
public:
Tensor(const GGUFTensorInfo *info,
const void *ggml_ptr,
const GGUFKeyValue *shape_meta = nullptr,
const GGUFKeyValue *strides_meta = nullptr,
const GGUFKeyValue *shapes_meta = nullptr);
bool isOutput = false);
Tensor(std::shared_ptr<Memory> memory, size_t offset,
const std::vector<size_t> &shape,
const std::vector<ptrdiff_t> &strides,
......
......@@ -92,6 +92,7 @@ public:
\
static std::vector<std::string> attribute_names(); \
static std::vector<std::string> tensor_names(); \
static std::vector<std::string> output_names(); \
\
std::shared_ptr<infiniop_test::Result> run( \
infiniopHandle_t handle, infiniDevice_t device, int device_id, \
......@@ -121,6 +122,7 @@ struct TestBuilder {
BuilderFunc build;
std::vector<std::string> attribute_names;
std::vector<std::string> tensor_names;
std::vector<std::string> output_names;
};
} // namespace infiniop_test
......
......@@ -87,6 +87,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
// Names of the tensors this test treats as operator outputs; "c" is the only
// destination tensor, and output tensors receive the dedicated shape/stride
// handling when the runner builds them.
std::vector<std::string> Test::output_names() {
    std::vector<std::string> outputs{"c"};
    return outputs;
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -113,6 +113,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
std::vector<std::string> Test::output_names() {
    // No tensor is marked as an output, so none gets the output-tensor
    // (shape-metadata / zero-stride) treatment in the test runner.
    // NOTE(review): sibling operators with the same tensor set
    // {"a", "b", "c", "ans"} return {"c"} here — confirm the empty list is
    // intentional for this operator and not an omission.
    return {};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -87,6 +87,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
// Report which tensors the framework should treat as operator outputs.
// Only "c" is written by the operator under test.
std::vector<std::string> Test::output_names() {
    return std::vector<std::string>{"c"};
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -109,6 +109,10 @@ std::vector<std::string> Test::tensor_names() {
return {"data", "ans", "result"};
}
// "result" is the tensor the operator writes; it is the single entry the
// runner flags as an output when constructing the test's tensors.
std::vector<std::string> Test::output_names() {
    std::vector<std::string> names;
    names.emplace_back("result");
    return names;
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -82,6 +82,10 @@ std::vector<std::string> Test::tensor_names() {
return {"a", "b", "c", "ans"};
}
// List the output tensors for this operator's tests: "c" alone is produced
// by the operator; the remaining tensors are inputs or reference answers.
std::vector<std::string> Test::output_names() {
    static const std::vector<std::string> kOutputs{"c"};
    return kOutputs;
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
......
......@@ -98,21 +98,28 @@ void *Tensor::data() const {
Tensor::Tensor(const GGUFTensorInfo *info,
const void *ggml_ptr,
const GGUFKeyValue *shape_meta,
const GGUFKeyValue *strides_meta,
const GGUFKeyValue *shapes_meta) {
bool isOutput) {
_ggml_type = info->ggml_type;
_offset = 0;
size_t ndim = static_cast<size_t>(info->ndim);
// `_shape`存储真实的tensor形状(来自shape_meta),`temp_shape`存储用于rearrange和计算内存的tensor形状
_shape = std::vector<size_t>(ndim);
std::vector<size_t> temp_shape(ndim);
_strides = std::vector<ptrdiff_t>(ndim);
std::vector<ptrdiff_t> contiguous_strides(ndim);
for (size_t i = 0; i < ndim; i++) {
_shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]);
temp_shape[i] = static_cast<size_t>(info->shape[ndim - 1 - i]);
if (i == 0) {
contiguous_strides[ndim - 1] = (ptrdiff_t)1;
} else {
contiguous_strides[ndim - 1 - i] = (ptrdiff_t)info->shape[i - 1] * contiguous_strides[ndim - i];
}
if (isOutput) {
contiguous_strides[i] = (ptrdiff_t)0;
}
}
if (strides_meta == nullptr) {
......@@ -133,30 +140,57 @@ Tensor::Tensor(const GGUFTensorInfo *info,
}
}
infiniopCreateTensorDescriptor(&_desc, ndim, _shape.data(), _strides.data(), ggmlTypeToInfiniType(_ggml_type));
if (isOutput) {
if (shape_meta == nullptr) {
throw std::runtime_error("Error Creating Tensor: shape_meta cannot be null for output tensor");
}
for (size_t i = 0; i < ndim; i++) {
if (shape_meta->gguf_type == GGUF_TYPE_INT64) {
int64_t val = reinterpret_cast<const int64_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
temp_shape[i] = static_cast<size_t>(val);
} else if (shape_meta->gguf_type == GGUF_TYPE_INT32) {
int32_t val = reinterpret_cast<const int32_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
temp_shape[i] = static_cast<size_t>(val);
} else {
throw std::runtime_error("Error Creating Tensor: Unsupported shape type");
}
}
}
infiniopCreateTensorDescriptor(&_desc, ndim, temp_shape.data(), _strides.data(), ggmlTypeToInfiniType(_ggml_type));
size_t size;
calculateTensorMemory(size, _offset, _shape, _strides, ggmlTypeSize(_ggml_type));
calculateTensorMemory(size, _offset, temp_shape, _strides, ggmlTypeSize(_ggml_type));
_memory = std::make_shared<Memory>(size, INFINI_DEVICE_CPU, 0);
utils::rearrange(
(char *)_memory->ptr() + _offset,
(char *)ggml_ptr + info->data_offset,
_shape.data(),
temp_shape.data(),
_strides.data(),
contiguous_strides.data(),
ndim,
ggmlTypeSize(_ggml_type));
if (shapes_meta != nullptr) {
if (shape_meta == nullptr) {
_shape = temp_shape;
} else {
for (size_t i = 0; i < ndim; i++) {
if (shapes_meta->gguf_type == GGUF_TYPE_INT64) {
_shape[i] = (ptrdiff_t)(reinterpret_cast<const int64_t *>(
shapes_meta->value.data())[i]);
} else if (shapes_meta->gguf_type == GGUF_TYPE_INT32) {
_shape[i] = (ptrdiff_t)(reinterpret_cast<const int32_t *>(
shapes_meta->value.data())[i]);
if (shape_meta->gguf_type == GGUF_TYPE_INT64) {
int64_t val = reinterpret_cast<const int64_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
_shape[i] = static_cast<size_t>(val);
} else if (shape_meta->gguf_type == GGUF_TYPE_INT32) {
int32_t val = reinterpret_cast<const int32_t *>(shape_meta->value.data())[i];
if (val < 0) {
throw std::runtime_error("Shape must be non-negative");
}
_shape[i] = static_cast<size_t>(val);
} else {
throw std::runtime_error("Error Creating Tensor: Unsupported shape type");
}
......
......@@ -90,17 +90,19 @@ std::shared_ptr<Result> runTest(const GGUFFileReader &gguf_reader,
attrs[attr_name] = attr->second->value;
}
}
for (auto tensor_name : builder.tensor_names) {
auto info = tensor_info.find("test." + std::to_string(test_id) + "." + tensor_name);
if (info != tensor_info.end()) {
auto strides = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".strides");
auto shape = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".shape");
auto strides = meta.find("test." + std::to_string(test_id) + "." + tensor_name + ".strides");
bool is_output = std::find(builder.output_names.begin(), builder.output_names.end(), tensor_name) != builder.output_names.end();
tensors[tensor_name] = std::make_shared<Tensor>(
info->second.get(),
gguf_reader.getGgmlStart(),
shape != meta.end() ? shape->second.get() : nullptr,
strides != meta.end() ? strides->second.get() : nullptr,
shape != meta.end() ? shape->second.get() : nullptr);
is_output);
}
}
std::shared_ptr<infiniop_test::base::Test> test;
......
from .infiniop_test import InfiniopTestCase, InfiniopTestWriter, np_dtype_to_ggml, gguf_strides
from .infiniop_test import InfiniopTestCase, InfiniopTestWriter, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
......@@ -29,6 +29,14 @@ def gguf_strides(*args: int) -> list[int] | None:
return list(args)[::-1] if args else None
def contiguous_gguf_strides(shape: tuple[int, ...]) -> list[int]:
    """Return the row-major (C-contiguous) strides, in element units, for `shape`.

    The last dimension has stride 1 and each earlier dimension's stride is the
    product of all later dimension sizes, e.g. (2, 3, 4) -> [12, 4, 1].
    An empty shape yields an empty list.
    """
    ndim = len(shape)
    strides = [1] * ndim
    for i in range(ndim - 2, -1, -1):
        strides[i] = strides[i + 1] * shape[i + 1]
    return strides
class InfiniopTestCase:
op_name: str
......
......@@ -4,7 +4,7 @@ import gguf
from typing import List
from numpy.lib.stride_tricks import as_strided
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
def add(
......@@ -13,64 +13,60 @@ def add(
):
return a + b
def process_tensors(a, a_stride, b, b_stride):
def _rearrange(tensor, strides):
if strides and 0 in strides:
byte_strides = tuple(s * tensor.itemsize for s in strides)
return as_strided(tensor, shape=tensor.shape, strides=byte_strides)
def process_tensor(a, b, stride_a=None, stride_b=None):
def normalize_stride(tensor, stride):
if stride:
slices = tuple(slice(0, 1) if s == 0 else slice(None) for s in stride)
return tensor[slices]
else:
return tensor
a = _rearrange(a, a_stride)
b = _rearrange(b, b_stride)
return a, b
def get_effective_shape(shape, strides):
effective_shape = tuple(dim if stride != 0 else 1 for dim, stride in zip(shape, strides))
return effective_shape
a_unique = normalize_stride(a, stride_a)
b_unique = normalize_stride(b, stride_b)
return a_unique, b_unique
class AddTestCase(InfiniopTestCase):
def __init__(
self,
a: np.ndarray,
a_rearranged:np.ndarray,
stride_a: List[int] | None,
shape_a: List[int] | None,
stride_a: List[int] | None,
b: np.ndarray,
b_rearranged:np.ndarray,
stride_b: List[int] | None,
shape_b: List[int] | None,
stride_b: List[int] | None,
c: np.ndarray,
shape_c: List[int] | None,
stride_c: List[int] | None,
):
super().__init__("add")
self.a = a
self.a_rearranged = a_rearranged
self.stride_a = stride_a
self.shape_a = shape_a
self.stride_a = stride_a
self.b = b
self.b_rearranged = b_rearranged
self.stride_b = stride_b
self.shape_b = shape_b
self.stride_b = stride_b
self.c = c
self.shape_c = shape_c
self.stride_c = stride_c
def write_test(self, test_writer: "InfiniopTestWriter"):
super().write_test(test_writer)
if self.stride_a is not None:
test_writer.add_array(test_writer.gguf_key("a.strides"), self.stride_a)
if self.stride_b is not None:
test_writer.add_array(test_writer.gguf_key("b.strides"), self.stride_b)
if self.stride_c is not None:
test_writer.add_array(test_writer.gguf_key("c.strides"), self.stride_c)
if self.shape_a is not None:
test_writer.add_array(test_writer.gguf_key("a.shape"), self.shape_a)
if self.shape_b is not None:
test_writer.add_array(test_writer.gguf_key("b.shape"), self.shape_b)
if self.shape_c is not None:
test_writer.add_array(test_writer.gguf_key("c.shape"), self.shape_c)
if self.stride_a is not None:
test_writer.add_array(test_writer.gguf_key("a.strides"), gguf_strides(*self.stride_a))
if self.stride_b is not None:
test_writer.add_array(test_writer.gguf_key("b.strides"), gguf_strides(*self.stride_b))
test_writer.add_array(
test_writer.gguf_key("c.strides"),
gguf_strides(*self.stride_c if self.stride_c is not None else contiguous_gguf_strides(self.shape_c))
)
test_writer.add_tensor(
test_writer.gguf_key("a"), self.a, raw_dtype=np_dtype_to_ggml(self.a.dtype)
)
......@@ -81,8 +77,8 @@ class AddTestCase(InfiniopTestCase):
test_writer.gguf_key("c"), self.c, raw_dtype=np_dtype_to_ggml(self.c.dtype)
)
ans = add(
self.a_rearranged.astype(np.float64),
self.b_rearranged.astype(np.float64),
self.a.astype(np.float64),
self.b.astype(np.float64),
)
test_writer.add_tensor(
test_writer.gguf_key("ans"), ans, raw_dtype=gguf.GGMLQuantizationType.F64
......@@ -99,51 +95,38 @@ if __name__ == "__main__":
_TEST_CASES_ = [
# shape, a_stride, b_stride, c_stride
((13, 4), None, None, None),
((13, 4), gguf_strides(10, 1), gguf_strides(10, 1), gguf_strides(10, 1)),
((13, 4), gguf_strides(0, 1), None, None),
((13, 4), (10, 1), (10, 1), (10, 1)),
((13, 4), (0, 1), None, None),
((13, 4, 4), None, None, None),
((13, 4, 4), gguf_strides(20, 4, 1), gguf_strides(20, 4, 1), gguf_strides(20, 4, 1)),
((13, 4, 4), gguf_strides(4, 0, 1), gguf_strides(0, 4, 1), None),
((13, 4, 4), (20, 4, 1), (20, 4, 1), (20, 4, 1)),
((13, 4, 4), (4, 0, 1), (0, 4, 1), None),
((16, 5632), None, None, None),
((16, 5632), gguf_strides(13312, 1), gguf_strides(13312, 1), gguf_strides(13312, 1)),
((16, 5632), (13312, 1), (13312, 1), (13312, 1)),
((4, 4, 5632), None, None, None),
((4, 4, 5632), gguf_strides(45056, 5632, 1), gguf_strides(45056, 5632, 1), gguf_strides(45056, 5632, 1)),
((4, 4, 5632), (45056, 5632, 1), (45056, 5632, 1), (45056, 5632, 1)),
]
_TENSOR_DTYPES_ = [np.float32] # np.float16
_TENSOR_DTYPES_ = [np.float32, np.float16]
for dtype in _TENSOR_DTYPES_:
for shape, stride_a, stride_b, stride_c in _TEST_CASES_:
a = np.random.rand(*shape).astype(dtype)
b = np.random.rand(*shape).astype(dtype)
c = np.random.rand(*shape).astype(dtype)
# Reverse strides to match internal layout expectations
reversed_stride_a = tuple(reversed(stride_a)) if stride_a else None
reversed_stride_b = tuple(reversed(stride_b)) if stride_b else None
a_rearranged, b_rearranged = process_tensors(a, reversed_stride_a, b, reversed_stride_b)
effective_shape_a = get_effective_shape(a_rearranged.shape, tuple(s // a.itemsize for s in a_rearranged.strides))
effective_shape_b = get_effective_shape(b_rearranged.shape, tuple(s // b.itemsize for s in b_rearranged.strides))
# Extract unique data region (eliminate broadcast repetition)
slices_a = tuple(slice(0, 1) if dim == 1 else slice(None) for dim in effective_shape_a)
slices_b = tuple(slice(0, 1) if dim == 1 else slice(None) for dim in effective_shape_b)
a_unique = a_rearranged[slices_a]
b_unique = b_rearranged[slices_b]
c = np.empty(tuple(0 for _ in shape), dtype=dtype)
a, b = process_tensor(a, b, stride_a, stride_b)
if stride_c is None:
stride_c = contiguous_gguf_strides(shape)
test_case = AddTestCase(
a=a_unique,
a_rearranged=a_rearranged,
stride_a=stride_a,
a=a,
shape_a=shape,
b=b_unique,
b_rearranged=b_rearranged,
stride_b=stride_b,
stride_a=stride_a,
b=b,
shape_b=shape,
stride_b=stride_b,
c=c,
shape_c=shape,
stride_c=stride_c,
)
test_cases.append(test_case)
test_writer.add_tests(test_cases)
test_writer.save()
\ No newline at end of file
......@@ -2,7 +2,7 @@ import numpy as np
import gguf
from typing import List
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
def mul(
a: np.ndarray,
......@@ -19,28 +19,44 @@ class MulTestCase(InfiniopTestCase):
def __init__(
    self,
    a: np.ndarray,
    shape_a: List[int] | None,
    stride_a: List[int] | None,
    b: np.ndarray,
    shape_b: List[int] | None,
    stride_b: List[int] | None,
    c: np.ndarray,
    shape_c: List[int] | None,
    stride_c: List[int] | None,
):
    """Hold the mul test tensors plus the shape/stride metadata written
    alongside them into the GGUF file.

    Strides are given in element units; None means the corresponding
    metadata entry is omitted (presumably treated as contiguous by the
    consumer — confirm against the C++ reader).
    """
    super().__init__("mul")
    # input operands
    self.a = a
    self.shape_a = shape_a
    self.stride_a = stride_a
    self.b = b
    self.shape_b = shape_b
    self.stride_b = stride_b
    # output tensor; NOTE(review): generators pass an empty placeholder for
    # c and rely on shape_c/stride_c to describe the real output — verify.
    self.c = c
    self.shape_c = shape_c
    self.stride_c = stride_c
def write_test(self, test_writer: "InfiniopTestWriter"):
super().write_test(test_writer)
if self.shape_a is not None:
test_writer.add_array(test_writer.gguf_key("a.shape"), self.shape_a)
if self.shape_b is not None:
test_writer.add_array(test_writer.gguf_key("b.shape"), self.shape_b)
if self.shape_c is not None:
test_writer.add_array(test_writer.gguf_key("c.shape"), self.shape_c)
if self.stride_a is not None:
test_writer.add_array(test_writer.gguf_key("a.strides"), self.stride_a)
test_writer.add_array(test_writer.gguf_key("a.strides"), gguf_strides(*self.stride_a))
if self.stride_b is not None:
test_writer.add_array(test_writer.gguf_key("b.strides"), self.stride_b)
if self.stride_c is not None:
test_writer.add_array(test_writer.gguf_key("c.strides"), self.stride_c)
test_writer.add_array(test_writer.gguf_key("b.strides"), gguf_strides(*self.stride_b))
test_writer.add_array(
test_writer.gguf_key("c.strides"),
gguf_strides(*self.stride_c if self.stride_c is not None else contiguous_gguf_strides(self.shape_c))
)
test_writer.add_tensor(
test_writer.gguf_key("a"), self.a, raw_dtype=np_dtype_to_ggml(self.a.dtype)
)
......@@ -52,6 +68,7 @@ class MulTestCase(InfiniopTestCase):
)
a_fp64 = self.a.astype(np.float64)
b_fp64 = self.b.astype(np.float64)
ans_fp64 = np.multiply(a_fp64, b_fp64)
ans = mul(self.a, self.b)
test_writer.add_tensor(
......@@ -65,95 +82,43 @@ class MulTestCase(InfiniopTestCase):
if __name__ == '__main__':
test_writer = InfiniopTestWriter("mul.gguf")
test_cases = [
MulTestCase(
random_tensor((2, 3), np.float32),
gguf_strides(3, 1),
random_tensor((2, 3), np.float32),
gguf_strides(1, 2),
random_tensor((2, 3), np.float32),
gguf_strides(3, 1),
),
MulTestCase(
random_tensor((2, 3), np.float16),
gguf_strides(1, 2),
random_tensor((2, 3), np.float16),
gguf_strides(3, 1),
random_tensor((2, 3), np.float16),
gguf_strides(1, 2),
),
MulTestCase(
random_tensor((2, 3), np.float64),
gguf_strides(3, 1),
random_tensor((2, 3), np.float64),
gguf_strides(3, 1),
random_tensor((2, 3), np.float64),
gguf_strides(1, 2),
),
MulTestCase(
random_tensor((4, 6), np.float16),
gguf_strides(1, 4),
random_tensor((4, 6), np.float16),
gguf_strides(1, 5),
random_tensor((4, 6), np.float16),
gguf_strides(6, 1),
),
MulTestCase(
random_tensor((1, 2048), np.float16),
gguf_strides(1, 1),
random_tensor((1, 2048), np.float16),
gguf_strides(2048, 1),
random_tensor((1, 2048), np.float16),
gguf_strides(1, 1),
),
MulTestCase(
random_tensor((2048, 2048), np.float32),
None,
random_tensor((2048, 2048), np.float32),
gguf_strides(1, 2048),
random_tensor((2048, 2048), np.float32),
None,
),
MulTestCase(
random_tensor((2, 4, 2048), np.float16),
gguf_strides(4 * 2048, 2048, 1),
random_tensor((2, 4, 2048), np.float16),
gguf_strides(1, 2, 2 * 4),
random_tensor((2, 4, 2048), np.float16),
gguf_strides(4 * 2048, 2048, 1),
),
MulTestCase(
random_tensor((2, 4, 2048), np.float32),
gguf_strides(1, 2, 2 * 4),
random_tensor((2, 4, 2048), np.float32),
None,
random_tensor((2, 4, 2048), np.float32),
gguf_strides(1, 2, 2 * 4),
),
MulTestCase(
random_tensor((2048, 2560), np.float32),
gguf_strides(2560, 1),
random_tensor((2048, 2560), np.float32),
gguf_strides(1, 2048),
random_tensor((2048, 2560), np.float32),
gguf_strides(2560, 1),
),
MulTestCase(
random_tensor((4, 48, 64), np.float16),
gguf_strides(64 * 48, 64, 1),
random_tensor((4, 48, 64), np.float16),
gguf_strides(1, 4, 4 * 48),
random_tensor((4, 48, 64), np.float16),
None
),
test_cases = []
_TEST_CASES_ = [
((2, 3), (3, 1), (1, 2), (3, 1)),
((2, 3), (1, 2), (3, 1), (1, 2)),
((2, 3), (3, 1), (3, 1), (1, 2)),
((4, 6), (1, 4), (1, 5), (6, 1)),
((1, 2048), (1, 1), (2048, 1), (1, 1)),
((2048, 2048), None, (1, 2048), None),
((2, 4, 2048), (4 * 2048, 2048, 1), (1, 2, 8), (4 * 2048, 2048, 1)),
((2, 4, 2048), (1, 2, 8), None, (1, 2, 8)),
((2048, 2560), (2560, 1), (1, 2048), (2560, 1)),
((4, 48, 64), (64 * 48, 64, 1), (1, 4, 192), None),
((4, 48, 64), None, (1, 4, 192), (48 * 64, 64, 1)),
]
_TENSOR_DTYPES_ = [np.float32, np.float16]
for dtype in _TENSOR_DTYPES_:
for shape, stride_a, stride_b, stride_c in _TEST_CASES_:
a = random_tensor(shape, dtype)
b = random_tensor(shape, dtype)
c = np.empty(tuple(0 for _ in shape), dtype=dtype)
test_cases.append(
MulTestCase(
random_tensor((4, 48, 64), np.float32),
None,
random_tensor((4, 48, 64), np.float32),
gguf_strides(1, 4, 4 * 48),
random_tensor((4, 48, 64), np.float32),
gguf_strides(48 * 64, 64, 1),
a=a,
shape_a=shape,
stride_a=stride_a,
b=b,
shape_b=shape,
stride_b=stride_b,
c=c,
shape_c=shape,
stride_c=stride_c,
)
]
)
test_writer.add_tests(test_cases)
test_writer.save()
......@@ -2,7 +2,7 @@ import numpy as np
import gguf
from typing import List
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
def swiglu(
......@@ -18,28 +18,44 @@ class SwiGLUTestCase(InfiniopTestCase):
def __init__(
    self,
    a: np.ndarray,
    shape_a: List[int] | None,
    stride_a: List[int] | None,
    b: np.ndarray,
    shape_b: List[int] | None,
    stride_b: List[int] | None,
    c: np.ndarray,
    shape_c: List[int] | None,
    stride_c: List[int] | None,
):
    """Hold the swiglu test tensors plus the shape/stride metadata written
    alongside them into the GGUF file.

    Strides are given in element units; None means the corresponding
    metadata entry is omitted (presumably treated as contiguous by the
    consumer — confirm against the C++ reader).
    """
    super().__init__("swiglu")
    # input operands
    self.a = a
    self.shape_a = shape_a
    self.stride_a = stride_a
    self.b = b
    self.shape_b = shape_b
    self.stride_b = stride_b
    # output tensor; NOTE(review): generators pass an empty placeholder for
    # c and rely on shape_c/stride_c to describe the real output — verify.
    self.c = c
    self.shape_c = shape_c
    self.stride_c = stride_c
def write_test(self, test_writer: "InfiniopTestWriter"):
super().write_test(test_writer)
if self.shape_a is not None:
test_writer.add_array(test_writer.gguf_key("a.shape"), self.shape_a)
if self.shape_b is not None:
test_writer.add_array(test_writer.gguf_key("b.shape"), self.shape_b)
if self.shape_c is not None:
test_writer.add_array(test_writer.gguf_key("c.shape"), self.shape_c)
if self.stride_a is not None:
test_writer.add_array(test_writer.gguf_key("a.strides"), self.stride_a)
test_writer.add_array(test_writer.gguf_key("a.strides"), gguf_strides(*self.stride_a))
if self.stride_b is not None:
test_writer.add_array(test_writer.gguf_key("b.strides"), self.stride_b)
if self.stride_c is not None:
test_writer.add_array(test_writer.gguf_key("c.strides"), self.stride_c)
test_writer.add_array(test_writer.gguf_key("b.strides"), gguf_strides(*self.stride_b))
test_writer.add_array(
test_writer.gguf_key("c.strides"),
gguf_strides(*self.stride_c if self.stride_c is not None else contiguous_gguf_strides(self.shape_c))
)
test_writer.add_tensor(
test_writer.gguf_key("a"), self.a, raw_dtype=np_dtype_to_ggml(self.a.dtype)
)
......@@ -60,191 +76,42 @@ class SwiGLUTestCase(InfiniopTestCase):
if __name__ == "__main__":
test_writer = InfiniopTestWriter("swiglu.gguf")
test_cases = [
SwiGLUTestCase(
np.random.rand(64, 128).astype(np.float32),
None,
np.random.rand(64, 128).astype(np.float32),
None,
np.random.rand(64, 128).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(64, 121).astype(np.float32),
None,
np.random.rand(64, 121).astype(np.float32),
None,
np.random.rand(64, 121).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(15, 512).astype(np.float32),
None,
np.random.rand(15, 512).astype(np.float32),
None,
np.random.rand(15, 512).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4).astype(np.float32),
None,
np.random.rand(13, 4).astype(np.float32),
None,
np.random.rand(13, 4).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4).astype(np.float16),
None,
np.random.rand(13, 4).astype(np.float16),
None,
np.random.rand(13, 4).astype(np.float16),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4).astype(np.float32),
gguf_strides(10, 1),
np.random.rand(13, 4).astype(np.float32),
gguf_strides(10, 1),
np.random.rand(13, 4).astype(np.float32),
gguf_strides(10, 1),
),
SwiGLUTestCase(
np.random.rand(13, 4).astype(np.float16),
gguf_strides(10, 1),
np.random.rand(13, 4).astype(np.float16),
gguf_strides(10, 1),
np.random.rand(13, 4).astype(np.float16),
gguf_strides(10, 1),
),
SwiGLUTestCase(
np.random.rand(13, 4, 4).astype(np.float32),
None,
np.random.rand(13, 4, 4).astype(np.float32),
None,
np.random.rand(13, 4, 4).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4, 4).astype(np.float16),
None,
np.random.rand(13, 4, 4).astype(np.float16),
None,
np.random.rand(13, 4, 4).astype(np.float16),
None,
),
SwiGLUTestCase(
np.random.rand(13, 4, 4).astype(np.float32),
gguf_strides(20, 4, 1),
np.random.rand(13, 4, 4).astype(np.float32),
gguf_strides(20, 4, 1),
np.random.rand(13, 4, 4).astype(np.float32),
gguf_strides(20, 4, 1),
),
SwiGLUTestCase(
np.random.rand(13, 4, 4).astype(np.float16),
gguf_strides(20, 4, 1),
np.random.rand(13, 4, 4).astype(np.float16),
gguf_strides(20, 4, 1),
np.random.rand(13, 4, 4).astype(np.float16),
gguf_strides(20, 4, 1),
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float32),
None,
np.random.rand(16, 5632).astype(np.float32),
None,
np.random.rand(16, 5632).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float16),
None,
np.random.rand(16, 5632).astype(np.float16),
None,
np.random.rand(16, 5632).astype(np.float16),
None,
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(13312, 1),
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(13312, 1),
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(13312, 1),
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(13312, 1),
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(13312, 1),
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(13312, 1),
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(5632, 1),
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(5632, 1),
np.random.rand(16, 5632).astype(np.float32),
gguf_strides(1, 16),
),
SwiGLUTestCase(
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(5632, 1),
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(5632, 1),
np.random.rand(16, 5632).astype(np.float16),
gguf_strides(1, 16),
),
SwiGLUTestCase(
np.random.rand(2, 3, 400).astype(np.float32),
gguf_strides(1200, 400, 1),
np.random.rand(2, 3, 400).astype(np.float32),
gguf_strides(1200, 400, 1),
np.random.rand(2, 3, 400).astype(np.float32),
gguf_strides(1, 2, 6),
),
SwiGLUTestCase(
np.random.rand(2, 3, 400).astype(np.float16),
gguf_strides(1200, 400, 1),
np.random.rand(2, 3, 400).astype(np.float16),
gguf_strides(1200, 400, 1),
np.random.rand(2, 3, 400).astype(np.float16),
gguf_strides(1, 2, 6),
),
SwiGLUTestCase(
np.random.rand(4, 4, 5632).astype(np.float32),
None,
np.random.rand(4, 4, 5632).astype(np.float32),
None,
np.random.rand(4, 4, 5632).astype(np.float32),
None,
),
SwiGLUTestCase(
np.random.rand(4, 4, 5632).astype(np.float16),
None,
np.random.rand(4, 4, 5632).astype(np.float16),
None,
np.random.rand(4, 4, 5632).astype(np.float16),
None,
),
SwiGLUTestCase(
np.random.rand(4, 4, 5632).astype(np.float32),
gguf_strides(45056, 5632, 1),
np.random.rand(4, 4, 5632).astype(np.float32),
gguf_strides(45056, 5632, 1),
np.random.rand(4, 4, 5632).astype(np.float32),
gguf_strides(45056, 5632, 1),
),
SwiGLUTestCase(
np.random.rand(4, 4, 5632).astype(np.float16),
gguf_strides(45056, 5632, 1),
np.random.rand(4, 4, 5632).astype(np.float16),
gguf_strides(45056, 5632, 1),
np.random.rand(4, 4, 5632).astype(np.float16),
gguf_strides(45056, 5632, 1),
),
test_cases = []
_TEST_CASES_ = [
((64, 128), None, None, None),
((64, 121), None, None, None),
((15, 512), None, None, None),
((13, 4), None, None, None),
((13, 4), (10, 1), (10, 1), (10, 1)),
((13, 4, 4), None, None, None),
((13, 4, 4), (20, 4, 1), (20, 4, 1), (20, 4, 1)),
((16, 5632), None, None, None),
((16, 5632), (13312, 1), (13312, 1), (13312, 1)),
((16, 5632), (5632, 1), (5632, 1), (1, 16)),
((2, 3, 400), (1200, 400, 1), (1200, 400, 1), (1, 2, 6)),
((4, 4, 5632), None, None, None),
((4, 4, 5632), (45056, 5632, 1), (45056, 5632, 1), (45056, 5632, 1)),
]
_TENSOR_DTYPES_ = [np.float32, np.float16]
for dtype in _TENSOR_DTYPES_:
for shape, stride_a, stride_b, stride_c in _TEST_CASES_:
a = np.random.rand(*shape).astype(dtype)
b = np.random.rand(*shape).astype(dtype)
c = np.empty(tuple(0 for _ in shape), dtype=dtype)
test_case = SwiGLUTestCase(
a=a,
shape_a=list(shape),
stride_a=stride_a,
b=b,
shape_b=list(shape),
stride_b=stride_b,
c=c,
shape_c=list(shape),
stride_c=stride_c,
)
test_cases.append(test_case)
test_writer.add_tests(test_cases)
test_writer.save()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment