issue/183 根据反馈修改

d4c0cdf9 · Graylatzhou · 975559ee
Commit d4c0cdf9 authored Apr 24, 2025 by Graylatzhou
8 changed files
--- a/include/infiniop/ops/mul.h
+++ b/include/infiniop/ops/mul.h
@@ -17,10 +17,10 @@ __C __export infiniStatus_t infiniopMul(infiniopMulDescriptor_t desc,
                                        void *workspace,
                                        size_t workspace_size,
                                        void *c,
-                                        void const *a,
-                                        void const *b,
+                                        const void *a,
+                                        const void *b,
                                        void *stream);
                
 __C __export infiniStatus_t infiniopDestroyMulDescriptor(infiniopMulDescriptor_t desc);

-#endif
\ No newline at end of file
+#endif
--- a/src/infiniop-test/src/ops/mul.cpp
+++ b/src/infiniop-test/src/ops/mul.cpp
@@ -80,13 +80,12 @@ std::shared_ptr<infiniop_test::Result> Test::run(
                b->data(),
                nullptr);
        },
-        (warm_ups + 1) / 2, (iterations + 1) / 2);
+        warm_ups, iterations);

    return TEST_PASSED(elapsed_time);
 }

 std::vector<std::string> Test::attribute_names() {
-    // MUL 操作不需要特殊属性（与 GEMM 不同，GEMM 需要 alpha 和 beta）
    return {};
 }

@@ -109,4 +108,4 @@ Test::~Test() {
    delete _attributes;
 }

-} // namespace infiniop_test::mul
\ No newline at end of file
+} // namespace infiniop_test::mul
--- a/src/infiniop/ops/mul/cpu/mul_cpu.h
+++ b/src/infiniop/ops/mul/cpu/mul_cpu.h
@@ -16,4 +16,4 @@ public:
 } MulOp;
 } // namespace op::mul::cpu

-#endif // __MUL_CPU_H__
\ No newline at end of file
+#endif // __MUL_CPU_H__
--- a/src/infiniop/ops/mul/cuda/mul_cuda.cu
+++ b/src/infiniop/ops/mul/cuda/mul_cuda.cu
@@ -54,4 +54,4 @@ infiniStatus_t Descriptor::calculate(

    return INFINI_STATUS_SUCCESS;
 }
-} // namespace op::add::cuda
\ No newline at end of file
+} // namespace op::mul::cuda
--- a/src/infiniop/ops/mul/cuda/mul_cuda.cuh
+++ b/src/infiniop/ops/mul/cuda/mul_cuda.cuh
@@ -5,4 +5,4 @@

 ELEMENTWISE_DESCRIPTOR(mul, cuda)

-#endif // __MUL_CUDA_API_H__
\ No newline at end of file
+#endif // __MUL_CUDA_API_H__
--- a/src/infiniop/ops/mul/cuda/mul_cuda_internal.cuh
+++ b/src/infiniop/ops/mul/cuda/mul_cuda_internal.cuh
@@ -21,6 +21,6 @@ typedef struct MulOp {
    }
 } MulOp;

-} // namespace op::add::cuda
+} // namespace op::mul::cuda

-#endif // __MUL_CUDA_H__
\ No newline at end of file
+#endif // __MUL_CUDA_H__
--- a/src/infiniop/ops/mul/operator.cc
+++ b/src/infiniop/ops/mul/operator.cc
@@ -117,4 +117,4 @@ infiniopDestroyMulDescriptor(infiniopMulDescriptor_t desc) {
    }

 #undef DELETE
-}
\ No newline at end of file
+}
--- a/test/infiniop-test/test_generate/testcases/mul.py
+++ b/test/infiniop-test/test_generate/testcases/mul.py
-from ast import List
 import numpy as np
 import gguf
 from typing import List
@@ -54,101 +53,109 @@ class MulTestCase(InfiniopTestCase):
        test_writer.add_tensor(
            test_writer.gguf_key("c"), self.c, raw_dtype=np_dtype_to_ggml(self.c.dtype)
        )
+        a_fp64 = self.a.astype(np.float64)
+        b_fp64 = self.b.astype(np.float64)
+        ans_fp64 = np.multiply(a_fp64, b_fp64)
        ans = mul(self.a, self.b, self.c)
        test_writer.add_tensor(
            test_writer.gguf_key("ans"), ans, raw_dtype=np_dtype_to_ggml(ans.dtype)
        )
+        test_writer.add_tensor(
+            test_writer.gguf_key("ans_fp64"),
+            ans_fp64,
+            raw_dtype=np_dtype_to_ggml(ans_fp64.dtype),
+        )

 if __name__ == '__main__':
    test_writer = InfiniopTestWriter("mul.gguf")
    test_cases = [
        MulTestCase(
            random_tensor((2, 3), np.float32),
-            gguf_strides(3, 1),
+            gguf_strides(3, 1),  
            random_tensor((2, 3), np.float32),
-            gguf_strides(3, 1),
+            (1, 2),  
            random_tensor((2, 3), np.float32),
-            gguf_strides(3, 1),
+            gguf_strides(3, 1),  
        ),
        MulTestCase(
            random_tensor((2, 3), np.float16),
-            gguf_strides(3, 1),
+            (1, 2),  
            random_tensor((2, 3), np.float16),
-            gguf_strides(3, 1),
+            gguf_strides(3, 1), 
            random_tensor((2, 3), np.float16),
-            gguf_strides(3, 1),
+            (1, 2),  
        ),
        MulTestCase(
            random_tensor((2, 3), np.float64),
-            gguf_strides(3, 1),
+            gguf_strides(3, 1),  
            random_tensor((2, 3), np.float64),
-            gguf_strides(3, 1),
+            gguf_strides(3, 1),  
            random_tensor((2, 3), np.float64),
-            gguf_strides(3, 1),
+            (1, 2),  
        ),
        MulTestCase(
            random_tensor((4, 6), np.float16),
-            gguf_strides(1, 4),
+            gguf_strides(1, 4),  
            random_tensor((4, 6), np.float16),
-            gguf_strides(1, 5),
+            gguf_strides(1, 5),  
            random_tensor((4, 6), np.float16),
-            gguf_strides(1, 4),
+            gguf_strides(6, 1),  
        ),
        MulTestCase(
            random_tensor((1, 2048), np.float16),
-            gguf_strides(1, 2048),
+            (1, 1),  
            random_tensor((1, 2048), np.float16),
-            gguf_strides(1, 2048),
+            gguf_strides(2048, 1),  
            random_tensor((1, 2048), np.float16),
-            gguf_strides(1, 2048),
+            (1, 1),  
        ),
        MulTestCase(
            random_tensor((2048, 2048), np.float32),
-            None,
+            None,  
            random_tensor((2048, 2048), np.float32),
-            None,
+            (1, 2048),  
            random_tensor((2048, 2048), np.float32),
-            None,
+            None,  
        ),
        MulTestCase(
            random_tensor((2, 4, 2048), np.float16),
-            None,
+            (4 * 2048, 2048, 1),  
            random_tensor((2, 4, 2048), np.float16),
-            None,
+            (1, 2, 2 * 4),  
            random_tensor((2, 4, 2048), np.float16),
-            None,
+            (4 * 2048, 2048, 1),  
        ),
        MulTestCase(
            random_tensor((2, 4, 2048), np.float32),
-            None,
+            (1, 2, 2 * 4),  
            random_tensor((2, 4, 2048), np.float32),
-            None,
+            None,  
            random_tensor((2, 4, 2048), np.float32),
-            None,
+            (1, 2, 2 * 4),  
        ),
        MulTestCase(
            random_tensor((2048, 2560), np.float32),
-            gguf_strides(1, 2560),
+            gguf_strides(2560, 1),  
            random_tensor((2048, 2560), np.float32),
-            gguf_strides(1, 2560),
+            (1, 2048),  
            random_tensor((2048, 2560), np.float32),
-            gguf_strides(1, 2560),
+            gguf_strides(2560, 1),  
        ),
        MulTestCase(
            random_tensor((4, 48, 64), np.float16),
-            None,
+            (64 * 48, 64, 1),  
            random_tensor((4, 48, 64), np.float16),
-            None,
+            (1, 4, 4 * 48),  
            random_tensor((4, 48, 64), np.float16),
-            None
+            None  
        ),
        MulTestCase(
            random_tensor((4, 48, 64), np.float32),
-            None,
+            None,  
            random_tensor((4, 48, 64), np.float32),
-            None,
+            (1, 4, 4 * 48),  
            random_tensor((4, 48, 64), np.float32),
-            None
+            (48 * 64, 64, 1),  
        ),

    ]