Commit 64b5f7c8 authored by goldenfox2025's avatar goldenfox2025
Browse files

issue/180:删除双目算子残留代码和补充gguf测试

parent 8a49900f
......@@ -8,6 +8,7 @@
DECLARE_INFINIOP_TEST(gemm)
DECLARE_INFINIOP_TEST(random_sample)
DECLARE_INFINIOP_TEST(mul)
DECLARE_INFINIOP_TEST(clip)
#define REGISTER_INFINIOP_TEST(name) \
{ \
......@@ -26,6 +27,7 @@ DECLARE_INFINIOP_TEST(mul)
REGISTER_INFINIOP_TEST(gemm) \
REGISTER_INFINIOP_TEST(random_sample) \
REGISTER_INFINIOP_TEST(mul) \
REGISTER_INFINIOP_TEST(clip) \
}
namespace infiniop_test {
......
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::clip {
// Tensor bundle for one clip test case, loaded from the GGUF test file.
struct Test::Attributes {
std::shared_ptr<Tensor> x;       // input tensor
std::shared_ptr<Tensor> min_val; // per-element lower bound (tensor, not scalar)
std::shared_ptr<Tensor> max_val; // per-element upper bound (tensor, not scalar)
std::shared_ptr<Tensor> y;       // output buffer written by the operator
std::shared_ptr<Tensor> ans;     // reference result compared via allClose
};
// Constructs a clip Test from a parsed GGUF record.
// Requires the five tensors {x, min_val, max_val, y, ans}; throws
// std::runtime_error if any of them is missing. `attributes` is unused —
// clip carries no scalar attributes.
std::shared_ptr<Test> Test::build(
    std::unordered_map<std::string, std::vector<uint8_t>> attributes,
    std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
    double rtol, double atol) {
    auto test = std::shared_ptr<Test>(new Test(rtol, atol));
    test->_attributes = new Attributes();

    // Validate presence of every required tensor before wiring them up.
    for (const char *required : {"x", "min_val", "max_val", "y", "ans"}) {
        if (tensors.find(required) == tensors.end()) {
            throw std::runtime_error("Invalid Test");
        }
    }

    auto &attrs = *test->_attributes;
    attrs.x = tensors["x"];
    attrs.min_val = tensors["min_val"];
    attrs.max_val = tensors["max_val"];
    attrs.y = tensors["y"];
    attrs.ans = tensors["ans"];
    return test;
}
// Runs one clip test case on the given device: creates the descriptor,
// executes y = clip(x, min_val, max_val), checks y against the reference
// tensor "ans", then benchmarks the kernel.
//
// Fix: the original leaked the descriptor and/or workspace on every early
// failure return (workspace-size query, allocation, execution, and result
// mismatch). Each failure path now releases what has been acquired so far.
std::shared_ptr<infiniop_test::Result> Test::run(
    infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
    infiniopClipDescriptor_t op_desc;
    // Move all operands to the target device before descriptor creation.
    auto x = _attributes->x->to(device, device_id);
    auto min_val = _attributes->min_val->to(device, device_id);
    auto max_val = _attributes->max_val->to(device, device_id);
    auto y = _attributes->y->to(device, device_id);
    CHECK_OR(infiniopCreateClipDescriptor(handle, &op_desc,
                                          y->desc(),
                                          x->desc(),
                                          min_val->desc(),
                                          max_val->desc()),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to create clip descriptor."));

    size_t workspace_size;
    CHECK_OR(infiniopGetClipWorkspaceSize(op_desc, &workspace_size),
             {
                 infiniopDestroyClipDescriptor(op_desc); // don't leak the descriptor
                 return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size.");
             });

    void *workspace;
    CHECK_OR(infinirtMalloc(&workspace, workspace_size),
             {
                 infiniopDestroyClipDescriptor(op_desc);
                 return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace.");
             });

    CHECK_OR(infiniopClip(op_desc, workspace, workspace_size,
                          y->data(),
                          x->data(),
                          min_val->data(),
                          max_val->data(),
                          nullptr),
             {
                 infiniopDestroyClipDescriptor(op_desc);
                 infinirtFree(workspace);
                 return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution.");
             });

    try {
        allClose(y, _attributes->ans, _rtol, _atol);
    } catch (const std::exception &e) {
        // Release device resources before reporting the mismatch.
        infiniopDestroyClipDescriptor(op_desc);
        infinirtFree(workspace);
        return TEST_FAILED(RESULT_INCORRECT, e.what());
    }

    double elapsed_time = 0.;
    elapsed_time = benchmark(
        [=]() {
            infiniopClip(
                op_desc, workspace, workspace_size,
                y->data(),
                x->data(),
                min_val->data(),
                max_val->data(),
                nullptr);
        },
        warm_ups, iterations);

    infiniopDestroyClipDescriptor(op_desc);
    infinirtFree(workspace);
    return TEST_PASSED(elapsed_time);
}
// Clip carries no scalar attributes; configuration is entirely tensor-based.
std::vector<std::string> Test::attribute_names() {
    return std::vector<std::string>{};
}
// Names of the tensors every clip test record must provide.
std::vector<std::string> Test::tensor_names() {
    std::vector<std::string> names{"x", "min_val", "max_val", "y", "ans"};
    return names;
}
std::string Test::toString() const {
std::ostringstream oss;
oss << op_name() << std::endl;
oss << "- x: " << _attributes->x->info() << std::endl;
oss << "- min_val: " << _attributes->min_val->info() << std::endl;
oss << "- max_val: " << _attributes->max_val->info() << std::endl;
oss << "- y: " << _attributes->y->info() << std::endl;
oss << std::scientific << std::setprecision(2);
oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
return oss.str();
}
// Frees the Attributes block allocated with `new` in Test::build().
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::clip
#ifndef __CLIP_H__
#define __CLIP_H__
#include "../../elementwise/elementwise.h"
#include "../../operator.h"
/**
 * @brief Define the Clip descriptor for the ternary operator.
 *
 * This macro defines a Descriptor class for the Clip operator that inherits
 * from InfiniopDescriptor. It uses the standard elementwise operation fields
 * and methods for a ternary operator where min_val and max_val are tensors
 * (not scalars).
 *
 * The generated class stores:
 *  - _dtype:          element dtype of the operation
 *  - _info:           precomputed elementwise shape/stride metadata
 *  - _device_info:    backend-specific device implementation (owned via
 *                     unique_ptr; the raw pointer passed to the constructor
 *                     is adopted)
 *  - _workspace_size: bytes of scratch memory, exposed via workspaceSize()
 *
 * calculate() receives a caller-provided workspace, the output buffer, and
 * the input pointers as a vector. NOTE(review): the harness passes inputs
 * in the order {x, min_val, max_val} — confirm against the backend kernels.
 *
 * @param OP The operator name (clip)
 * @param NAMESPACE The namespace (cpu or cuda)
 */
#define CLIP_DESCRIPTOR(OP, NAMESPACE) \
\
namespace op::OP::NAMESPACE { \
class Descriptor final : public InfiniopDescriptor { \
infiniDtype_t _dtype; \
op::elementwise::ElementwiseInfo _info; \
std::unique_ptr<op::elementwise::NAMESPACE::DeviceImpl> _device_info; \
size_t _workspace_size; \
\
public: \
Descriptor( \
infiniDtype_t dtype, \
op::elementwise::ElementwiseInfo info, \
op::elementwise::NAMESPACE::DeviceImpl *device_info, \
size_t workspace_size, \
infiniDevice_t device_type, \
int device_id) \
: InfiniopDescriptor{device_type, device_id}, \
_dtype(dtype), \
_info(std::move(info)), \
_device_info(std::move(device_info)), \
_workspace_size(workspace_size) {} \
\
~Descriptor(); \
\
size_t workspaceSize() const { return _workspace_size; } \
\
infiniStatus_t calculate( \
void *workspace, size_t workspace_size, \
void *output, \
std::vector<const void *> inputs, \
void *stream) const; \
}; \
}
#endif // __CLIP_H__
......@@ -4,7 +4,7 @@ namespace op::clip::cpu {
Descriptor::~Descriptor() = default;
infiniStatus_t createClipDescriptor(
infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
......
......@@ -2,9 +2,9 @@
#define __CLIP_CPU_H__
#include "../../../elementwise/cpu/elementwise_cpu.h"
#include "../clip.h"
#include "infiniop/ops/clip.h"
CLIP_DESCRIPTOR(clip, cpu)
ELEMENTWISE_DESCRIPTOR(clip, cpu)
namespace op::clip::cpu {
......@@ -18,13 +18,6 @@ public:
}
} ClipOp;
// Create clip descriptor
infiniStatus_t createClipDescriptor(
infiniopHandle_t handle,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec);
} // namespace op::clip::cpu
#endif // __CLIP_CPU_H__
......@@ -5,7 +5,7 @@ namespace op::clip::cuda {
Descriptor::~Descriptor() = default;
infiniStatus_t createClipDescriptor(
infiniStatus_t Descriptor::create(
infiniopHandle_t handle_,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
......
......@@ -2,18 +2,12 @@
#define __CLIP_CUDA_API_H__
#include "../../../elementwise/cuda/elementwise_cuda_api.cuh"
#include "../clip.h"
#include "infiniop/ops/clip.h"
CLIP_DESCRIPTOR(clip, cuda)
ELEMENTWISE_DESCRIPTOR(clip, cuda)
namespace op::clip::cuda {
infiniStatus_t createClipDescriptor(
infiniopHandle_t handle,
Descriptor **desc_ptr,
infiniopTensorDescriptor_t out_desc,
std::vector<infiniopTensorDescriptor_t> input_desc_vec);
} // namespace op::clip::cuda
#endif // __CLIP_CUDA_API_H__
......@@ -19,7 +19,7 @@ __C infiniStatus_t infiniopCreateClipDescriptor(
#define CREATE(CASE, NAMESPACE) \
case CASE: \
return op::clip::NAMESPACE::createClipDescriptor( \
return op::clip::NAMESPACE::Descriptor::create( \
handle, \
reinterpret_cast<op::clip::NAMESPACE::Descriptor **>(desc_ptr), \
y, \
......
import numpy as np
import gguf
from typing import List, Optional, Tuple
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides
def clip(
    x: np.ndarray,
    min_val: np.ndarray,
    max_val: np.ndarray,
) -> np.ndarray:
    """Element-wise clamp of ``x`` into ``[min_val, max_val]``.

    The upper bound is applied first, then the lower bound, so when
    ``min_val > max_val`` the lower bound wins — matching
    ``max(min(x, max_val), min_val)``.

    Args:
        x: Input tensor.
        min_val: Tensor of per-element lower bounds (same shape as x).
        max_val: Tensor of per-element upper bounds (same shape as x).
    Returns:
        Clipped tensor with the same shape as x.
    """
    capped = np.minimum(x, max_val)
    return np.maximum(capped, min_val)
def random_tensor(shape, dtype):
    """Draw a uniform random tensor over [-2, 2).

    Args:
        shape: Shape of the tensor.
        dtype: Data type of the tensor.
    Returns:
        Random tensor of the given shape and dtype.
    """
    # Cast to the target dtype *before* scaling, matching the original
    # value sequence drawn from the global NumPy RNG.
    uniform01 = np.random.rand(*shape).astype(dtype)
    return uniform01 * 4.0 - 2.0
class ClipTestCase(InfiniopTestCase):
    """One serialized clip-operator test: operands, optional strides,
    the output buffer, and a float64 reference answer."""

    def __init__(
        self,
        x: np.ndarray,
        x_stride: Optional[List[int]],
        min_val: np.ndarray,
        min_stride: Optional[List[int]],
        max_val: np.ndarray,
        max_stride: Optional[List[int]],
        y: np.ndarray,
        y_stride: Optional[List[int]],
    ):
        super().__init__("clip")
        # Operand tensors.
        self.x = x
        self.min_val = min_val
        self.max_val = max_val
        self.y = y
        # Optional stride metadata (None means contiguous / omitted).
        self.x_stride = x_stride
        self.min_stride = min_stride
        self.max_stride = max_stride
        self.y_stride = y_stride

    def write_test(self, test_writer: "InfiniopTestWriter"):
        super().write_test(test_writer)
        # Emit stride arrays only for operands that actually carry them.
        for key, stride in (
            ("x.strides", self.x_stride),
            ("min_val.strides", self.min_stride),
            ("max_val.strides", self.max_stride),
            ("y.strides", self.y_stride),
        ):
            if stride is not None:
                test_writer.add_array(test_writer.gguf_key(key), stride)
        # Operand tensors keep their native dtype.
        for key, tensor in (
            ("x", self.x),
            ("min_val", self.min_val),
            ("max_val", self.max_val),
            ("y", self.y),
        ):
            test_writer.add_tensor(
                test_writer.gguf_key(key),
                tensor,
                raw_dtype=np_dtype_to_ggml(tensor.dtype),
            )
        # Reference answer is computed in float64 for maximum precision.
        reference = clip(
            self.x.astype(np.float64),
            self.min_val.astype(np.float64),
            self.max_val.astype(np.float64),
        )
        test_writer.add_tensor(
            test_writer.gguf_key("ans"),
            reference,
            raw_dtype=gguf.GGMLQuantizationType.F64,
        )
if __name__ == "__main__":
    test_writer = InfiniopTestWriter("clip.gguf")

    # Fix: seed the global NumPy RNG so regenerating clip.gguf is
    # bit-reproducible (the original produced different fixtures each run).
    np.random.seed(2024)

    test_cases = []

    # Shapes covering 1D/2D/3D, primes, the degenerate (1,1) case, and
    # larger tensors.
    shapes = [
        (10,),         # 1D tensor
        (5, 10),       # 2D tensor
        (2, 3, 4),     # 3D tensor
        (7, 13),       # Prime dimensions
        (1, 1),        # Minimum shape
        (100, 100),    # Large shape
        (16, 16, 16),  # Large 3D
    ]

    # (min, max) bound pairs, including a collapsed min == max range.
    min_max_values = [
        (-1.0, 1.0),        # Standard range
        (0.0, 2.0),         # Positive range
        (-2.0, 0.0),        # Negative range
        (-1000.0, 1000.0),  # Large range
        (-0.001, 0.001),    # Small range
        (0.0, 0.0),         # min == max
    ]

    # Data types to test.
    dtypes = [np.float16, np.float32, np.float64]

    # Contiguous cases: full cross product of shape x bounds x dtype.
    for shape in shapes:
        for min_val, max_val in min_max_values:
            for dtype in dtypes:
                x = random_tensor(shape, dtype)
                min_tensor = np.full(shape, min_val, dtype=dtype)
                max_tensor = np.full(shape, max_val, dtype=dtype)
                y = np.zeros(shape, dtype=dtype)
                test_cases.append(
                    ClipTestCase(
                        x=x,
                        x_stride=None,
                        min_val=min_tensor,
                        min_stride=None,
                        max_val=max_tensor,
                        max_stride=None,
                        y=y,
                        y_stride=None,
                    )
                )

    # Strided cases (2D shapes only): row-major, column-major, and
    # mismatched input/output layouts.
    for shape in [s for s in shapes if len(s) == 2]:
        for dtype in dtypes:
            row_stride = gguf_strides(shape[1], 1)  # row-major layout
            col_stride = gguf_strides(1, shape[0])  # column-major layout

            # Row-major input and output.
            x = random_tensor(shape, dtype)
            min_tensor = np.full(shape, -1.0, dtype=dtype)
            max_tensor = np.full(shape, 1.0, dtype=dtype)
            y = np.zeros(shape, dtype=dtype)
            test_cases.append(
                ClipTestCase(
                    x=x,
                    x_stride=row_stride,
                    min_val=min_tensor,
                    min_stride=row_stride,
                    max_val=max_tensor,
                    max_stride=row_stride,
                    y=y,
                    y_stride=row_stride,
                )
            )

            # Column-major input and output.
            x = random_tensor(shape, dtype)
            min_tensor = np.full(shape, -1.0, dtype=dtype)
            max_tensor = np.full(shape, 1.0, dtype=dtype)
            y = np.zeros(shape, dtype=dtype)
            test_cases.append(
                ClipTestCase(
                    x=x,
                    x_stride=col_stride,
                    min_val=min_tensor,
                    min_stride=col_stride,
                    max_val=max_tensor,
                    max_stride=col_stride,
                    y=y,
                    y_stride=col_stride,
                )
            )

            # Row-major inputs with a column-major output.
            x = random_tensor(shape, dtype)
            min_tensor = np.full(shape, -1.0, dtype=dtype)
            max_tensor = np.full(shape, 1.0, dtype=dtype)
            y = np.zeros(shape, dtype=dtype)
            test_cases.append(
                ClipTestCase(
                    x=x,
                    x_stride=row_stride,
                    min_val=min_tensor,
                    min_stride=row_stride,
                    max_val=max_tensor,
                    max_stride=row_stride,
                    y=y,
                    y_stride=col_stride,
                )
            )

    # Register every case and write the GGUF file to disk.
    test_writer.add_tests(test_cases)
    test_writer.save()
    print(f"Generated {len(test_cases)} test cases for the Clip operator")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment