Commit 96cc1528 authored by Zimin Li's avatar Zimin Li
Browse files

Merge remote-tracking branch 'upstream/main'

parents 2f2a74b6 bd58898b
......@@ -6,21 +6,24 @@
* Declare all the tests here
*/
DECLARE_INFINIOP_TEST(gemm)
DECLARE_INFINIOP_TEST(random_sample)
#define REGISTER_INFINIOP_TEST(name) \
{ \
#name, \
{ infiniop_test::name::Test::build, \
{ \
infiniop_test::name::Test::build, \
infiniop_test::name::Test::attribute_names(), \
infiniop_test::name::Test::tensor_names() } \
}
infiniop_test::name::Test::tensor_names(), \
}},
/*
* Register all the tests here
*/
#define TEST_BUILDER_MAPPINGS \
{ \
REGISTER_INFINIOP_TEST(gemm), \
REGISTER_INFINIOP_TEST(gemm) \
REGISTER_INFINIOP_TEST(random_sample) \
}
namespace infiniop_test {
......
#include "ops.hpp"
#include "utils.hpp"
#include <infinirt.h>
#include <cstring>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <string>
namespace infiniop_test::random_sample {
// Per-test-case payload parsed from the GGUF file in Test::build().
struct Test::Attributes {
float random_val; // random draw passed to infiniopRandomSample (generator writes values in [0, 1))
float topp; // top-p (nucleus) cumulative-probability cutoff
int topk; // number of highest-probability candidates considered
int voc; // vocabulary size — presumably the length of `data`; confirm against the generator
float temperature; // softmax temperature scaling
std::shared_ptr<Tensor> data; // input logits tensor
std::shared_ptr<Tensor> ans; // reference answer, compared against `result` via allClose
std::shared_ptr<Tensor> result; // output buffer written by the operator
};
// Read a single POD attribute value out of its raw GGUF byte buffer.
// Uses memcpy instead of reinterpret_cast: the byte vector's storage is not
// guaranteed to be suitably aligned for T, and casting it violated strict
// aliasing. Also validates that the payload is large enough for T.
template <typename T>
static T readAttr(const std::vector<uint8_t> &bytes, const char *name) {
    if (bytes.size() < sizeof(T)) {
        throw std::runtime_error(std::string("Invalid attribute payload: ") + name);
    }
    T value;
    std::memcpy(&value, bytes.data(), sizeof(T));
    return value;
}

// Construct a random_sample test case from the attributes and tensors parsed
// out of the GGUF test file. Throws std::runtime_error on a malformed case.
std::shared_ptr<Test> Test::build(
    std::unordered_map<std::string, std::vector<uint8_t>> attributes,
    std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
    double rtol, double atol) {
    auto test = std::shared_ptr<Test>(new Test(rtol, atol));
    test->_attributes = new Attributes();
    // Every attribute and tensor is mandatory for this operator's test.
    if (attributes.find("random_val") == attributes.end()
        || attributes.find("topp") == attributes.end()
        || attributes.find("topk") == attributes.end()
        || attributes.find("voc") == attributes.end()
        || attributes.find("temperature") == attributes.end()
        || tensors.find("data") == tensors.end()
        || tensors.find("ans") == tensors.end()
        || tensors.find("result") == tensors.end()) {
        throw std::runtime_error("Invalid Test");
    }
    test->_attributes->random_val = readAttr<float>(attributes["random_val"], "random_val");
    test->_attributes->topp = readAttr<float>(attributes["topp"], "topp");
    test->_attributes->topk = readAttr<int>(attributes["topk"], "topk");
    test->_attributes->voc = readAttr<int>(attributes["voc"], "voc");
    test->_attributes->temperature = readAttr<float>(attributes["temperature"], "temperature");
    test->_attributes->data = tensors["data"];
    test->_attributes->ans = tensors["ans"];
    test->_attributes->result = tensors["result"];
    return test;
}
// Runs the random_sample operator on the target device, validates the output
// against the reference answer, then benchmarks it.
//
// Fix: the original leaked the op descriptor and the infinirtMalloc'd
// workspace on every exit path. An RAII guard now releases whatever has been
// acquired so far, regardless of which return is taken.
std::shared_ptr<infiniop_test::Result> Test::run(
    infiniopHandle_t handle, infiniDevice_t device, int device_id, size_t warm_ups, size_t iterations) {
    infiniopRandomSampleDescriptor_t op_desc;
    auto random_val = _attributes->random_val;
    auto topp = _attributes->topp;
    auto topk = _attributes->topk;
    auto temperature = _attributes->temperature;
    // Move the input/output tensors onto the device under test.
    auto data = _attributes->data->to(device, device_id);
    auto result = _attributes->result->to(device, device_id);

    // Scope guard: frees the workspace and destroys the descriptor once they
    // have been acquired, on both the failure returns and the success path.
    struct Guard {
        infiniopRandomSampleDescriptor_t desc = nullptr;
        void *workspace = nullptr;
        ~Guard() {
            if (workspace) {
                infinirtFree(workspace);
            }
            if (desc) {
                infiniopDestroyRandomSampleDescriptor(desc);
            }
        }
    } guard;

    CHECK_OR(infiniopCreateRandomSampleDescriptor(handle, &op_desc,
                                                  result->desc(),
                                                  data->desc()),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to create op descriptor."));
    guard.desc = op_desc;

    size_t workspace_size;
    CHECK_OR(infiniopGetRandomSampleWorkspaceSize(op_desc, &workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size."));

    void *workspace = nullptr;
    CHECK_OR(infinirtMalloc(&workspace, workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace."));
    guard.workspace = workspace;

    CHECK_OR(infiniopRandomSample(op_desc, workspace, workspace_size,
                                  result->data(),
                                  data->data(),
                                  random_val,
                                  topp,
                                  topk,
                                  temperature,
                                  nullptr),
             return TEST_FAILED(OP_EXECUTION_FAILED, "Failed during execution."));

    // Correctness check before benchmarking; allClose throws on mismatch.
    try {
        allClose(result, _attributes->ans, _rtol, _atol);
    } catch (const std::exception &e) {
        return TEST_FAILED(RESULT_INCORRECT, e.what());
    }

    double elapsed_time = benchmark(
        [=]() {
            infiniopRandomSample(
                op_desc, workspace, workspace_size,
                result->data(),
                data->data(),
                random_val,
                topp,
                topk,
                temperature,
                nullptr);
        },
        warm_ups, iterations);
    return TEST_PASSED(elapsed_time);
}
// Names of the scalar attributes this test reads from the GGUF file.
std::vector<std::string> Test::attribute_names() {
    static const std::vector<std::string> kNames{
        "random_val", "topp", "topk", "voc", "temperature"};
    return kNames;
}
// Names of the tensors this test reads from the GGUF file.
std::vector<std::string> Test::tensor_names() {
    static const std::vector<std::string> kNames{"data", "ans", "result"};
    return kNames;
}
// Human-readable summary of the test case: attributes, tensor infos, and
// tolerances.
//
// Fix: the original inserted std::endl between the attributes, so the output
// contained broken lines beginning with ", topk=", ", voc=", etc. All scalar
// attributes are now printed on a single line.
std::string Test::toString() const {
    std::ostringstream oss;
    oss << op_name() << std::endl;
    oss << "- random_val=" << _attributes->random_val
        << ", topp=" << _attributes->topp
        << ", topk=" << _attributes->topk
        << ", voc=" << _attributes->voc
        << ", temperature=" << _attributes->temperature << std::endl;
    oss << "- data: " << _attributes->data->info() << std::endl;
    oss << "- result: " << _attributes->result->info() << std::endl;
    // Tolerances in scientific notation, two significant decimals.
    oss << std::scientific << std::setprecision(2);
    oss << "- rtol=" << _rtol << ", atol=" << _atol << std::endl;
    return oss.str();
}
// Releases the Attributes heap-allocated with `new` in Test::build().
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::random_sample
......@@ -10,10 +10,12 @@ __C __export infiniStatus_t infiniopCreateTensorDescriptor(infiniopTensorDescrip
} else {
    // Compute contiguous (row-major) strides: the innermost dimension has
    // stride 1, each outer stride is the product of the inner extents.
    std::vector<ptrdiff_t> strides(ndim);
    ptrdiff_t dsize = 1;
    // Fix: `for (size_t i = ndim - 1; i >= 0; i--)` is a tautology for an
    // unsigned index — i underflows past 0 and strides[i] indexes out of
    // bounds. The `i-- > 0` countdown terminates correctly and also makes
    // the ndim == 0 guard unnecessary (the loop body never runs).
    for (size_t i = ndim; i-- > 0;) {
        strides[i] = dsize;
        dsize *= shape_[i];
    }
    *desc_ptr = new InfiniopTensorDescriptor(datatype, ndim, shape_, strides.data());
}
......
from ast import List
import numpy as np
import gguf
from typing import List
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides
def random_tensor(voc, topk, dtype):
    """Build a length-`voc` 1-D array whose `topk` largest entries are distinct.

    The top-k candidates are drawn without replacement from evenly spaced
    points in (1, 2), so they are guaranteed unique and strictly larger than
    the remaining entries, which are uniform in [0, 1).
    """
    assert voc >= topk
    # np.random.rand alone could produce duplicate values; sampling from a
    # linspace grid without replacement keeps the top-k entries unique.
    candidates = np.linspace(1, 2, num=topk + 2, endpoint=False)[1:]
    top_values = np.random.choice(candidates, size=topk, replace=False)
    tail_values = np.random.rand(voc - topk)
    # Merge both parts and shuffle so the top-k values land at random positions.
    merged = np.concatenate([top_values, tail_values]).astype(dtype)
    np.random.shuffle(merged)
    return merged
def softmax(x, axis):
    """Numerically stable softmax of `x` along `axis`."""
    # Shift by the max so exp() never overflows; the result is unchanged.
    shifted = x - np.max(x, axis=axis, keepdims=True)
    exps = np.exp(shifted)
    return exps / exps.sum(axis=axis, keepdims=True)
def random_sample(
    data: np.ndarray,
    random_val: float = 0.08,
    topp: float = 0.8,
    topk: int = 50,
    voc: int = 32000,
    temperature: float = 1.0,
):
    """NumPy reference implementation of top-k / top-p (nucleus) sampling.

    Returns the index (into `data`) of the sampled element. When filtering is
    disabled (topp <= 0 or topk <= 1) it degenerates to plain argmax.

    NOTE(review): if the scaled random_val ever reached the total nucleus
    mass (e.g. random_val >= 1), the final loop would fall through and return
    None — the generator only passes values in [0, 1); confirm for other callers.
    """
    if topp > 0 and topk > 1:
        indices = np.zeros([topk], dtype=np.int64)
        dataNp = data.copy()
        sorted_indices = np.arange(voc)
        # Partial selection sort: after this loop dataNp[:topk] holds the topk
        # largest values in descending order and sorted_indices[:topk] their
        # original positions. O(topk * voc), but with deterministic tie
        # handling (strict `<`, so the earlier index wins on equal values) —
        # presumably why the argsort one-liner below was left commented out.
        for i in range(topk):
            for j in range(i + 1, voc):
                if dataNp[i] < dataNp[j]:
                    tmp = dataNp[i].copy()
                    dataNp[i] = dataNp[j].copy()
                    dataNp[j] = tmp
                    tmpInd = sorted_indices[i].copy()
                    sorted_indices[i] = sorted_indices[j].copy()
                    sorted_indices[j] = tmpInd
        # sorted_indices = np.argsort(dataNp, descending=True)
        indices = sorted_indices[:topk]
        # Temperature-scaled, max-shifted softmax; dataNp[0] is the global
        # maximum after the selection sort above.
        globalM = dataNp[0]
        dataNp = (dataNp - globalM) / temperature
        dataNp = softmax(dataNp, axis=0)
        # Determine `end`, the nucleus size: the smallest prefix of the top-k
        # whose cumulative probability reaches topp, capped at topk.
        sum_s = 0
        for end in range(topk):
            sum_s += dataNp[end]
            if sum_s >= topp:
                break
        if end < topk - 1:
            end += 1
        else:
            end = topk
        # Rescale the random draw by the nucleus' total probability mass...
        sum_s = 0
        for i in range(end):
            sum_s += dataNp[i]
        random_val *= sum_s
        # ...then walk the cumulative distribution to pick the sampled index.
        sum_s = 0
        for i in range(end):
            sum_s += dataNp[i]
            if random_val < sum_s:
                return indices[i]
    else:
        return np.argmax(data)
class RandomSampleTestCase(InfiniopTestCase):
    """One random_sample test case: input logits plus sampling
    hyper-parameters, serialized to GGUF together with the reference answer
    and a placeholder result tensor."""

    def __init__(
        self,
        data: np.ndarray,
        random_val: float,
        topp: float,
        topk: int,
        temperature: float,
    ):
        super().__init__("random_sample")
        self.data = data
        self.random_val = random_val
        self.topp = topp
        self.topk = topk
        # The vocabulary size is implied by the data length, not passed in.
        self.voc = data.shape[0]
        self.temperature = temperature

    def write_test(self, test_writer: "InfiniopTestWriter"):
        # Write the common header first, then the scalar attributes under
        # this test's namespaced gguf keys.
        super().write_test(test_writer)
        test_writer.add_float32(test_writer.gguf_key("random_val"), self.random_val)
        test_writer.add_float32(test_writer.gguf_key("topp"), self.topp)
        test_writer.add_int32(test_writer.gguf_key("topk"), self.topk)
        test_writer.add_int32(test_writer.gguf_key("voc"), self.voc)
        test_writer.add_float32(test_writer.gguf_key("temperature"), self.temperature)
        # Compute the expected sampled index with the NumPy reference above.
        ans = random_sample(
            self.data,
            self.random_val,
            self.topp,
            self.topk,
            self.voc,
            self.temperature,
        )
        # Placeholder the operator under test will overwrite (int64 scalar).
        result = np.int64(0)
        test_writer.add_tensor(
            test_writer.gguf_key("data"),
            self.data,
            raw_dtype=np_dtype_to_ggml(self.data.dtype),
        )
        test_writer.add_tensor(test_writer.gguf_key("ans"), ans)
        test_writer.add_tensor(test_writer.gguf_key("result"), result)
if __name__ == "__main__":
    test_writer = InfiniopTestWriter("random_sample.gguf")
    # Each tuple: (voc, topk, random_val, topp, temperature).
    # The full suite is this parameter grid crossed with {float32, float16}.
    _CASES = [
        (512, 3, 0.8, 0.8, 0.5),
        (4096, 5, 0.05, 0.9, 1.0),
        (16384, 10, 0.15, 0.85, 2.0),
        (512, 3, 0.08, 0, 0.5),
        (4096, 1, 0.5, 0.9, 1.0),
        (16384, 1, 0.15, 0, 2.0),
        (32000, 50, 0.08, 0.8, 1.0),
        (32000, 25, 0.08, 1.0, 1.0),
    ]
    # dtype is the outer loop so the random tensors are generated in the same
    # order as before: all float32 cases first, then all float16 cases.
    test_cases = [
        RandomSampleTestCase(
            random_tensor(voc, topk, dtype),
            random_val,
            topp,
            topk,
            temperature,
        )
        for dtype in (np.float32, np.float16)
        for (voc, topk, random_val, topp, temperature) in _CASES
    ]
    test_writer.add_tests(test_cases)
    test_writer.save()
......@@ -77,8 +77,6 @@ def random_sample(data, random_val, topp, topk, voc, temperature):
# sorted_indices = torch.argsort(dataNp, descending=True)
indices = sorted_indices[:topk]
dataNp = dataNp[sorted_indices]
globalM = dataNp[0]
dataNp = (dataNp - globalM) / temperature
dataNp = torch.softmax(dataNp.float(), dim=0)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment