Unverified Commit da0cef14 authored by PanZezhong1725's avatar PanZezhong1725 Committed by GitHub
Browse files

Merge pull request #146 from InfiniTensor/issue/142

issue/142:添加rms_norm算子测例
parents 3329034a faf97b39
......@@ -7,6 +7,7 @@
*/
DECLARE_INFINIOP_TEST(gemm)
DECLARE_INFINIOP_TEST(random_sample)
DECLARE_INFINIOP_TEST(rms_norm)
DECLARE_INFINIOP_TEST(mul)
DECLARE_INFINIOP_TEST(clip)
DECLARE_INFINIOP_TEST(swiglu)
......@@ -33,6 +34,7 @@ DECLARE_INFINIOP_TEST(add)
REGISTER_INFINIOP_TEST(mul) \
REGISTER_INFINIOP_TEST(clip) \
REGISTER_INFINIOP_TEST(swiglu) \
REGISTER_INFINIOP_TEST(rms_norm) \
}
namespace infiniop_test {
......
#include "ops.hpp"
#include "utils.hpp"
#include <cstring>
#include <infinirt.h>
#include <iomanip>
#include <iostream>
namespace infiniop_test::rms_norm {
// Parameters and tensors that make up one RMSNorm test case.
struct Test::Attributes {
// Epsilon value passed to the RMSNorm descriptor when it is created.
float epsilon;
// Input tensor handed to the operator.
std::shared_ptr<Tensor> x;
// Weight tensor handed to the operator.
std::shared_ptr<Tensor> w;
// Reference result that the operator output is compared against.
std::shared_ptr<Tensor> ans;
// Output tensor the operator writes into.
std::shared_ptr<Tensor> y;
};
/// Builds an RMSNorm test from raw attribute bytes and named tensors.
///
/// @param attributes Raw attribute bytes keyed by name; must contain "epsilon"
///                   holding at least sizeof(float) bytes.
/// @param tensors    Tensors keyed by name; must contain "x", "w", "ans" and "y".
/// @param rtol       Relative tolerance forwarded to the Test constructor.
/// @param atol       Absolute tolerance forwarded to the Test constructor.
/// @return The newly constructed test.
/// @throws std::runtime_error if a required attribute or tensor is missing, or
///         if the "epsilon" attribute is too short to hold a float.
std::shared_ptr<Test> Test::build(
    std::unordered_map<std::string, std::vector<uint8_t>> attributes,
    std::unordered_map<std::string, std::shared_ptr<Tensor>> tensors,
    double rtol, double atol) {
    // Validate everything up front so nothing is allocated for an invalid case.
    const auto epsilon_it = attributes.find("epsilon");
    if (epsilon_it == attributes.end()
        || tensors.find("x") == tensors.end()
        || tensors.find("w") == tensors.end()
        || tensors.find("ans") == tensors.end()
        || tensors.find("y") == tensors.end()) {
        throw std::runtime_error("Invalid Test: Missing attributes or tensors");
    }

    const std::vector<uint8_t> &epsilon_bytes = epsilon_it->second;
    if (epsilon_bytes.size() < sizeof(float)) {
        // Reading a float from a shorter buffer would run past its end.
        throw std::runtime_error("Invalid Test: epsilon attribute is too short for a float");
    }

    // memcpy instead of reinterpret_cast: the byte buffer carries no alignment
    // or aliasing guarantee for float access.
    float epsilon = 0.0f;
    std::memcpy(&epsilon, epsilon_bytes.data(), sizeof(float));

    auto test = std::shared_ptr<Test>(new Test(rtol, atol));
    test->_attributes = new Attributes();
    test->_attributes->epsilon = epsilon;
    test->_attributes->x = tensors["x"];
    test->_attributes->w = tensors["w"];
    test->_attributes->ans = tensors["ans"];
    test->_attributes->y = tensors["y"];
    return test;
}
/// Runs the RMSNorm operator once, checks its output against the reference
/// tensor, and then benchmarks it.
///
/// @param handle     Operator handle used to create the RMSNorm descriptor.
/// @param device     Device the x, w and y tensors are moved to.
/// @param device_id  Device index the tensors are moved to.
/// @param warm_ups   Warm-up count forwarded to benchmark().
/// @param iterations Iteration count forwarded to benchmark().
/// @return A failed result if descriptor creation, workspace setup, execution
///         or the comparison against `ans` fails; otherwise a passed result
///         carrying the value returned by benchmark().
std::shared_ptr<infiniop_test::Result> Test::run(
    infiniopHandle_t handle, infiniDevice_t device, int device_id,
    size_t warm_ups, size_t iterations) {
    // NOTE(review): op_desc is never destroyed in this function. If the infiniop
    // API offers a destroy call for RMSNorm descriptors it should be invoked
    // before returning - confirm against the API header.
    infiniopRMSNormDescriptor_t op_desc;
    CHECK_OR(infiniopCreateRMSNormDescriptor(handle, &op_desc,
                                             _attributes->y->desc(),
                                             _attributes->x->desc(),
                                             _attributes->w->desc(),
                                             _attributes->epsilon),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to create RMSNorm descriptor"));

    auto x = _attributes->x->to(device, device_id);
    auto w = _attributes->w->to(device, device_id);
    auto y = _attributes->y->to(device, device_id);

    size_t workspace_size = 0;
    CHECK_OR(infiniopGetRMSNormWorkspaceSize(op_desc, &workspace_size),
             return TEST_FAILED(OP_CREATION_FAILED, "Failed to get workspace size"));

    // Frees the workspace on every exit path, including the early returns
    // below, which previously leaked it.
    struct WorkspaceGuard {
        void *ptr = nullptr;
        WorkspaceGuard() = default;
        WorkspaceGuard(const WorkspaceGuard &) = delete;
        WorkspaceGuard &operator=(const WorkspaceGuard &) = delete;
        ~WorkspaceGuard() {
            if (ptr != nullptr) {
                infinirtFree(ptr);
            }
        }
    } workspace_guard;

    void *workspace = nullptr;
    if (workspace_size > 0) {
        CHECK_OR(infinirtMalloc(&workspace, workspace_size),
                 return TEST_FAILED(OP_CREATION_FAILED, "Failed to allocate workspace"));
        // Hand ownership to the guard only after the allocation succeeded.
        workspace_guard.ptr = workspace;
    }

    CHECK_OR(infiniopRMSNorm(op_desc,
                             workspace, workspace_size,
                             y->data(),
                             x->data(),
                             w->data(),
                             nullptr),
             return TEST_FAILED(OP_EXECUTION_FAILED, "RMSNorm execution failed"));

    try {
        allClose(y, _attributes->ans, _rtol, _atol);
    } catch (const std::exception &e) {
        return TEST_FAILED(RESULT_INCORRECT, e.what());
    }

    // The lambda copies only the raw workspace pointer and the tensor handles;
    // the guard itself is not captured, so the buffer is freed exactly once.
    const double elapsed_time = benchmark(
        [=]() {
            infiniopRMSNorm(op_desc,
                            workspace, workspace_size,
                            y->data(),
                            x->data(),
                            w->data(),
                            nullptr);
        },
        warm_ups, iterations);

    return TEST_PASSED(elapsed_time);
}
// Lists the attribute names used by this test: only "epsilon".
std::vector<std::string> Test::attribute_names() {
    std::vector<std::string> names;
    names.emplace_back("epsilon");
    return names;
}
// Lists the tensor names used by this test, in the order x, w, ans, y.
std::vector<std::string> Test::tensor_names() {
    return std::vector<std::string>{"x", "w", "ans", "y"};
}
// Lists the output tensor names of this test: only "y".
std::vector<std::string> Test::output_names() {
    return std::vector<std::string>(1, "y");
}
// Renders a multi-line description of the test: operator name, epsilon, the
// info() of x, w and y, and the tolerances in scientific notation with two
// digits of precision.
std::string Test::toString() const {
    std::ostringstream text;
    text << op_name() << '\n'
         << "- epsilon=" << _attributes->epsilon << '\n'
         << "- x: " << _attributes->x->info() << '\n'
         << "- w: " << _attributes->w->info() << '\n'
         << "- y: " << _attributes->y->info() << '\n';
    // The number format is switched only after epsilon has been written.
    text << std::scientific << std::setprecision(2)
         << "- rtol=" << _rtol << ", atol=" << _atol << '\n';
    return text.str();
}
// Releases the Attributes object that build() allocates with new.
Test::~Test() {
delete _attributes;
}
} // namespace infiniop_test::rms_norm
import numpy as np
from typing import List
from .. import InfiniopTestWriter, InfiniopTestCase, np_dtype_to_ggml, gguf_strides, contiguous_gguf_strides
def random_tensor(shape: tuple, dtype: np.dtype) -> np.ndarray:
    """Return a random array of the given shape and dtype with small values.

    Samples are drawn uniformly from [-1.0, 1.0), cast to ``dtype`` and then
    scaled by 0.001.
    """
    samples = np.random.uniform(low=-1.0, high=1.0, size=shape)
    as_dtype = samples.astype(dtype)
    return as_dtype * 0.001
def rms_norm(x: np.ndarray, w: np.ndarray, epsilon: float) -> np.ndarray:
    """Compute the RMSNorm reference result with NumPy.

    Args:
        x: Input tensor; normalization runs over its last axis
            (shape [..., hidden_size]).
        w: Scaling weights, multiplied into the normalized input
            (shape [hidden_size]).
        epsilon: Small constant added to the mean square before the square
            root to avoid division by zero.

    Returns:
        The normalized and scaled tensor, with the same shape as ``x``.
    """
    mean_square = np.mean(np.square(x), axis=-1, keepdims=True)
    root_mean_square = np.sqrt(mean_square + epsilon)
    return (x / root_mean_square) * w
class RMSNormTestCase(InfiniopTestCase):
    """A single RMSNorm test case that can be written through a test writer.

    Stores the input ``x``, the weights ``w``, the output placeholder ``y``,
    an optional logical shape, optional strides for ``x`` and ``y``, and the
    epsilon used for the reference computation.
    """

    def __init__(
        self,
        x: np.ndarray,
        w: np.ndarray,
        y: np.ndarray,
        shape: List[int] | None,
        x_strides: List[int] | None,
        y_strides: List[int] | None,
        epsilon: float = 1e-5,
    ):
        super().__init__("rms_norm")
        self.x, self.w, self.y = x, w, y
        self.shape = shape
        self.epsilon = epsilon
        self.x_strides = x_strides
        self.y_strides = y_strides

    def write_test(self, test_writer: "InfiniopTestWriter"):
        """Write epsilon, shape/stride metadata, the tensors and the reference answer."""
        super().write_test(test_writer)
        test_writer.add_float32(test_writer.gguf_key("epsilon"), self.epsilon)

        # x and y share the same logical shape.
        if self.shape is not None:
            for shape_key in ("x.shape", "y.shape"):
                test_writer.add_array(test_writer.gguf_key(shape_key), self.shape)

        if self.x_strides is not None:
            test_writer.add_array(
                test_writer.gguf_key("x.strides"),
                gguf_strides(*self.x_strides),
            )

        # y.strides is always written; without explicit strides it falls back
        # to the contiguous strides of `shape`.
        # NOTE(review): the fallback value from contiguous_gguf_strides is also
        # passed through gguf_strides - confirm this double conversion is intended.
        y_strides_key = test_writer.gguf_key("y.strides")
        if self.y_strides is not None:
            y_stride_source = self.y_strides
        else:
            y_stride_source = contiguous_gguf_strides(self.shape)
        test_writer.add_array(y_strides_key, gguf_strides(*y_stride_source))

        for tensor_name, tensor in (("x", self.x), ("w", self.w), ("y", self.y)):
            test_writer.add_tensor(
                test_writer.gguf_key(tensor_name),
                tensor,
                raw_dtype=np_dtype_to_ggml(tensor.dtype),
            )

        # The reference answer is computed and stored in float64.
        x_f64 = self.x.astype(np.float64)
        w_f64 = self.w.astype(np.float64)
        ans = rms_norm(x_f64, w_f64, self.epsilon)
        test_writer.add_tensor(
            test_writer.gguf_key("ans"),
            ans,
            raw_dtype=np_dtype_to_ggml(np.float64),
        )
if __name__ == "__main__":
    # Generates rms_norm.gguf: one test case per (dtype, shape/stride) pair.
    test_writer = InfiniopTestWriter("rms_norm.gguf")

    # Each entry is (shape, x_strides, y_strides); None means no explicit strides.
    _TEST_CASES_ = [
        ((2, 256), None, None),
        ((4, 512), None, None),
        ((8, 1024), None, None),
        ((1, 768), None, None),
        ((8, 256), None, None),
        ((500, 4096), None, None),
        ((4, 512), (1024, 1), None),
        ((4, 512), (512, 1), None),
        ((500, 4096), (9192, 1), None),
        ((500, 4096), (4096, 1), None),
        ((4, 512), None, (1024, 1)),
        ((500, 4096), None, (8192, 1)),
        ((4, 512), (1024, 1), (512, 1)),
        ((4, 512), None, (2048, 1)),
    ]
    _TENSOR_DTYPES_ = [np.float32, np.float16]
    epsilon = 1e-5

    test_cases = []
    for dtype in _TENSOR_DTYPES_:
        for shape, x_strides, y_strides in _TEST_CASES_:
            weights = np.random.rand(shape[-1]).astype(dtype)
            inputs = np.random.rand(*shape).astype(dtype)
            # y is a zero-sized placeholder with the same rank as `shape`.
            placeholder = np.empty((0,) * len(shape), dtype=dtype)
            test_cases.append(
                RMSNormTestCase(
                    x=inputs,
                    w=weights,
                    y=placeholder,
                    shape=shape,
                    x_strides=x_strides,
                    y_strides=y_strides,
                    epsilon=epsilon,
                )
            )

    test_writer.add_tests(test_cases)
    test_writer.save()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment