fix shape in GEMM W8A8

63913f29 · LeeDongYeun · Zhekai Zhang · af6b1a3c · 63913f29
Commit 63913f29, authored Mar 28, 2025 by LeeDongYeun; committed by Zhekai Zhang on Apr 01, 2025
Hide whitespace changes
Inline Side-by-side

Showing with 3 additions and 3 deletions

src/Linear.cpp (+3 -3)

No files found.
--- a/src/Linear.cpp
+++ b/src/Linear.cpp
@@ -451,9 +451,9 @@ GEMM_W8A8::QuantizedActivation GEMM_W8A8::quantize(Tensor x, bool fuse_glu) {
 }
 Tensor GEMM_W8A8::forward_quant(QuantizedActivation qact) {
-    auto oshape = qact.act.shape;
+    auto shape = TensorShape(qact.act.shape.dataExtent);
-    oshape[-1] = out_features;
+    shape[-1] = out_features;
-    Tensor out = Tensor::allocate(oshape, this->dtype, qact.act.device());
+    Tensor out = Tensor::allocate(shape, this->dtype, qact.act.device());
    kernels::gemm_w8a8(qact.act, this->qweight, out, qact.ascales, this->wscales, this->bias);
    debug("gemm.out", out);