Optimize quantize_q8_1 kernel

4667452a · xuxzh1 · 1dc4b857 · 4667452a
Commit 4667452a authored Mar 06, 2025 by xuxzh1 🎱
Hide whitespace changes
Inline Side-by-side

Showing with 4 additions and 3 deletions

llama/ggml-cuda/quantize.cu llama/ggml-cuda/quantize.cu +4 -3

No files found.
--- a/llama/ggml-cuda/quantize.cu
+++ b/llama/ggml-cuda/quantize.cu
@@ -58,9 +58,10 @@ static __global__ __launch_bounds__(1024) void quantize_q8_1(const float * __res
    if (iqs > 0) {
        return;
    }
+    ggml_half2 ds = {d, sum};
-    reinterpret_cast<half&>(y[ib].ds.x) = d;
+    y[ib].ds = ds;
-    reinterpret_cast<half&>(y[ib].ds.y) = sum;
+    //reinterpret_cast<half&>(y[ib].ds) = ds;
+    //reinterpret_cast<half&>(y[ib].ds.y) = sum;
 }
 template <mmq_q8_1_ds_layout ds_layout>