Commit 4667452a authored by xuxzh1 🎱
Browse files

Optimize the quantize_q8_1 kernel

parent 1dc4b857
...@@ -58,9 +58,10 @@ static __global__ __launch_bounds__(1024) void quantize_q8_1(const float * __res ...@@ -58,9 +58,10 @@ static __global__ __launch_bounds__(1024) void quantize_q8_1(const float * __res
if (iqs > 0) { if (iqs > 0) {
return; return;
} }
ggml_half2 ds = {d, sum};
reinterpret_cast<half&>(y[ib].ds.x) = d; y[ib].ds = ds;
reinterpret_cast<half&>(y[ib].ds.y) = sum; //reinterpret_cast<half&>(y[ib].ds) = ds;
//reinterpret_cast<half&>(y[ib].ds.y) = sum;
} }
template <mmq_q8_1_ds_layout ds_layout> template <mmq_q8_1_ds_layout ds_layout>
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment