[fix] f16 dequantize: device argument was ignored

29f4151e · molamooo · GitHub · cbc47d0b · 29f4151e
Unverified Commit 29f4151e authored Aug 22, 2024 by molamooo Committed by GitHub Aug 22, 2024
Hide whitespace changes
Inline Side-by-side

Showing with 1 addition and 1 deletion

ktransformers/util/custom_gguf.py ktransformers/util/custom_gguf.py +1 -1

No files found.
--- a/ktransformers/util/custom_gguf.py
+++ b/ktransformers/util/custom_gguf.py
@@ -681,7 +681,7 @@ def dequantize_f16_gpu(data, device):
    res = torch.from_numpy(data)
    res_gpu = torch.empty_like(res, device=device)
    res_gpu.copy_(res)
-    return res
+    return res_gpu

 GGML_DEQUANTIZE = {
    "F32": dequantize_f32,