Merge pull request #51 from molamooo/fix-f16-dequantize-device

[fix] f16 dequantize device ignored

Merge pull request #51 from molamooo/fix-f16-dequantize-device
[fix] f16 dequantize device ignored
1f85db3d · UnicornChan · GitHub · cbc47d0b · 29f4151e · 1f85db3d
Unverified commit 1f85db3d, authored Aug 22, 2024 by UnicornChan; committed by GitHub on Aug 22, 2024
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 1 addition and 1 deletion

ktransformers/util/custom_gguf.py ktransformers/util/custom_gguf.py +1 -1

No files found.
--- a/ktransformers/util/custom_gguf.py
+++ b/ktransformers/util/custom_gguf.py
@@ -681,7 +681,7 @@ def dequantize_f16_gpu(data, device):
    res = torch.from_numpy(data)
    res_gpu = torch.empty_like(res, device=device)
    res_gpu.copy_(res)
-    return res
+    return res_gpu

 GGML_DEQUANTIZE = {
    "F32": dequantize_f32,