fix(server): Fix handling of non-parameter tensors in the quantize script (`bigcode/starcoder` was an example). (#661)

08b8eec1 · Nicolas Patry · GitHub · 362883f2 · 08b8eec1
Unverified Commit 08b8eec1 authored Jul 20, 2023 by Nicolas Patry Committed by GitHub Jul 20, 2023
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 4 additions and 1 deletion

server/text_generation_server/utils/gptq/quantize.py server/text_generation_server/utils/gptq/quantize.py +4 -1

No files found.
--- a/server/text_generation_server/utils/gptq/quantize.py
+++ b/server/text_generation_server/utils/gptq/quantize.py
@@ -812,10 +812,13 @@ def load_weights_pre_hook(module_name, weights, recursive=False):
                tensor = weights.get_tensor(tensor_name)
                setdeepattr(module, local_param, nn.Parameter(tensor))
            else:
+                tensor = current_tensor.to(device=torch.device("cuda:0"))
+                if current_tensor.requires_grad:
+                    tensor = nn.Parameter(tensor)
                setdeepattr(
                    module,
                    local_param,
-                    nn.Parameter(current_tensor.to(device=torch.device("cuda:0"))),
+                    tensor
                )
    return inner