Add Transformers GPTQ support (#963)

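Proposal to fix https://github.com/huggingface/text-generation-inference/issues/962: when setting the GPTQ parameters, first try to read `bits` and `group_size` from the `quantization_config` section of the model's `config.json` (looked up in the local model directory, otherwise downloaded from the Hub), and fall back to `quantize_config.json` if that fails.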

add transformers gptq support (#963)
Proposal to fix https://github.com/huggingface/text-generation-inference/issues/962
b03d2621 · Florian Zimmermeister · GitHub · 935a77fb · b03d2621
Unverified Commit b03d2621 authored Sep 07, 2023 by Florian Zimmermeister Committed by GitHub Sep 07, 2023
Show whitespace changes
Inline Side-by-side

Showing with 15 additions and 4 deletions

server/text_generation_server/utils/weights.py server/text_generation_server/utils/weights.py +15 -4

No files found.
--- a/server/text_generation_server/utils/weights.py
+++ b/server/text_generation_server/utils/weights.py
@@ -223,6 +223,17 @@ class Weights:
        return bits, groupsize

    def _set_gptq_params(self, model_id):
+        filename = "config.json"
+        try:
+            if os.path.exists(os.path.join(model_id, filename)):
+                filename = os.path.join(model_id, filename)
+            else:
+                filename = hf_hub_download(model_id, filename=filename)
+            with open(filename, "r") as f:
+                data = json.load(f)
+            self.gptq_bits = data["quantization_config"]["bits"]
+            self.gptq_groupsize = data["quantization_config"]["group_size"]
+        except Exception:
            filename = "quantize_config.json"
            try:
                if os.path.exists(os.path.join(model_id, filename)):