Commit bf7f1d54 authored by OlivierDehaene's avatar OlivierDehaene
Browse files

fix(server): fix quantization

parent 49a6c8c1
......@@ -246,9 +246,7 @@ class BLOOMSharded(BLOOM):
module.linear = replace_linear(state)
elif quantize == "gptq":
raise NotImplementedError(
"`gptq` is not implemented for now"
)
raise NotImplementedError("`gptq` is not implemented for now")
elif quantize is None:
tensor = tensor.to(device)
else:
......
......@@ -365,9 +365,7 @@ class GalacticaSharded(Galactica):
module.linear = replace_linear(state)
elif quantize == "gptq":
raise NotImplementedError(
"`gptq` is not implemented for now"
)
raise NotImplementedError("`gptq` is not implemented for now")
elif quantize is None:
tensor = tensor.to(device)
else:
......
......@@ -211,9 +211,7 @@ class GPTNeoxSharded(CausalLM):
module.linear = replace_linear(state)
elif quantize == "gptq":
raise NotImplementedError(
"`gptq` is not implemented for now"
)
raise NotImplementedError("`gptq` is not implemented for now")
elif quantize is None:
tensor = tensor.to(device)
else:
......
......@@ -224,10 +224,8 @@ class T5Sharded(Seq2SeqLM):
module.linear = replace_linear(state)
elif quantize == "gptq" and not module_name.endswith("wo"):
raise NotImplementedError(
"`gptq` is not implemented for now"
)
elif quantize is None:
raise NotImplementedError("`gptq` is not implemented for now")
elif quantize is None or module_name.endswith("wo"):
tensor = tensor.to(device)
else:
raise ValueError(f"Unexpected quantize `{quantize}`")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment