Unverified Commit 9d06c953 authored by Hailey Schoelkopf, committed by GitHub

Merge pull request #585 from EleutherAI/fix-quantized-device

Add error handling for calling `.to(device)`
parents 51d6951c 2d27f9e1
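
For context: `bitsandbytes`-quantized checkpoints have their weights placed by `accelerate` at load time, and recent `transformers` versions raise an error if `.to(device)` is called on such a model. Below is a minimal sketch of that failure mode and the guard this PR adopts; the checkpoint name, device string, and `device_map` setting are illustrative, not taken from the diff.

```python
import transformers

# Quantization via bitsandbytes; accelerate places the weights at load time.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "facebook/opt-125m",  # illustrative checkpoint
    load_in_8bit=True,
    device_map="auto",
).eval()

# `.to()` may raise for 8-bit models, so tolerate the failure rather than
# crashing (same message the PR prints):
try:
    model.to("cuda:0")
except Exception:
    print(
        "Failed to place model onto specified device. This may be because "
        "the model is quantized via `bitsandbytes`. If the desired GPU is "
        "being used, this message is safe to ignore."
    )
```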
@@ -65,9 +65,12 @@ class HFLM(BaseLM):
             revision=revision,
             torch_dtype=_get_dtype(dtype),
             trust_remote_code=trust_remote_code,
-        ).to(self.device)
-        self.gpt2.eval()
+        ).eval()
+        if not load_in_8bit:
+            try:
+                self.gpt2.to(self.device)
+            except:
+                print("Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes`. If the desired GPU is being used, this message is safe to ignore.")
         self.tokenizer = transformers.AutoTokenizer.from_pretrained(
             pretrained if tokenizer is None else tokenizer,
             revision=revision,
@@ -235,8 +235,11 @@ class HuggingFaceAutoLM(BaseLM):
             # the user specified one so we force `self._device` to be the same as
             # `lm_head`'s.
             self._device = self.model.hf_device_map["lm_head"]
-        if not use_accelerate:
-            self.model.to(self._device)
+        if not use_accelerate and not (load_in_4bit or load_in_8bit):
+            try:
+                self.model.to(self._device)
+            except:
+                print("Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes`. If the desired GPU is being used, this message is safe to ignore.")

     def _create_auto_model(
         self,
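
The second hunk goes one step further than the first: when 4-bit or 8-bit loading was requested, it skips the device move entirely instead of relying on the exception path. A minimal standalone sketch of that combined guard follows; the helper name `place_model` and its signature are hypothetical, not part of the repo.

```python
import torch

def place_model(model: torch.nn.Module, device: str,
                use_accelerate: bool = False,
                load_in_4bit: bool = False,
                load_in_8bit: bool = False) -> None:
    """Move `model` to `device` unless accelerate or bitsandbytes
    quantization already controls placement (mirrors the second hunk)."""
    if use_accelerate or load_in_4bit or load_in_8bit:
        # Placement is handled by accelerate's device map or by
        # bitsandbytes at load time; `.to()` would raise for quantized models.
        return
    try:
        model.to(device)
    except Exception:
        print(
            "Failed to place model onto specified device. This may be because "
            "the model is quantized via `bitsandbytes`. If the desired GPU is "
            "being used, this message is safe to ignore."
        )
```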