Commit 2d27f9e1 authored by haileyschoelkopf

add error handling for moving model to device

parent 4c08d72a
@@ -65,9 +65,12 @@ class HFLM(BaseLM):
                 revision=revision,
                 torch_dtype=_get_dtype(dtype),
                 trust_remote_code=trust_remote_code,
-            ).to(self.device)
-            self.gpt2.eval()
+            ).eval()
+            if not load_in_8bit:
+                try:
+                    self.gpt2.to(self.device)
+                except:
+                    print("Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes`. If the desired GPU is being used, this message is safe to ignore.")
             self.tokenizer = transformers.AutoTokenizer.from_pretrained(
                 pretrained if tokenizer is None else tokenizer,
                 revision=revision,
...
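For context, this hunk wraps the device move in a guard plus a try/except: models quantized via `bitsandbytes` are placed on device at load time, so calling `.to()` on them can raise. A minimal sketch of the same pattern outside the harness, assuming a standard `transformers` causal LM (the model name, device string, and `load_in_8bit` flag here are illustrative stand-ins for the harness's own arguments, and `except Exception` is used in place of the diff's bare `except`):

```python
import transformers

# Illustrative stand-ins; the harness derives these from its constructor args.
load_in_8bit = False
device = "cuda:0"

model = transformers.AutoModelForCausalLM.from_pretrained("gpt2").eval()

if not load_in_8bit:
    try:
        model.to(device)
    except Exception:
        # bitsandbytes-quantized models manage their own device placement
        # and reject .to(); if the desired GPU is already in use, this
        # failure is benign.
        print(
            "Failed to place model onto specified device. "
            "This may be because the model is quantized via `bitsandbytes`."
        )
```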
@@ -235,8 +235,11 @@ class HuggingFaceAutoLM(BaseLM):
             # the user specified one so we force `self._device` to be the same as
             # `lm_head`'s.
             self._device = self.model.hf_device_map["lm_head"]
-        if not use_accelerate:
-            self.model.to(self._device)
+        if not use_accelerate and not (load_in_4bit or load_in_8bit):
+            try:
+                self.model.to(self._device)
+            except:
+                print("Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes`. If the desired GPU is being used, this message is safe to ignore.")

     def _create_auto_model(
         self,
...
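The second hunk additionally skips the manual move when the model was loaded in 4-bit or 8-bit, since quantized loads already dispatch weights to devices via a device map. A hypothetical load showing why no `.to()` call is needed in that case (`load_in_8bit=True` requires `bitsandbytes`, and `device_map="auto"` requires `accelerate`):

```python
import transformers

# 8-bit quantized load: weights are placed on devices at load time,
# so no subsequent model.to(device) call is made (or needed).
model = transformers.AutoModelForCausalLM.from_pretrained(
    "gpt2",
    load_in_8bit=True,   # bitsandbytes quantization
    device_map="auto",   # accelerate handles placement across devices
)
```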