Unverified Commit 9d06c953 authored by Hailey Schoelkopf, committed by GitHub

Merge pull request #585 from EleutherAI/fix-quantized-device

Add error handling for calling `.to(device)`
parents 51d6951c 2d27f9e1
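
For context: `bitsandbytes`-quantized checkpoints have their weights placed by `accelerate` at load time, and recent `transformers` versions raise an error if `.to(device)` is called on such a model. Below is a minimal sketch of that failure mode and the guard this PR adopts; the checkpoint name, device string, and `device_map` setting are illustrative, not taken from the diff.

```python
import transformers

# Quantization via bitsandbytes; accelerate places the weights at load time.
model = transformers.AutoModelForCausalLM.from_pretrained(
    "facebook/opt-125m",  # illustrative checkpoint
    load_in_8bit=True,
    device_map="auto",
).eval()

# `.to()` may raise for 8-bit models, so tolerate the failure rather than
# crashing (same message the PR prints):
try:
    model.to("cuda:0")
except Exception:
    print(
        "Failed to place model onto specified device. This may be because "
        "the model is quantized via `bitsandbytes`. If the desired GPU is "
        "being used, this message is safe to ignore."
    )
```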
@@ -65,9 +65,12 @@ class HFLM(BaseLM):
             revision=revision,
             torch_dtype=_get_dtype(dtype),
             trust_remote_code=trust_remote_code,
-        ).to(self.device)
-        self.gpt2.eval()
+        ).eval()
+        if not load_in_8bit:
+            try:
+                self.gpt2.to(self.device)
+            except:
+                print("Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes`. If the desired GPU is being used, this message is safe to ignore.")
         self.tokenizer = transformers.AutoTokenizer.from_pretrained(
             pretrained if tokenizer is None else tokenizer,
             revision=revision,
@@ -235,8 +235,11 @@ class HuggingFaceAutoLM(BaseLM):
             # the user specified one so we force `self._device` to be the same as
             # `lm_head`'s.
             self._device = self.model.hf_device_map["lm_head"]
-        if not use_accelerate:
-            self.model.to(self._device)
+        if not use_accelerate and not (load_in_4bit or load_in_8bit):
+            try:
+                self.model.to(self._device)
+            except:
+                print("Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes`. If the desired GPU is being used, this message is safe to ignore.")

     def _create_auto_model(
         self,
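
The second hunk goes one step further than the first: when 4-bit or 8-bit loading was requested, it skips the device move entirely instead of relying on the exception path. A minimal standalone sketch of that combined guard follows; the helper name `place_model` and its signature are hypothetical, not part of the repo.

```python
import torch

def place_model(model: torch.nn.Module, device: str,
                use_accelerate: bool = False,
                load_in_4bit: bool = False,
                load_in_8bit: bool = False) -> None:
    """Move `model` to `device` unless accelerate or bitsandbytes
    quantization already controls placement (mirrors the second hunk)."""
    if use_accelerate or load_in_4bit or load_in_8bit:
        # Placement is handled by accelerate's device map or by
        # bitsandbytes at load time; `.to()` would raise for quantized models.
        return
    try:
        model.to(device)
    except Exception:
        print(
            "Failed to place model onto specified device. This may be because "
            "the model is quantized via `bitsandbytes`. If the desired GPU is "
            "being used, this message is safe to ignore."
        )
```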