# or any other option that preloads model onto device
# Move the model to the requested device; quantized / device_map-loaded
# models refuse .to() with a ValueError, which is expected and benign.
try:
    self.model.to(self.device)
except ValueError:
    eval_logger.debug(
        "Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes` or `device_map` is provided. If the desired GPU is being used, this message is safe to ignore."
    )

# Cache per-instance generation/evaluation settings.
self.truncation = truncation
self.logits_cache = logits_cache
# NOTE(review): tokenizer.vocab_size excludes added tokens in HF tokenizers —
# confirm this is intended vs len(self.tokenizer).
self.vocab_size = self.tokenizer.vocab_size
...
...
@@ -249,24 +302,15 @@ class HFLM(TemplateLM):
# Per-GPU batch size for evaluation.
self.batch_size_per_gpu = int(batch_size)

if isinstance(pretrained, str):
    if gpus >= 1 or str(self.device) == "mps":
        # TODO: can remove this whole snippet except in the mps case, perhaps?
        # or any other option that preloads model onto device
        try:
            self.model.to(self.device)
        except ValueError:
            eval_logger.debug(
                "Failed to place model onto specified device. This may be because the model is quantized via `bitsandbytes` or `device_map` is provided. If the desired GPU is being used, this message is safe to ignore."
            )
    # multigpu data-parallel support when launched with accelerate
    if gpus > 1:
        # NOTE(review): the warning below and the RuntimeError below it cover
        # the same condition (`parallelize` + multi-process accelerate launch);
        # this looks like both sides of a diff were merged — confirm which
        # behavior (warn vs. raise) is intended.
        if parallelize and accelerator.num_processes > 1:
            eval_logger.warning(
                "You are both using a HF Accelerate `device_map` and launching via `accelerate launch`. This will attempt to do model and data parallelism depending on the resources available."
            )
        if parallelize:
            if accelerator.num_processes > 1:
                raise RuntimeError(
                    "Attempted to use both a HF Accelerate `device_map` and to launch via `accelerate launch`. If this is the case, please either remove `parallelize=True` from --model_args or launch outside of the Accelerate launcher."
                )
            else:
                pass
        elif accelerator.num_processes == 1:
            # if we aren't launching via accelerate, ditch
            self._rank = 0
...
...
@@ -315,77 +359,6 @@ class HFLM(TemplateLM):
f"Loglikelihood prefix token id used in evaluation: {self.prefix_token_id}"
)
def_get_accelerate_args(
self,
parallelize:bool=None,
device_map:Optional[str]="auto",
max_memory_per_gpu:Optional[Union[int,str]]=None,
max_cpu_memory:Optional[Union[int,str]]=None,
offload_folder:Optional[str]="./offload",
gpus:Optional[int]=None,
)->dict:
"""Returns the kwargs needed to apply `accelerate` in `AutoModel.from_pretrained`."""