gaoqiong / lm-evaluation-harness

Commit b8154374, authored Jul 03, 2023 by haileyschoelkopf
handle device assignment better
parent d15ee17a

Changes: 1 changed file with 34 additions and 25 deletions

lm_eval/models/huggingface.py (+34, -25)
@@ -58,7 +58,7 @@ class HFLM(LM):
     def __init__(
         self,
-        device="cuda",
+        device: Optional[Union[str, int]] = "cuda",
         pretrained="gpt2",
         revision="main",
         low_cpu_mem_usage=None,
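
With the widened annotation, `device` may now be a device string or an integer ordinal rather than only a string. A small, hypothetical usage sketch (argument values are illustrative; the `int()` coercion for ordinal strings is added in the next hunk):

    # Both spellings are accepted under Optional[Union[str, int]];
    # "0" falls through to int("0") in the resolution logic below.
    lm = HFLM(pretrained="gpt2", device="cuda:0")
    lm = HFLM(pretrained="gpt2", device="0")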
@@ -82,11 +82,16 @@ class HFLM(LM):
         assert isinstance(batch_size, int)
 
         gpus = torch.cuda.device_count()
+        accelerator = Accelerator()
 
-        if gpus <= 1 and not parallelize:
+        if not (parallelize or accelerator.num_processes > 1):
             # use user-passed device
+            device_list = set(
+                ["cuda", "cpu"]
+                + [f"cuda:{i}" for i in range(torch.cuda.device_count())]
+            )
             if device:
-                if device not in ["cuda", "cpu"]:
+                if device not in device_list:
                     device = int(device)
                 self._device = torch.device(device)
                 eval_logger.info(f"Using device '{device}'")
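
Read on its own, the new branch builds the set of recognizable device strings and treats anything else as a GPU ordinal. A self-contained sketch of that fallback (the function name and usage are hypothetical, not part of the patch):

    import torch

    def resolve_device(device: str) -> torch.device:
        # Known names pass through; anything else (e.g. "0") is treated
        # as a CUDA ordinal and coerced with int(), mirroring the patch.
        device_list = {"cuda", "cpu"} | {
            f"cuda:{i}" for i in range(torch.cuda.device_count())
        }
        if device not in device_list:
            device = int(device)  # raises ValueError for typos such as "cuda0"
        return torch.device(device)

    print(resolve_device("cpu"))  # device(type='cpu')
    print(resolve_device("0"))    # device(type='cuda', index=0)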
@@ -100,7 +105,7 @@ class HFLM(LM):
                 )
             else:
                 eval_logger.info(
-                    f"Passed device '{device}', but using `accelerate launch` or `parallelize=True`. This will be overridden when placing model."
+                    f"Using `accelerate launch` or `parallelize=True`, device '{device}' will be overridden when placing model."
                 )
                 # TODO: include in warning that `load_in_8bit` etc. affect this too
                 self._device = device
@@ -162,7 +167,6 @@ class HFLM(LM):
 
         # multigpu data-parallel support when launched with accelerate
         if gpus > 1:
-            accelerator = Accelerator()
             if parallelize:
                 if accelerator.num_processes > 1:
                     raise RuntimeError(
@@ -170,34 +174,39 @@ class HFLM(LM):
                     )
                 else:
                     pass
-            elif gpus > accelerator.num_processes:
-                # TODO: make sure there's still never an edge case where we unintentionally default to CPU
-                eval_logger.warning(
-                    "WARNING: The number of total system GPUs does not match the number of spawned processes. "
-                    "If you would like to use data parallelism, please launch the script "
-                    "with 'accelerate launch *script*'. "
-                    f"Current run will proceed with {accelerator.num_processes} devices."
-                )
+            else:
+                if gpus > accelerator.num_processes:
+                    # TODO: make sure there's still never an edge case where we unintentionally default to CPU
+                    eval_logger.warning(
+                        "WARNING: The number of total system GPUs does not match the number of spawned processes. "
+                        "If you would like to use data parallelism, please launch the script "
+                        "with 'accelerate launch *script*'. "
+                        f"Current run will proceed with {accelerator.num_processes} devices."
+                    )
+                self._rank = accelerator.local_process_index
+                self._world_size = accelerator.num_processes
-                # manually set model to use gpu, for case where many GPUs available but
-                # only seek to use one
-                self._device = (
-                    torch.device(f"cuda:{accelerator.local_process_index}")
-                    if torch.cuda.is_available()
-                    else torch.device("cpu")
-                )
-                self.model.to(self.device)
-            else:
-                self._model = accelerator.prepare(self.model)
-                self._device = torch.device(f"cuda:{accelerator.local_process_index}")
-                self.accelerator = accelerator
-                if self.accelerator.is_local_main_process:
-                    eval_logger.info(f"Using {gpus} devices with data parallelism")
-                self._rank = self.accelerator.local_process_index
-                self._world_size = self.accelerator.num_processes
+                # manually set model to use gpu, for case where many GPUs available but
+                # only seek to use one
+                # self._device = (
+                #     torch.device(f"cuda:{accelerator.local_process_index}")
+                #     if torch.cuda.is_available()
+                #     else torch.device("cpu")
+                # )
+                # self.model.to(self.device)
+                # else:
+                #     self._model = accelerator.prepare(self.model)
+                #     self._device = torch.device(f"cuda:{accelerator.local_process_index}")
+                # self.accelerator = accelerator
+                # self._rank = self.accelerator.local_process_index
+                # self._world_size = self.accelerator.num_processes
 
     @property
     def config(self):
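
For orientation, the restructured branch assumes the script may have been started with Accelerate's launcher, in which case `Accelerator()` reports one process per GPU and each process records its rank and pins itself to its local index. A minimal standalone sketch of that bookkeeping (not part of the patch; the script name is hypothetical, and running it via `accelerate launch sketch.py` spawns multiple processes):

    import torch
    from accelerate import Accelerator

    # Sketch of the new `else:` branch: record rank/world size from the
    # launcher, then pin this process to its own GPU (or fall back to CPU).
    accelerator = Accelerator()  # num_processes == 1 unless `accelerate launch` is used
    rank = accelerator.local_process_index
    world_size = accelerator.num_processes
    device = (
        torch.device(f"cuda:{rank}")
        if torch.cuda.is_available()
        else torch.device("cpu")
    )
    print(f"process {rank} of {world_size} -> {device}")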