Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
c5d7754b
Unverified
Commit
c5d7754b
authored
Nov 09, 2023
by
Hz, Ji
Committed by
GitHub
Nov 09, 2023
Browse files
device-agnostic deepspeed testing (#27342)
parent
9999b739
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
13 deletions
+14
-13
tests/deepspeed/test_deepspeed.py
tests/deepspeed/test_deepspeed.py
+14
-13
No files found.
tests/deepspeed/test_deepspeed.py
View file @
c5d7754b
...
@@ -38,17 +38,18 @@ from transformers.testing_utils import (
...
@@ -38,17 +38,18 @@ from transformers.testing_utils import (
CaptureStderr
,
CaptureStderr
,
LoggingLevel
,
LoggingLevel
,
TestCasePlus
,
TestCasePlus
,
backend_device_count
,
execute_subprocess_async
,
execute_subprocess_async
,
get_gpu_count
,
mockenv_context
,
mockenv_context
,
require_deepspeed
,
require_deepspeed
,
require_optuna
,
require_optuna
,
require_torch_
gpu
,
require_torch_
accelerator
,
require_torch_multi_
gpu
,
require_torch_multi_
accelerator
,
slow
,
slow
,
torch_device
,
)
)
from
transformers.trainer_utils
import
get_last_checkpoint
,
set_seed
from
transformers.trainer_utils
import
get_last_checkpoint
,
set_seed
from
transformers.utils
import
SAFE_WEIGHTS_NAME
,
is_torch_bf16_
gpu_
available
from
transformers.utils
import
SAFE_WEIGHTS_NAME
,
is_torch_bf16_available
_on_device
if
is_torch_available
():
if
is_torch_available
():
...
@@ -125,7 +126,7 @@ def get_launcher(distributed=False):
...
@@ -125,7 +126,7 @@ def get_launcher(distributed=False):
# - it won't be able to handle that
# - it won't be able to handle that
# 2. for now testing with just 2 gpus max (since some quality tests may give different
# 2. for now testing with just 2 gpus max (since some quality tests may give different
# results with more gpus because we use very little data)
# results with more gpus because we use very little data)
num_gpus
=
min
(
2
,
get_gpu_count
(
))
if
distributed
else
1
num_gpus
=
min
(
2
,
backend_device_count
(
torch_device
))
if
distributed
else
1
master_port
=
get_master_port
(
real_launcher
=
True
)
master_port
=
get_master_port
(
real_launcher
=
True
)
return
f
"deepspeed --num_nodes 1 --num_gpus
{
num_gpus
}
--master_port
{
master_port
}
"
.
split
()
return
f
"deepspeed --num_nodes 1 --num_gpus
{
num_gpus
}
--master_port
{
master_port
}
"
.
split
()
...
@@ -145,7 +146,7 @@ optims = [HF_OPTIM, DS_OPTIM]
...
@@ -145,7 +146,7 @@ optims = [HF_OPTIM, DS_OPTIM]
schedulers
=
[
HF_SCHEDULER
,
DS_SCHEDULER
]
schedulers
=
[
HF_SCHEDULER
,
DS_SCHEDULER
]
stages
=
[
ZERO2
,
ZERO3
]
stages
=
[
ZERO2
,
ZERO3
]
if
is_torch_bf16_
gpu_
available
(
):
if
is_torch_bf16_available
_on_device
(
torch_device
):
dtypes
=
[
FP16
,
BF16
]
dtypes
=
[
FP16
,
BF16
]
else
:
else
:
dtypes
=
[
FP16
]
dtypes
=
[
FP16
]
...
@@ -165,7 +166,7 @@ params_with_optims_and_schedulers = list(itertools.product(stages, dtypes, optim
...
@@ -165,7 +166,7 @@ params_with_optims_and_schedulers = list(itertools.product(stages, dtypes, optim
@
require_deepspeed
@
require_deepspeed
@
require_torch_
gpu
@
require_torch_
accelerator
class
CoreIntegrationDeepSpeed
(
TestCasePlus
,
TrainerIntegrationCommon
):
class
CoreIntegrationDeepSpeed
(
TestCasePlus
,
TrainerIntegrationCommon
):
"""
"""
Testing non-Trainer DeepSpeed integration
Testing non-Trainer DeepSpeed integration
...
@@ -273,7 +274,7 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
...
@@ -273,7 +274,7 @@ class TrainerIntegrationDeepSpeedWithCustomConfig(TestCasePlus):
@
require_deepspeed
@
require_deepspeed
@
require_torch_
gpu
@
require_torch_
accelerator
class
TrainerIntegrationDeepSpeed
(
TrainerIntegrationDeepSpeedWithCustomConfig
,
TrainerIntegrationCommon
):
class
TrainerIntegrationDeepSpeed
(
TrainerIntegrationDeepSpeedWithCustomConfig
,
TrainerIntegrationCommon
):
"""
"""
...
@@ -875,7 +876,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
...
@@ -875,7 +876,7 @@ class TrainerIntegrationDeepSpeed(TrainerIntegrationDeepSpeedWithCustomConfig, T
@
slow
@
slow
@
require_deepspeed
@
require_deepspeed
@
require_torch_
gpu
@
require_torch_
accelerator
class
TestDeepSpeedWithLauncher
(
TestCasePlus
):
class
TestDeepSpeedWithLauncher
(
TestCasePlus
):
"""This class is for testing via an external script - can do multiple gpus"""
"""This class is for testing via an external script - can do multiple gpus"""
...
@@ -896,7 +897,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
...
@@ -896,7 +897,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
#
#
@
parameterized
.
expand
(
params
,
name_func
=
parameterized_custom_name_func
)
@
parameterized
.
expand
(
params
,
name_func
=
parameterized_custom_name_func
)
@
require_torch_multi_
gpu
@
require_torch_multi_
accelerator
def
test_basic_distributed
(
self
,
stage
,
dtype
):
def
test_basic_distributed
(
self
,
stage
,
dtype
):
self
.
run_and_check
(
stage
=
stage
,
dtype
=
dtype
,
distributed
=
True
)
self
.
run_and_check
(
stage
=
stage
,
dtype
=
dtype
,
distributed
=
True
)
...
@@ -927,7 +928,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
...
@@ -927,7 +928,7 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
)
)
@
parameterized
.
expand
(
params
,
name_func
=
parameterized_custom_name_func
)
@
parameterized
.
expand
(
params
,
name_func
=
parameterized_custom_name_func
)
@
require_torch_multi_
gpu
@
require_torch_multi_
accelerator
def
test_fp32_distributed
(
self
,
stage
,
dtype
):
def
test_fp32_distributed
(
self
,
stage
,
dtype
):
# real model needs too much GPU memory under stage2+fp32, so using tiny random model here -
# real model needs too much GPU memory under stage2+fp32, so using tiny random model here -
# therefore no quality checks, just basic completion checks are done
# therefore no quality checks, just basic completion checks are done
...
@@ -968,9 +969,9 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
...
@@ -968,9 +969,9 @@ class TestDeepSpeedWithLauncher(TestCasePlus):
self
.
do_checks
(
output_dir
,
do_train
=
do_train
,
do_eval
=
do_eval
)
self
.
do_checks
(
output_dir
,
do_train
=
do_train
,
do_eval
=
do_eval
)
@
parameterized
.
expand
([
"bf16"
,
"fp16"
,
"fp32"
])
@
parameterized
.
expand
([
"bf16"
,
"fp16"
,
"fp32"
])
@
require_torch_multi_
gpu
@
require_torch_multi_
accelerator
def
test_inference
(
self
,
dtype
):
def
test_inference
(
self
,
dtype
):
if
dtype
==
"bf16"
and
not
is_torch_bf16_
gpu_
available
(
):
if
dtype
==
"bf16"
and
not
is_torch_bf16_available
_on_device
(
torch_device
):
self
.
skipTest
(
"test requires bfloat16 hardware support"
)
self
.
skipTest
(
"test requires bfloat16 hardware support"
)
# this is just inference, so no optimizer should be loaded
# this is just inference, so no optimizer should be loaded
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment