Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
OpenDAS
fairscale
Commits
acb9ef00
Unverified
Commit
acb9ef00
authored
Mar 31, 2021
by
msbaines
Committed by
GitHub
Mar 31, 2021
Browse files
[chore] add testing of torch 1.9.0 nightly build (#559)
parent
daa1bad5
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
73 additions
and
5 deletions
+73
-5
.circleci/config.yml
.circleci/config.yml
+68
-3
tests/nn/data_parallel/test_sharded_ddp_pytorch_parity.py
tests/nn/data_parallel/test_sharded_ddp_pytorch_parity.py
+5
-2
No files found.
.circleci/config.yml
View file @
acb9ef00
...
...
@@ -121,6 +121,25 @@ install_dep_181: &install_dep_181
python -m torch.utils.collect_env
wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
install_dep_190
:
&install_dep_190
-
run
:
name
:
Install Dependencies with torch 1.9.0
command
:
|
# make sure that apt-get retries if needed
sudo sh -c "echo 'APT::Acquire::Retries "3";' > /etc/apt/apt.conf.d/80-retries"
sudo apt-get update -y
sudo apt-get install -y libopenmpi-dev
# check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.8 && exit 0; fi
# start installing
pip install --pre --progress-bar off torch==1.9.0.dev20210330+cu101 torchvision==0.10.0.dev20210330+cu101 -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html
pip install --progress-bar off -r requirements-test.txt
pip install --progress-bar off -r requirements-benchmarks.txt
python -c 'import torch; print("Torch version:", torch.__version__)'
python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "9"], "wrong torch version"'
python -m torch.utils.collect_env
wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
install_repo
:
&install_repo
-
run
:
name
:
Install Repository
...
...
@@ -393,7 +412,7 @@ jobs:
-
store_test_results
:
path
:
test-results
-
<<
:
*upload_coverage
gpu_tests_171
:
...
...
@@ -435,7 +454,7 @@ jobs:
-
store_test_results
:
path
:
test-results
-
<<
:
*upload_coverage
gpu_tests_181
:
...
...
@@ -477,9 +496,49 @@ jobs:
-
store_test_results
:
path
:
test-results
-
<<
:
*upload_coverage
gpu_tests_190
:
parameters
:
test_list_file
:
type
:
string
default
:
"
/dev/non_exist"
<<
:
*gpu
working_directory
:
~/fairscale
steps
:
-
checkout
-
run
:
nvidia-smi
# Run this to make sure we use python3 from the system.
-
run
:
pyenv global 3.7.0
-
<<
:
*setup_venv
# Cache the venv directory that contains dependencies
-
restore_cache
:
keys
:
-
cache-key-gpu-190-101-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}
-
<<
:
*install_dep_190
-
save_cache
:
paths
:
-
~/venv
key
:
cache-key-gpu-190-101-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}
-
<<
:
*install_repo
-
run_unittests_from_list
:
test_list_file
:
<<parameters.test_list_file>>
-
store_test_results
:
path
:
test-results
benchmarks_1
:
<<
:
*gpu
...
...
@@ -591,17 +650,23 @@ workflows:
test_list_file
:
tests/ci_test_list_1.txt
-
gpu_tests_181
:
test_list_file
:
tests/ci_test_list_1.txt
-
gpu_tests_190
:
test_list_file
:
tests/ci_test_list_1.txt
-
gpu_tests_160
:
test_list_file
:
tests/ci_test_list_2.txt
-
gpu_tests_171
:
test_list_file
:
tests/ci_test_list_2.txt
-
gpu_tests_181
:
test_list_file
:
tests/ci_test_list_2.txt
-
gpu_tests_190
:
test_list_file
:
tests/ci_test_list_2.txt
-
gpu_tests_160
:
test_list_file
:
tests/ci_test_list_3.txt
-
gpu_tests_171
:
test_list_file
:
tests/ci_test_list_3.txt
-
gpu_tests_181
:
test_list_file
:
tests/ci_test_list_3.txt
-
gpu_tests_190
:
test_list_file
:
tests/ci_test_list_3.txt
-
benchmarks_1
-
benchmarks_2
tests/nn/data_parallel/test_sharded_ddp_pytorch_parity.py
View file @
acb9ef00
...
...
@@ -23,7 +23,7 @@ from torch.nn.parallel import DistributedDataParallel as DDP
from
fairscale.nn.data_parallel
import
ShardedDataParallel
from
fairscale.optim
import
OSS
from
fairscale.optim.grad_scaler
import
ShardedGradScaler
from
fairscale.utils.testing
import
check_same_model_params
,
skip_if_no_cuda
,
skip_if_single_gpu
from
fairscale.utils.testing
import
check_same_model_params
,
skip_if_no_cuda
,
skip_if_single_gpu
,
torch_version
"""
Check that ShardedDDP gets the same results as DDP in a variety of scenarios
...
...
@@ -168,7 +168,10 @@ def run_ddp_parity(
# NOTE: DDP does not handle parameters trainability being changed after the fact, see
# https://github.com/pytorch/pytorch/blob/5781aec74ef00284e0262817a649278c2e8072bf/torch/nn/parallel/distributed.py#L471
if
clip_grad_norm
and
not
change_train_graph
:
total_norm
=
torch
.
nn
.
utils
.
clip_grad_norm_
(
ddp_model
.
parameters
(),
0.3
,
norm_type
=
2.0
)
# type: ignore
if
torch_version
()
>=
(
1
,
9
,
0
):
total_norm
=
torch
.
nn
.
utils
.
clip_grad_norm_
(
ddp_model
.
parameters
(),
0.3
,
norm_type
=
2.0
,
error_if_nonfinite
=
False
)
# type: ignore
else
:
total_norm
=
torch
.
nn
.
utils
.
clip_grad_norm_
(
ddp_model
.
parameters
(),
0.3
,
norm_type
=
2.0
)
# type: ignore
if
not
torch
.
isnan
(
total_norm
):
oss_total_norm
=
sharded_optimizer
.
clip_grad_norm
(
0.3
,
norm_type
=
2.0
)
allclose
=
torch
.
allclose
(
oss_total_norm
,
total_norm
,
atol
=
1e-2
if
amp
else
1e-8
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment