Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
fairscale
Commits
acb9ef00
Unverified
Commit
acb9ef00
authored
Mar 31, 2021
by
msbaines
Committed by
GitHub
Mar 31, 2021
Browse files
[chore] add testing of torch 1.9.0 nightly build (#559)
parent
daa1bad5
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
73 additions
and
5 deletions
+73
-5
.circleci/config.yml
.circleci/config.yml
+68
-3
tests/nn/data_parallel/test_sharded_ddp_pytorch_parity.py
tests/nn/data_parallel/test_sharded_ddp_pytorch_parity.py
+5
-2
No files found.
.circleci/config.yml
View file @
acb9ef00
...
@@ -121,6 +121,25 @@ install_dep_181: &install_dep_181
...
@@ -121,6 +121,25 @@ install_dep_181: &install_dep_181
python -m torch.utils.collect_env
python -m torch.utils.collect_env
wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
# Reusable step list: install the system and Python dependencies needed to
# test against the torch 1.9.0 nightly build (cu101 wheels), then record the
# resulting environment.
install_dep_190: &install_dep_190
  - run:
      name: Install Dependencies with torch 1.9.0
      command: |
        # make sure that apt-get retries if needed
        sudo sh -c "echo 'APT::Acquire::Retries "3";' > /etc/apt/apt.conf.d/80-retries"
        sudo apt-get update -y
        sudo apt-get install -y libopenmpi-dev
        # check if we have restored venv cache (/home/circleci/venv) correctly, if so, just skip
        # The version compared here must be the one this step installs (1.9);
        # comparing against 1.8 would never match a valid cached 1.9 venv.
        # NOTE(review): assumes check_version.py matches on major.minor for
        # nightly version strings -- confirm against the helper script.
        if [ -f /home/circleci/venv/check_version.py ]; then python /home/circleci/venv/check_version.py torch eq 1.9 && exit 0; fi
        # start installing
        pip install --pre --progress-bar off torch==1.9.0.dev20210330+cu101 torchvision==0.10.0.dev20210330+cu101 -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html
        pip install --progress-bar off -r requirements-test.txt
        pip install --progress-bar off -r requirements-benchmarks.txt
        python -c 'import torch; print("Torch version:", torch.__version__)'
        # fail the step early if pip resolved a torch other than 1.9.x
        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "9"], "wrong torch version"'
        python -m torch.utils.collect_env
        # fetch the helper used by the cache check above so it is saved with the venv
        wget -O /home/circleci/venv/check_version.py https://raw.githubusercontent.com/min-xu-ai/check_verion/main/check_version.py
install_repo
:
&install_repo
install_repo
:
&install_repo
-
run
:
-
run
:
name
:
Install Repository
name
:
Install Repository
...
@@ -480,6 +499,46 @@ jobs:
...
@@ -480,6 +499,46 @@ jobs:
-
<<
:
*upload_coverage
-
<<
:
*upload_coverage
# GPU test job for the torch 1.9.0 nightly build. Runs the unit tests named in
# the file passed as `test_list_file`.
gpu_tests_190:
  parameters:
    test_list_file:
      type: string
      default: "/dev/non_exist"

  <<: *gpu

  working_directory: ~/fairscale

  steps:
    - checkout

    - run: nvidia-smi

    # Run this to make sure we use python3 from the system.
    - run: pyenv global 3.7.0

    - <<: *setup_venv

    # Cache the venv directory that contains dependencies
    - restore_cache:
        keys:
          - cache-key-gpu-190-101-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

    - <<: *install_dep_190

    # Saved under the same key that restore_cache looks up above.
    - save_cache:
        paths:
          - ~/venv
        key: cache-key-gpu-190-101-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

    - <<: *install_repo

    - run_unittests_from_list:
        test_list_file: <<parameters.test_list_file>>

    - store_test_results:
        path: test-results
benchmarks_1
:
benchmarks_1
:
<<
:
*gpu
<<
:
*gpu
...
@@ -591,17 +650,23 @@ workflows:
...
@@ -591,17 +650,23 @@ workflows:
test_list_file
:
tests/ci_test_list_1.txt
test_list_file
:
tests/ci_test_list_1.txt
-
gpu_tests_181
:
-
gpu_tests_181
:
test_list_file
:
tests/ci_test_list_1.txt
test_list_file
:
tests/ci_test_list_1.txt
-
gpu_tests_190
:
test_list_file
:
tests/ci_test_list_1.txt
-
gpu_tests_160
:
-
gpu_tests_160
:
test_list_file
:
tests/ci_test_list_2.txt
test_list_file
:
tests/ci_test_list_2.txt
-
gpu_tests_171
:
-
gpu_tests_171
:
test_list_file
:
tests/ci_test_list_2.txt
test_list_file
:
tests/ci_test_list_2.txt
-
gpu_tests_181
:
-
gpu_tests_181
:
test_list_file
:
tests/ci_test_list_2.txt
test_list_file
:
tests/ci_test_list_2.txt
-
gpu_tests_190
:
test_list_file
:
tests/ci_test_list_2.txt
-
gpu_tests_160
:
-
gpu_tests_160
:
test_list_file
:
tests/ci_test_list_3.txt
test_list_file
:
tests/ci_test_list_3.txt
-
gpu_tests_171
:
-
gpu_tests_171
:
test_list_file
:
tests/ci_test_list_3.txt
test_list_file
:
tests/ci_test_list_3.txt
-
gpu_tests_181
:
-
gpu_tests_181
:
test_list_file
:
tests/ci_test_list_3.txt
test_list_file
:
tests/ci_test_list_3.txt
-
gpu_tests_190
:
test_list_file
:
tests/ci_test_list_3.txt
-
benchmarks_1
-
benchmarks_1
-
benchmarks_2
-
benchmarks_2
tests/nn/data_parallel/test_sharded_ddp_pytorch_parity.py
View file @
acb9ef00
...
@@ -23,7 +23,7 @@ from torch.nn.parallel import DistributedDataParallel as DDP
...
@@ -23,7 +23,7 @@ from torch.nn.parallel import DistributedDataParallel as DDP
from
fairscale.nn.data_parallel
import
ShardedDataParallel
from
fairscale.nn.data_parallel
import
ShardedDataParallel
from
fairscale.optim
import
OSS
from
fairscale.optim
import
OSS
from
fairscale.optim.grad_scaler
import
ShardedGradScaler
from
fairscale.optim.grad_scaler
import
ShardedGradScaler
from
fairscale.utils.testing
import
check_same_model_params
,
skip_if_no_cuda
,
skip_if_single_gpu
from
fairscale.utils.testing
import
check_same_model_params
,
skip_if_no_cuda
,
skip_if_single_gpu
,
torch_version
"""
"""
Check that ShardedDDP gets the same results as DDP in a variety of scenarios
Check that ShardedDDP gets the same results as DDP in a variety of scenarios
...
@@ -168,6 +168,9 @@ def run_ddp_parity(
...
@@ -168,6 +168,9 @@ def run_ddp_parity(
# NOTE: DDP does not handle parameters trainability being changed after the fact, see
# NOTE: DDP does not handle parameters trainability being changed after the fact, see
# https://github.com/pytorch/pytorch/blob/5781aec74ef00284e0262817a649278c2e8072bf/torch/nn/parallel/distributed.py#L471
# https://github.com/pytorch/pytorch/blob/5781aec74ef00284e0262817a649278c2e8072bf/torch/nn/parallel/distributed.py#L471
if
clip_grad_norm
and
not
change_train_graph
:
if
clip_grad_norm
and
not
change_train_graph
:
if
torch_version
()
>=
(
1
,
9
,
0
):
total_norm
=
torch
.
nn
.
utils
.
clip_grad_norm_
(
ddp_model
.
parameters
(),
0.3
,
norm_type
=
2.0
,
error_if_nonfinite
=
False
)
# type: ignore
else
:
total_norm
=
torch
.
nn
.
utils
.
clip_grad_norm_
(
ddp_model
.
parameters
(),
0.3
,
norm_type
=
2.0
)
# type: ignore
total_norm
=
torch
.
nn
.
utils
.
clip_grad_norm_
(
ddp_model
.
parameters
(),
0.3
,
norm_type
=
2.0
)
# type: ignore
if
not
torch
.
isnan
(
total_norm
):
if
not
torch
.
isnan
(
total_norm
):
oss_total_norm
=
sharded_optimizer
.
clip_grad_norm
(
0.3
,
norm_type
=
2.0
)
oss_total_norm
=
sharded_optimizer
.
clip_grad_norm
(
0.3
,
norm_type
=
2.0
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment