Unverified commit e348806b, authored by Min Xu, committed by GitHub

[test]: test with py39 + torch 1.8 nightly (#339)

* [test]: test with py39 + torch 1.8 nightly

* version fix

* more fix

* fix version function for nightly version

* fix torch_pg build

* invalidate cache

* separate benchmark requirements

* comment

* fixed mypy

* fixed a test
parent eaee5976
CircleCI config:

@@ -50,22 +50,24 @@ setup_venv: &setup_venv
 install_dep_151: &install_dep_151
   - run:
-      name: Install Dependencies
+      name: Install Dependencies with torch 1.5.1
       command: |
         sudo apt-get install -y libopenmpi-dev
         pip install --progress-bar off torch==1.5.1+cu101 torchvision==0.6.1+cu101 -f https://download.pytorch.org/whl/torch_stable.html
         pip install --progress-bar off -r requirements-test.txt
+        pip install --progress-bar off -r requirements-benchmarks.txt
         python -c 'import torch; print("Torch version:", torch.__version__)'
         python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "5"], "wrong torch version"'
         python -m torch.utils.collect_env

 install_dep_160: &install_dep_160
   - run:
-      name: Install Dependencies
+      name: Install Dependencies with torch 1.6.0
       command: |
         sudo apt-get install -y libopenmpi-dev
         pip install --progress-bar off torch==1.6.0+cu101 torchvision==0.7.0+cu101 -f https://download.pytorch.org/whl/torch_stable.html
         pip install --progress-bar off -r requirements-test.txt
+        pip install --progress-bar off -r requirements-benchmarks.txt
         pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
         python -c 'import torch; print("Torch version:", torch.__version__)'
         python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "6"], "wrong torch version"'
@@ -73,16 +75,31 @@ install_dep_160: &install_dep_160
 install_dep_171: &install_dep_171
   - run:
-      name: Install Dependencies
+      name: Install Dependencies with torch 1.7.1
       command: |
         sudo apt-get install -y libopenmpi-dev
         pip install --progress-bar off torch==1.7.1+cu101 torchvision==0.8.2+cu101 -f https://download.pytorch.org/whl/torch_stable.html
         pip install --progress-bar off -r requirements-test.txt
+        pip install --progress-bar off -r requirements-benchmarks.txt
         pip install --progress-bar off git+https://github.com/msbaines/torch_pg.git@c85c96f#egg=torch-pg
         python -c 'import torch; print("Torch version:", torch.__version__)'
         python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "7"], "wrong torch version"'
         python -m torch.utils.collect_env

+install_dep_180: &install_dep_180
+  - run:
+      name: Install Dependencies with torch 1.8.0 nightly
+      command: |
+        sudo apt-get install -y libopenmpi-dev
+        pip install --pre --progress-bar off torch==1.8.0.dev20210128+cu110 -f https://download.pytorch.org/whl/nightly/cu110/torch_nightly.html
+        pip install --progress-bar off git+https://github.com/min-xu-ai/torch_pg.git@c723ab4#egg=torch-pg
+        pip install --progress-bar off -r requirements-test.txt
+        # TODO: We don't use 180 to run benchmark yet, because torchvision is not yet available on py39.
+        #pip install --progress-bar off -r requirements-benchmarks.txt
+        python -c 'import torch; print("Torch version:", torch.__version__)'
+        python -c 'import torch; assert torch.__version__.split(".")[:2] == ["1", "8"], "wrong torch version"'
+        python -m torch.utils.collect_env
+
 install_repo_cpu: &install_repo_cpu
   - run:
       name: Install Repository
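Each install step ends with the same guard: print the torch version, assert its major/minor pair, and dump collect_env. The assert is plain string slicing, so it also holds for the nightly build string pinned above. A minimal plain-Python sketch of that check (same expression as the `python -c` one-liners; the version strings are hard-coded here instead of reading torch.__version__):

    nightly = "1.8.0.dev20210128+cu110"
    release = "1.7.1+cu101"

    def major_minor(version: str) -> list:
        # Same expression as the asserts in the config.
        return version.split(".")[:2]

    assert major_minor(nightly) == ["1", "8"], "wrong torch version"
    assert major_minor(release) == ["1", "7"], "wrong torch version"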
@@ -267,14 +284,18 @@ jobs:
       # Cache the venv directory that contains dependencies
       - restore_cache:
           keys:
-            - cache-key-cpu-py39-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}
+            - cache-key-cpu-py39-180-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

-      - <<: *install_dep_171
+      # py3.9 doesn't work well with torch < 1.8. See this PR:
+      # https://github.com/pytorch/pytorch/pull/50998
+      #
+      # Therefore, we test py39 with torch 1.8.0.
+      - <<: *install_dep_180

       - save_cache:
           paths:
             - ~/venv
-          key: cache-key-cpu-py39-171-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}
+          key: cache-key-cpu-py39-180-{{ checksum "setup.py"}}-{{ checksum "requirements-test.txt"}}

       - <<: *install_repo_cpu
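The "invalidate cache" item from the commit message is handled here by the key rename: restore_cache only restores a previously saved cache whose key matches the requested one, so bumping the hard-coded py39-171 prefix to py39-180 (or changing setup.py or requirements-test.txt, via the `{{ checksum ... }}` parts) means nothing matches and the venv is rebuilt from scratch. A rough, hypothetical Python illustration of that content-addressed idea, not CircleCI's actual templating:

    import hashlib

    def cache_key(prefix: str, *files: str) -> str:
        # Key = fixed prefix + digest of the dependency files; change either
        # piece and the resulting key (hence the cache entry) changes.
        digest = hashlib.sha256()
        for path in files:
            with open(path, "rb") as f:
                digest.update(f.read())
        return prefix + "-" + digest.hexdigest()[:12]

    # e.g. cache_key("cache-key-cpu-py39-180", "setup.py", "requirements-test.txt")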
@@ -282,12 +303,8 @@ jobs:
       - <<: *run_black
       - <<: *run_mypy
       - <<: *run_flake8
-      # FIXME: py39 still not stable for us, example:
-      # https://app.circleci.com/pipelines/github/facebookresearch/fairscale/1349/workflows/534aae41-e01d-404e-bfc1-fdc58566c39c/jobs/5952
-      # - <<: *run_unittests
-      # example:
-      # https://app.circleci.com/pipelines/github/facebookresearch/fairscale/1350/workflows/26ebd69e-777e-491a-ae12-da3154ef80f9/jobs/5953
-      # - <<: *run_mpi_unittests
+      - <<: *run_unittests
+      - <<: *run_mpi_unittests
       - <<: *run_doc_build

       - store_test_results:
.gitignore:

@@ -10,6 +10,9 @@
 build/
 dist/

+# Pytest verbose output
+test-results/
+
 # Coverage reports
 .coverage
 .coverage.*
AdaScale optimizer:

@@ -289,7 +289,7 @@ class AdaScale(Optimizer):
         if pg_idx is not None:
             return self._state["grad_sqr_avg"][pg_idx]
         else:
-            return np.sum(self._state["grad_sqr_avg"])
+            return float(np.sum(self._state["grad_sqr_avg"]))

     def _grad_var_avg(self, pg_idx: Optional[int] = None) -> float:
         """
@@ -307,7 +307,7 @@ class AdaScale(Optimizer):
         if pg_idx is not None:
             return self._state["grad_var_avg"][pg_idx]
         else:
-            return np.sum(self._state["grad_var_avg"])
+            return float(np.sum(self._state["grad_var_avg"]))

     def gain(self, pg_idx: Optional[int] = None) -> float:
         """
@@ -349,8 +349,8 @@ class AdaScale(Optimizer):
             # after some iterations are done. But, then the if condition
             # below will need to be a np.where. I leave this corner
             # case to a future exercise.
-            count = self._state.get(name + "_count", 0)
-            count += 1
+            count = self._state.get(name + "_count", np.zeros(1))
+            count[0] += 1
             self._state[name + "_count"] = count
             if count < 1 / (1 - self._smoothing):
                 total = self._state.get(name + "_total", None)
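With this change the per-state step counter is stored as a one-element NumPy array rather than a Python int, so every entry in `_state` is an ndarray (which is also what the shape assert added in the next hunk checks). A small standalone sketch of just that bookkeeping, with names borrowed from the diff; it is not the full AdaScale smoothing update:

    import numpy as np

    _state = {}
    smoothing = 0.9

    def bump_count(name: str) -> np.ndarray:
        # Default to a 1-element array instead of 0, as in the diff above.
        count = _state.get(name + "_count", np.zeros(1))
        count[0] += 1
        _state[name + "_count"] = count
        return count

    for _ in range(3):
        count = bump_count("grad_var_avg")

    # The elementwise comparison on a 1-element array still works in the
    # original `if count < 1 / (1 - self._smoothing):` condition.
    print(bool(count < 1 / (1 - smoothing)))  # True while count[0] < 10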
@@ -514,8 +514,9 @@ class AdaScale(Optimizer):
         # Extend the states.
         for name in self._state.keys():
             assert name.startswith("grad_sqr_avg") or name.startswith("grad_var_avg"), name
-            if isinstance(self._state[name], int):
-                # This is the "_count" variable.
+            if name.endswith("_count"):
+                # This is the "_count" variable, should be a 1D int.
+                assert self._state[name].shape == (1,), self._state[name].shape
                 continue
             # must be a np array, extend it with the right value and check the shape.
             val = 1 if name == "grad_sqr_avg" else 0
torch_version() helper:

@@ -74,9 +74,7 @@ def set_random_seed(seed: int) -> None:

 def torch_version() -> Tuple[int, ...]:
-    numbering = torch.__version__.split("+")[0].split(".")
-    assert len(numbering) == 3
+    numbering = torch.__version__.split("+")[0].split(".")[:3]

     # Catch torch version if run against internal pre-releases, like `1.8.0a0fb`,
     if not numbering[2].isnumeric():
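Only a slice of torch_version() is visible in this hunk. The sketch below is a standalone approximation of the parsing it implies, showing why the `[:3]` slice replaces the old `assert len(numbering) == 3` once nightly strings are involved; the branch after the `isnumeric()` check is an assumption here (treat the patch level as 0), since the diff does not show it:

    def parse_torch_version(version: str):
        # Drop the local build tag (+cu101, +cu110, ...) and keep at most three
        # components; a nightly such as "1.8.0.dev20210128" splits into four
        # pieces, which is what broke the old length assert.
        numbering = version.split("+")[0].split(".")[:3]
        if not numbering[2].isnumeric():
            # Internal pre-releases like "1.8.0a0fb" (assumed handling).
            numbering[2] = "0"
        return tuple(int(n) for n in numbering)

    print(parse_torch_version("1.7.1+cu101"))              # (1, 7, 1)
    print(parse_torch_version("1.8.0.dev20210128+cu110"))  # (1, 8, 0)
    print(parse_torch_version("1.8.0a0fb"))                # (1, 8, 0)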
requirements-benchmarks.txt (new file):

+# Bring in everything that tests depends on.
+-r requirements-test.txt
+
+# Benchmark dependencies.
+torchtext == 0.6.0
+torchvision >= 0.6.0
+timm == 0.3.4
requirements-test.txt:

+# Core deps.
 -r requirements.txt
+
+# For pre-commit hooks.
 pre-commit

-# Get core deps.
--r requirements.txt
-
 # Tools for static checking.
 black == 19.10b0
 flake8 == 3.7.9
 isort == 5.6.4
 mypy == 0.790

-# Tools for testing & coverage.
+# Tools for unit tests & coverage.
 pytest == 5.4.1
 pytest-cov == 2.10.0
 pytest-mpi == 0.4
 pytest-timeout == 1.4.2
 mpi4py == 3.0.3

-# Library dependencies.
-torchtext == 0.6.0
-torch >= 1.5.1
-torchvision >= 0.6.0
-timm == 0.3.4
+# FairScale should only depends on torch, not things higher level than torch.
 torch >= 1.5.1
AdaScale unit test (test_add_param_group):

@@ -203,7 +203,7 @@ def test_add_param_group(debias_ewma):
         model1.weight.copy_(Tensor([1.0, 2.0, 3.0, 4.0]).reshape(2, 2))
         model1.bias.fill_(0.1)
     optim = AdaScale(SGD(model1.parameters(), lr=0.1), num_gradients_to_accumulate=2, debias_ewma=debias_ewma)
-    assert len(optim._hook_handles) == 2
+    assert len(optim._hook_handles) == 2, len(optim._hook_handles)

     model2 = Linear(2, 3, bias=True)
     with torch.no_grad():
@@ -211,7 +211,7 @@ def test_add_param_group(debias_ewma):
         model2.weight.copy_(Tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0]).reshape(3, 2))
         model2.bias.fill_(0.2)
     optim.add_param_group({"params": model2.parameters()})
-    assert len(optim._hook_handles) == 4
+    assert len(optim._hook_handles) == 4, len(optim._hook_handles)

     # make sure we can run the model.
     model = Sequential(model1, model2).cuda()
@@ -238,7 +238,7 @@ def test_add_param_group(debias_ewma):
         model3.weight.copy_(Tensor([1.0, 2.0, 3.0, 4.0, 5.0, 6.0] * 2).reshape(4, 3))
         model3.bias.fill_(0.2)
     optim.add_param_group({"params": model3.parameters()})
-    assert len(optim._hook_handles) == 6
+    assert len(optim._hook_handles) == 6, len(optim._hook_handles)

     # make sure we can run the model.
     model = Sequential(model1, model2, model3).cuda()
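The added `len(optim._hook_handles)` tail just makes a failing assert report the actual count. The expected values (2, 4, 6) line up with one hook per parameter, i.e. the weight and bias of each Linear handed to the optimizer, assuming that is how AdaScale registers its gradient hooks. A quick way to see where the numbers come from:

    from torch.nn import Linear, Sequential

    model1 = Linear(2, 2, bias=True)
    model2 = Linear(2, 3, bias=True)
    model3 = Linear(3, 4, bias=True)

    # Each Linear contributes two parameters (weight and bias), hence 2, 4, 6.
    print(len(list(model1.parameters())))                              # 2
    print(len(list(Sequential(model1, model2).parameters())))          # 4
    print(len(list(Sequential(model1, model2, model3).parameters())))  # 6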