Unverified Commit 29f38f17 authored by Philip Meier's avatar Philip Meier Committed by GitHub
Browse files

Download model weights in parallel for prototype CI (#4772)

* enable caching of model weights for prototype CI

* syntax

* syntax

* make cache dir dynamic

* increase verbosity

* fix

* use larger CI machine

* revert debug output

* [DEBUG] test env var usage in save_cache

* retry

* use checksum for caching

* remove env vars because expansion is not working

* syntax

* cleanup

* base caching on model-urls

* relax regex

* cleanup skips

* cleanup

* fix skipping logic

* improve step name

* benchmark without caching

* benchmark with external download

* debug

* fix manual download location

* debug again

* download weights in the background

* try parallel download

* add missing import

* use correct decorator

* up resource_class

* fix wording

* enable stdout passthrough to see download during test

* remove linebreak

* move checkout up

* cleanup

* debug failing test

* temp fix

* fix

* cleanup

* fix regex

* remove explicit install of numpy
parent cca16993
...@@ -263,14 +263,23 @@ jobs: ...@@ -263,14 +263,23 @@ jobs:
prototype_test: prototype_test:
docker: docker:
- image: circleci/python:3.7 - image: circleci/python:3.7
resource_class: xlarge
steps: steps:
- run: - run:
name: Install torch name: Install torch
command: pip install --user --progress-bar=off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html command: |
pip install --user --progress-bar=off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
- run: - run:
name: Install prototype dependencies name: Install prototype dependencies
command: pip install --user --progress-bar=off git+https://github.com/pytorch/data.git command: pip install --user --progress-bar=off git+https://github.com/pytorch/data.git
- checkout - checkout
- run:
name: Download model weights
background: true
command: |
sudo apt update -qy && sudo apt install -qy parallel wget
python scripts/collect_model_urls.py torchvision/prototype/models \
| parallel -j0 wget --no-verbose -P ~/.cache/torch/hub/checkpoints {}
- run: - run:
name: Install torchvision name: Install torchvision
command: pip install --user --progress-bar off --no-build-isolation . command: pip install --user --progress-bar off --no-build-isolation .
...@@ -279,6 +288,8 @@ jobs: ...@@ -279,6 +288,8 @@ jobs:
command: pip install --user --progress-bar=off pytest pytest-mock scipy iopath command: pip install --user --progress-bar=off pytest pytest-mock scipy iopath
- run: - run:
name: Run tests name: Run tests
environment:
PYTORCH_TEST_WITH_PROTOTYPE: 1
command: pytest --junitxml=test-results/junit.xml -v --durations 20 test/test_prototype_*.py command: pytest --junitxml=test-results/junit.xml -v --durations 20 test/test_prototype_*.py
- store_test_results: - store_test_results:
path: test-results path: test-results
......
...@@ -263,14 +263,23 @@ jobs: ...@@ -263,14 +263,23 @@ jobs:
prototype_test: prototype_test:
docker: docker:
- image: circleci/python:3.7 - image: circleci/python:3.7
resource_class: xlarge
steps: steps:
- run: - run:
name: Install torch name: Install torch
command: pip install --user --progress-bar=off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html command: |
pip install --user --progress-bar=off --pre torch -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html
- run: - run:
name: Install prototype dependencies name: Install prototype dependencies
command: pip install --user --progress-bar=off git+https://github.com/pytorch/data.git command: pip install --user --progress-bar=off git+https://github.com/pytorch/data.git
- checkout - checkout
- run:
name: Download model weights
background: true
command: |
sudo apt update -qy && sudo apt install -qy parallel wget
python scripts/collect_model_urls.py torchvision/prototype/models \
| parallel -j0 wget --no-verbose -P ~/.cache/torch/hub/checkpoints {}
- run: - run:
name: Install torchvision name: Install torchvision
command: pip install --user --progress-bar off --no-build-isolation . command: pip install --user --progress-bar off --no-build-isolation .
...@@ -279,6 +288,8 @@ jobs: ...@@ -279,6 +288,8 @@ jobs:
command: pip install --user --progress-bar=off pytest pytest-mock scipy iopath command: pip install --user --progress-bar=off pytest pytest-mock scipy iopath
- run: - run:
name: Run tests name: Run tests
environment:
PYTORCH_TEST_WITH_PROTOTYPE: 1
command: pytest --junitxml=test-results/junit.xml -v --durations 20 test/test_prototype_*.py command: pytest --junitxml=test-results/junit.xml -v --durations 20 test/test_prototype_*.py
- store_test_results: - store_test_results:
path: test-results path: test-results
......
import pathlib
import re
import sys
# Matches model-weight URLs hosted on download.pytorch.org; the dots are
# bracketed so they match literally rather than as regex wildcards.
MODEL_URL_PATTERN = re.compile(r"https://download[.]pytorch[.]org/models/.*?[.]pth")


def main(root):
    """Print every model-weight URL found in Python sources under *root*.

    Recursively scans ``root`` for ``*.py`` files, skipping private modules
    (names starting with an underscore, e.g. ``__init__.py``), and prints the
    deduplicated, sorted URLs one per line to stdout.
    """
    model_urls = set()
    # Glob only Python sources up front instead of walking every file and
    # filtering by suffix afterwards.
    for path in pathlib.Path(root).glob("**/*.py"):
        if path.name.startswith("_"):
            continue
        with open(path, "r") as file:
            for line in file:
                model_urls.update(MODEL_URL_PATTERN.findall(line))
    print("\n".join(sorted(model_urls)))
# CLI entry point: scan the directory passed as the first argument
# (e.g. `python collect_model_urls.py torchvision/prototype/models`).
if __name__ == "__main__":
    main(sys.argv[1])
...@@ -4,8 +4,10 @@ import os ...@@ -4,8 +4,10 @@ import os
import random import random
import shutil import shutil
import tempfile import tempfile
from distutils.util import strtobool
import numpy as np import numpy as np
import pytest
import torch import torch
from PIL import Image from PIL import Image
from torchvision import io from torchvision import io
...@@ -13,9 +15,18 @@ from torchvision import io ...@@ -13,9 +15,18 @@ from torchvision import io
import __main__ # noqa: 401 import __main__ # noqa: 401
IN_CIRCLE_CI = os.getenv("CIRCLECI", False) == "true" def get_bool_env_var(name, *, exist_ok=False, default=False):
IN_RE_WORKER = os.environ.get("INSIDE_RE_WORKER") is not None value = os.getenv(name)
IN_FBCODE = os.environ.get("IN_FBCODE_TORCHVISION") == "1" if value is None:
return default
if exist_ok:
return True
return bool(strtobool(value))
IN_CIRCLE_CI = get_bool_env_var("CIRCLECI")
IN_RE_WORKER = get_bool_env_var("INSIDE_RE_WORKER", exist_ok=True)
IN_FBCODE = get_bool_env_var("IN_FBCODE_TORCHVISION")
CUDA_NOT_AVAILABLE_MSG = "CUDA device not available" CUDA_NOT_AVAILABLE_MSG = "CUDA device not available"
CIRCLECI_GPU_NO_CUDA_MSG = "We're in a CircleCI GPU machine, and this test doesn't need cuda." CIRCLECI_GPU_NO_CUDA_MSG = "We're in a CircleCI GPU machine, and this test doesn't need cuda."
...@@ -202,3 +213,7 @@ def _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=1e-8, **fn_kwargs): ...@@ -202,3 +213,7 @@ def _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=1e-8, **fn_kwargs):
# scriptable function test # scriptable function test
s_transformed_batch = scripted_fn(batch_tensors, **fn_kwargs) s_transformed_batch = scripted_fn(batch_tensors, **fn_kwargs)
torch.testing.assert_close(transformed_batch, s_transformed_batch, rtol=1e-5, atol=scripted_fn_atol) torch.testing.assert_close(transformed_batch, s_transformed_batch, rtol=1e-5, atol=scripted_fn_atol)
def run_on_env_var(name, *, skip_reason=None, exist_ok=False, default=False):
return pytest.mark.skipif(not get_bool_env_var(name, exist_ok=exist_ok, default=default), reason=skip_reason)
import importlib import importlib
import os
import pytest import pytest
import test_models as TM import test_models as TM
import torch import torch
from common_utils import cpu_and_gpu from common_utils import cpu_and_gpu, run_on_env_var
from torchvision.prototype import models from torchvision.prototype import models
run_if_test_with_prototype = run_on_env_var(
"PYTORCH_TEST_WITH_PROTOTYPE",
skip_reason="Prototype tests are disabled by default. Set PYTORCH_TEST_WITH_PROTOTYPE=1 to run them.",
)
def _get_original_model(model_fn): def _get_original_model(model_fn):
original_module_name = model_fn.__module__.replace(".prototype", "") original_module_name = model_fn.__module__.replace(".prototype", "")
...@@ -48,34 +52,34 @@ def test_get_weight(model_fn, name, weight): ...@@ -48,34 +52,34 @@ def test_get_weight(model_fn, name, weight):
@pytest.mark.parametrize("model_fn", TM.get_models_from_module(models)) @pytest.mark.parametrize("model_fn", TM.get_models_from_module(models))
@pytest.mark.parametrize("dev", cpu_and_gpu()) @pytest.mark.parametrize("dev", cpu_and_gpu())
@pytest.mark.skipif(os.getenv("PYTORCH_TEST_WITH_PROTOTYPE", "0") == "0", reason="Prototype code tests are disabled") @run_if_test_with_prototype
def test_classification_model(model_fn, dev): def test_classification_model(model_fn, dev):
TM.test_classification_model(model_fn, dev) TM.test_classification_model(model_fn, dev)
@pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.detection)) @pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.detection))
@pytest.mark.parametrize("dev", cpu_and_gpu()) @pytest.mark.parametrize("dev", cpu_and_gpu())
@pytest.mark.skipif(os.getenv("PYTORCH_TEST_WITH_PROTOTYPE", "0") == "0", reason="Prototype code tests are disabled") @run_if_test_with_prototype
def test_detection_model(model_fn, dev): def test_detection_model(model_fn, dev):
TM.test_detection_model(model_fn, dev) TM.test_detection_model(model_fn, dev)
@pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.quantization)) @pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.quantization))
@pytest.mark.skipif(os.getenv("PYTORCH_TEST_WITH_PROTOTYPE", "0") == "0", reason="Prototype code tests are disabled") @run_if_test_with_prototype
def test_quantized_classification_model(model_fn): def test_quantized_classification_model(model_fn):
TM.test_quantized_classification_model(model_fn) TM.test_quantized_classification_model(model_fn)
@pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.segmentation)) @pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.segmentation))
@pytest.mark.parametrize("dev", cpu_and_gpu()) @pytest.mark.parametrize("dev", cpu_and_gpu())
@pytest.mark.skipif(os.getenv("PYTORCH_TEST_WITH_PROTOTYPE", "0") == "0", reason="Prototype code tests are disabled") @run_if_test_with_prototype
def test_segmentation_model(model_fn, dev): def test_segmentation_model(model_fn, dev):
TM.test_segmentation_model(model_fn, dev) TM.test_segmentation_model(model_fn, dev)
@pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.video)) @pytest.mark.parametrize("model_fn", TM.get_models_from_module(models.video))
@pytest.mark.parametrize("dev", cpu_and_gpu()) @pytest.mark.parametrize("dev", cpu_and_gpu())
@pytest.mark.skipif(os.getenv("PYTORCH_TEST_WITH_PROTOTYPE", "0") == "0", reason="Prototype code tests are disabled") @run_if_test_with_prototype
def test_video_model(model_fn, dev): def test_video_model(model_fn, dev):
TM.test_video_model(model_fn, dev) TM.test_video_model(model_fn, dev)
...@@ -89,7 +93,7 @@ def test_video_model(model_fn, dev): ...@@ -89,7 +93,7 @@ def test_video_model(model_fn, dev):
+ get_models_with_module_names(models.video), + get_models_with_module_names(models.video),
) )
@pytest.mark.parametrize("dev", cpu_and_gpu()) @pytest.mark.parametrize("dev", cpu_and_gpu())
@pytest.mark.skipif(os.getenv("PYTORCH_TEST_WITH_PROTOTYPE", "0") == "0", reason="Prototype code tests are disabled") @run_if_test_with_prototype
def test_old_vs_new_factory(model_fn, module_name, dev): def test_old_vs_new_factory(model_fn, module_name, dev):
defaults = { defaults = {
"models": { "models": {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment