Unverified Commit 990d8b6a authored by Zaida Zhou's avatar Zaida Zhou Committed by GitHub
Browse files

[Fix] Add load_url to handle incompatibility of PyTorch versions (#1377)

* [Fix] Fix torch.load error

* [Fix] Fix torch.load error

* rename _save to _save_ckpt

* add load_url to handle incompatibility of PyTorch versions

* add unittest for load_url

* fix typo

* print a friendly message when an error occurs
parent add157cc
......@@ -47,7 +47,7 @@ jobs:
- name: Run unittests and generate coverage report
run: |
pip install -r requirements/test.txt
pytest tests/ --ignore=tests/test_runner --ignore=tests/test_optimizer.py --ignore=tests/test_cnn --ignore=tests/test_parallel.py --ignore=tests/test_ops --ignore=tests/test_load_model_zoo.py --ignore=tests/test_utils/test_logging.py --ignore=tests/test_image/test_io.py --ignore=tests/test_utils/test_registry.py --ignore=tests/test_utils/test_parrots_jit.py --ignore=tests/test_utils/test_trace.py
pytest tests/ --ignore=tests/test_runner --ignore=tests/test_optimizer.py --ignore=tests/test_cnn --ignore=tests/test_parallel.py --ignore=tests/test_ops --ignore=tests/test_load_model_zoo.py --ignore=tests/test_utils/test_logging.py --ignore=tests/test_image/test_io.py --ignore=tests/test_utils/test_registry.py --ignore=tests/test_utils/test_parrots_jit.py --ignore=tests/test_utils/test_trace.py --ignore=tests/test_utils/test_hub.py
build_without_ops:
runs-on: ubuntu-18.04
......
......@@ -13,13 +13,12 @@ from tempfile import TemporaryDirectory
import torch
import torchvision
from torch.optim import Optimizer
from torch.utils import model_zoo
import mmcv
from ..fileio import FileClient
from ..fileio import load as load_file
from ..parallel import is_module_wrapper
from ..utils import mkdir_or_exist
from ..utils import load_url, mkdir_or_exist
from .dist_utils import get_dist_info
ENV_MMCV_HOME = 'MMCV_HOME'
......@@ -281,12 +280,12 @@ def load_from_http(filename, map_location=None, model_dir=None):
rank, world_size = get_dist_info()
rank = int(os.environ.get('LOCAL_RANK', rank))
if rank == 0:
checkpoint = model_zoo.load_url(
checkpoint = load_url(
filename, model_dir=model_dir, map_location=map_location)
if world_size > 1:
torch.distributed.barrier()
if rank > 0:
checkpoint = model_zoo.load_url(
checkpoint = load_url(
filename, model_dir=model_dir, map_location=map_location)
return checkpoint
......
......@@ -46,6 +46,7 @@ else:
_MaxPoolNd, get_build_config, is_rocm_pytorch, _get_cuda_home)
from .registry import Registry, build_from_cfg
from .trace import is_jit_tracing
from .hub import load_url
__all__ = [
'Config', 'ConfigDict', 'DictAction', 'collect_env', 'get_logger',
'print_log', 'is_str', 'iter_cast', 'list_cast', 'tuple_cast',
......@@ -65,5 +66,5 @@ else:
'assert_dict_has_keys', 'assert_keys_equal', 'assert_is_norm_layer',
'assert_params_all_zeros', 'check_python_script',
'is_method_overridden', 'is_jit_tracing', 'is_rocm_pytorch',
'_get_cuda_home', 'has_method'
'_get_cuda_home', 'load_url', 'has_method'
]
# The 1.6 release of PyTorch switched torch.save to use a new zipfile-based
# file format. It will cause RuntimeError when a checkpoint was saved in
# torch >= 1.6.0 but loaded in torch < 1.7.0.
# More details at https://github.com/open-mmlab/mmpose/issues/904
from .parrots_wrapper import TORCH_VERSION
from .path import mkdir_or_exist
from .version_utils import digit_version
if TORCH_VERSION != 'parrots' and digit_version(TORCH_VERSION) < digit_version(
'1.7.0'):
# Modified from https://github.com/pytorch/pytorch/blob/master/torch/hub.py
import os
import torch
import warnings
from urllib.parse import urlparse
import sys
import zipfile
from torch.hub import download_url_to_file, _get_torch_home, HASH_REGEX
# Hub used to support automatically extracting zipfiles that users had
# compressed manually. The legacy zip format expects the zip to contain only
# one file, produced by torch.save() < 1.6. This support should eventually be
# removed since zipfile is now the default format for torch.save().
def _is_legacy_zip_format(filename):
    """Tell whether ``filename`` is a zip archive holding exactly one file.

    Args:
        filename (str): Path of the file to inspect.

    Returns:
        bool: True if ``filename`` is a zip archive whose only entry is a
            regular file (not a directory), False otherwise.
    """
    if not zipfile.is_zipfile(filename):
        return False
    # Use a context manager so the archive's file handle is closed right
    # away instead of lingering until the ZipFile object is collected.
    with zipfile.ZipFile(filename) as archive:
        infolist = archive.infolist()
    return len(infolist) == 1 and not infolist[0].is_dir()
def _legacy_zip_load(filename, model_dir, map_location):
    """Extract the single member of a legacy zip checkpoint and load it.

    Args:
        filename (str): Path of the zip archive to unpack.
        model_dir (str): Directory the archive is extracted into.
        map_location: Forwarded unchanged to ``torch.load``.

    Returns:
        The object deserialized by ``torch.load`` from the extracted file.

    Raises:
        RuntimeError: If the archive does not contain exactly one entry.
    """
    message = ('Falling back to the old format < 1.6. This support will'
               ' be deprecated in favor of default zipfile format '
               'introduced in 1.6. Please redo torch.save() to save it '
               'in the new zipfile format.')
    warnings.warn(message)

    # extractall() overwrites existing files, so nothing is cleaned up
    # beforehand. Tar archives are deliberately not handled here, since the
    # legacy serialization format itself was a tar
    # (e.g. the widely used resnet18-5c106cde.pth).
    with zipfile.ZipFile(filename) as archive:
        entries = archive.infolist()
        if len(entries) != 1:
            raise RuntimeError(
                'Only one file(not dir) is allowed in the zipfile')
        archive.extractall(model_dir)
        extracted_name = entries[0].filename

    extracted_file = os.path.join(model_dir, extracted_name)
    return torch.load(extracted_file, map_location=map_location)
def load_url(url,
             model_dir=None,
             map_location=None,
             progress=True,
             check_hash=False,
             file_name=None):
    """Load the Torch serialized object at the given URL.

    The file is downloaded into ``model_dir`` unless a file with the target
    name already exists there, in which case the existing file is
    deserialized and returned. If the file is a zip archive containing
    exactly one regular file (the legacy zip format), that member is
    extracted into ``model_dir`` and loaded instead.

    The default value of ``model_dir`` is ``<torch_home>/checkpoints`` where
    ``torch_home`` is the directory returned by
    ``torch.hub._get_torch_home()``.

    Args:
        url (str): URL of the object to download.
        model_dir (str, optional): Directory in which to save the object.
            Default: None.
        map_location (optional): A function or a dict specifying how to
            remap storage locations (see ``torch.load``). Default: None.
        progress (bool, optional): Whether or not to display a progress bar
            to stderr. Default: True.
        check_hash (bool, optional): If True, the filename part of the URL
            should follow the naming convention ``filename-<sha256>.ext``
            where ``<sha256>`` is the first eight or more digits of the
            SHA256 hash of the contents of the file. The hash is used to
            ensure unique names and to verify the contents of the file.
            Default: False.
        file_name (str, optional): Name for the downloaded file. Filename
            from ``url`` will be used if not set. Default: None.

    Returns:
        The object deserialized by ``torch.load``.

    Raises:
        RuntimeError: Re-raised from ``torch.load`` when the cached file
            cannot be deserialized.

    Example:
        >>> url = ('https://s3.amazonaws.com/pytorch/models/resnet18-5c106'
        ...        'cde.pth')
        >>> state_dict = load_url(url)
    """
    # Issue warning to move data if old env is set
    if os.getenv('TORCH_MODEL_ZOO'):
        warnings.warn('TORCH_MODEL_ZOO is deprecated, please use env '
                      'TORCH_HOME instead')

    if model_dir is None:
        torch_home = _get_torch_home()
        model_dir = os.path.join(torch_home, 'checkpoints')

    mkdir_or_exist(model_dir)

    # An explicit ``file_name`` takes precedence over the URL's basename.
    if file_name is not None:
        filename = file_name
    else:
        filename = os.path.basename(urlparse(url).path)
    cached_file = os.path.join(model_dir, filename)

    if not os.path.exists(cached_file):
        sys.stderr.write(f'Downloading: "{url}" to {cached_file}\n')
        hash_prefix = None
        if check_hash:
            r = HASH_REGEX.search(filename)  # r is Optional[Match[str]]
            hash_prefix = r.group(1) if r else None
        download_url_to_file(
            url, cached_file, hash_prefix, progress=progress)

    if _is_legacy_zip_format(cached_file):
        return _legacy_zip_load(cached_file, model_dir, map_location)

    # NOTE(review): torch.load relies on pickle, so only URLs from trusted
    # sources should be passed in.
    try:
        return torch.load(cached_file, map_location=map_location)
    except RuntimeError:
        if digit_version(TORCH_VERSION) < digit_version('1.5.0'):
            warnings.warn(
                f'If the error is the same as "{cached_file} is a zip '
                'archive (did you mean to use torch.jit.load()?)", you can'
                ' upgrade your torch to 1.5.0 or higher (current torch '
                f'version is {TORCH_VERSION}). The error was raised '
                'because the checkpoint was saved in torch>=1.6.0 but '
                'loaded in torch<1.5.')
        # Bare ``raise`` re-raises the active exception unchanged.
        raise
else:
from torch.utils.model_zoo import load_url # noqa: F401
......@@ -73,8 +73,8 @@ def load(filepath, map_location=None):
@patch('mmcv.__path__', [osp.join(osp.dirname(__file__), 'data/')])
@patch('mmcv.runner.checkpoint.load_from_http', load_from_http)
@patch('mmcv.runner.checkpoint.load_url', load_url)
@patch('torch.load', load)
@patch('torch.utils.model_zoo.load_url', load_url)
def test_load_external_url():
# test modelzoo://
url = _load_checkpoint('modelzoo://resnet50')
......@@ -128,7 +128,7 @@ def test_load_external_url():
os.environ[ENV_MMCV_HOME] = mmcv_home
url = _load_checkpoint('open-mmlab://train')
assert url == 'url:https://localhost/train.pth'
with pytest.raises(IOError, match='train.pth is not a checkpoint ' 'file'):
with pytest.raises(IOError, match='train.pth is not a checkpoint file'):
_load_checkpoint('open-mmlab://train_empty')
url = _load_checkpoint('open-mmlab://test')
assert url == f'local:{osp.join(_get_mmcv_home(), "test.pth")}'
......@@ -140,7 +140,7 @@ def test_load_external_url():
assert url == 'url:http://localhost/train.pth'
# test local file
with pytest.raises(IOError, match='train.pth is not a checkpoint ' 'file'):
with pytest.raises(IOError, match='train.pth is not a checkpoint file'):
_load_checkpoint('train.pth')
url = _load_checkpoint(osp.join(_get_mmcv_home(), 'test.pth'))
assert url == f'local:{osp.join(_get_mmcv_home(), "test.pth")}'
import pytest
from torch.utils import model_zoo
from mmcv.utils import TORCH_VERSION, digit_version, load_url
def test_load_url():
    """Compare ``model_zoo.load_url`` with mmcv's ``load_url``.

    Two remote checkpoints are fetched: one saved with PyTorch 1.5 and one
    saved with PyTorch 1.6.
    """
    ckpt_pt15 = 'https://download.openmmlab.com/mmcv/test_data/saved_in_pt1.5.pth'
    ckpt_pt16 = 'https://download.openmmlab.com/mmcv/test_data/saved_in_pt1.6.pth'
    current_version = digit_version(TORCH_VERSION)

    # The 1.6 release of PyTorch switched torch.save to use a new
    # zipfile-based file format. It will cause RuntimeError when a checkpoint
    # was saved in torch >= 1.6.0 but loaded in torch < 1.7.0.
    # More details at https://github.com/open-mmlab/mmpose/issues/904
    model_zoo.load_url(ckpt_pt15)
    if current_version < digit_version('1.7.0'):
        with pytest.raises(RuntimeError):
            model_zoo.load_url(ckpt_pt16)
    else:
        # high version of PyTorch can load checkpoints from url, regardless
        # of which version they were saved in
        model_zoo.load_url(ckpt_pt16)

    load_url(ckpt_pt15)
    # if a checkpoint was saved in torch >= 1.6.0 but loaded in
    # torch < 1.5.0, it will raise a RuntimeError
    if current_version < digit_version('1.5.0'):
        with pytest.raises(RuntimeError):
            load_url(ckpt_pt16)
    else:
        load_url(ckpt_pt16)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment