Unverified Commit c530b623 authored by Philip Meier's avatar Philip Meier Committed by GitHub
Browse files

fix prototype resource loading (#5447)

* fix prototype resource loading

* revert unrelated change
parent e88a5549
...@@ -88,20 +88,30 @@ class OnlineResource(abc.ABC): ...@@ -88,20 +88,30 @@ class OnlineResource(abc.ABC):
root = pathlib.Path(root) root = pathlib.Path(root)
path = root / self.file_name path = root / self.file_name
# Instead of the raw file, there might also be files with fewer suffixes after decompression or directories # Instead of the raw file, there might also be files with fewer suffixes after decompression or directories
# with no suffixes at all. Thus, we look for all paths that share the same name without suffixes as the raw # with no suffixes at all.
# file. stem = path.name.replace("".join(path.suffixes), "")
path_candidates = {file for file in path.parent.glob(path.name.replace("".join(path.suffixes), "") + "*")}
# If we don't find anything, we try to download the raw file. # In a first step, we check for a folder with the same stem as the raw file. If it exists, we use it since
if not path_candidates: # extracted files give the best I/O performance. Note that OnlineResource._extract() makes sure that an archive
path_candidates = {self.download(root, skip_integrity_check=skip_integrity_check)} # is always extracted in a folder with the corresponding file name.
folder_candidate = path.parent / stem
if folder_candidate.exists() and folder_candidate.is_dir():
return self._loader(folder_candidate)
# If there is no folder, we look for all files that share the same stem as the raw file, but might have a
# different suffix.
file_candidates = {file for file in path.parent.glob(stem + ".*")}
# If we don't find anything, we download the raw file.
if not file_candidates:
file_candidates = {self.download(root, skip_integrity_check=skip_integrity_check)}
# If the only thing we find is the raw file, we use it and optionally perform some preprocessing steps. # If the only thing we find is the raw file, we use it and optionally perform some preprocessing steps.
if path_candidates == {path}: if file_candidates == {path}:
if self._preprocess is not None: if self._preprocess is not None:
path = self._preprocess(path) path = self._preprocess(path)
# Otherwise we use the path with the fewest suffixes. This gives us the extracted > decompressed > raw priority # Otherwise, we use the path with the fewest suffixes. This gives us the decompressed > raw priority that we
# that we want. # want for the best I/O performance.
else: else:
path = min(path_candidates, key=lambda path: len(path.suffixes)) path = min(file_candidates, key=lambda path: len(path.suffixes))
return self._loader(path) return self._loader(path)
@abc.abstractmethod @abc.abstractmethod
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment