WIP check

1235f737 · mibaumgartner · 0cd12185 · 1235f737 · 1235f737
Commit 1235f737 authored Apr 30, 2021 by mibaumgartner
Hide whitespace changes
Inline Side-by-side

Showing with 65 additions and 0 deletions

README.md README.md +2 -0

nndet/utils/check.py nndet/utils/check.py +63 -0

No files found.
--- a/README.md
+++ b/README.md
@@ -191,6 +191,8 @@ task: Task000D3_Example
 name: "Example" # [Optional]
 dim: 3 # number of spatial dimensions of the data
+# TODO: check these
 target_class: # define class of interest for patient level evaluations # TODO: check if this should be included
 test_labels: True # manually splitted test set

--- a/nndet/utils/check.py
+++ b/nndet/utils/check.py
+from nndet.io.paths import get_task
+from nndet.utils.config import load_dataset_info
+def _check_key_missing(cfg: dict, key: str, ktype=None):
+    """
+    Ensure a key is present in the dataset information and has the expected type
+    Args:
+        cfg: dataset information to inspect
+        key: key which needs to be present in `cfg`
+        ktype: type (or tuple of types) the value is checked against with
+            `isinstance`; the type check is skipped if None
+    Raises:
+        ValueError: `key` is not contained in `cfg`
+        ValueError: the value of `key` is not an instance of `ktype`
+    """
+    if key not in cfg:
+        available = list(cfg.keys())
+        raise ValueError("Dataset information did not contain "
+                         f"'{key}' key, found {available}")
+    if ktype is None:
+        # presence check only
+        return
+    value = cfg[key]
+    if isinstance(value, ktype):
+        return
+    raise ValueError(f"Found {key} of type {type(value)} in "
+                     f"dataset information but expected type {ktype}")
+def _check_index_mapping(mapping: dict, section: str, description: str):
+    """
+    Check that a dataset info mapping is indexed by 0, 1, ..., len(mapping) - 1
+    Args:
+        mapping: entry of the dataset information to check
+        section: name of the entry, used in the error messages
+        description: name of the indexed items, used in the ordering error
+    Raises:
+        ValueError: a key or a value is neither a string nor an int
+        ValueError: a key can not be converted to an int
+        ValueError: the sorted integer keys are not consecutive starting at 0
+    """
+    for key, item in mapping.items():
+        if not isinstance(key, (str, int)):
+            raise ValueError("Expected key of type string in dataset "
+                             f"info {section} but found {type(key)} : {key}")
+        if not isinstance(item, (str, int)):
+            raise ValueError("Expected name of type string in dataset "
+                             f"info {section} but found {type(item)} : {item}")
+    try:
+        found = sorted(map(int, mapping))
+    except ValueError as err:
+        # report the offending keys instead of the bare int() error
+        raise ValueError(f"Expected keys of dataset info {section} to be "
+                         f"integers but found {list(mapping)}") from err
+    expected = list(range(len(found)))
+    if found != expected:
+        raise ValueError(f"Found wrong order of {description} in dataset info. "
+                         f"Found {found} but expected {expected}")
+def check_dataset_file(task_name: str):
+    """
+    Run a sequence of checks to confirm correct format of dataset information
+    Args:
+        task_name: task identifier to check info for
+    Raises:
+        ValueError: one of the keys `task`, `dim`, `labels`, `modalities`
+            is missing or has the wrong type
+        ValueError: `dim` is neither 2 nor 3
+        ValueError: `labels` or `modalities` are not indexed by
+            consecutive integers starting at 0
+    """
+    cfg = load_dataset_info(get_task(task_name))
+    _check_key_missing(cfg, "task", ktype=str)
+    _check_key_missing(cfg, "dim", ktype=int)
+    _check_key_missing(cfg, "labels", ktype=dict)
+    _check_key_missing(cfg, "modalities", ktype=dict)
+    # check dim
+    # bind the value before comparing: `dim := cfg["dim"] not in [2, 3]`
+    # would assign the boolean result of the comparison to `dim`
+    dim = cfg["dim"]
+    if dim not in [2, 3]:
+        raise ValueError(f"Found dim {dim} in dataset info but only support dim=2 or dim=3.")
+    # check labels
+    _check_index_mapping(cfg["labels"], "labels", "label classes")
+    # check modalities
+    _check_index_mapping(cfg["modalities"], "modalities", "modalities")
+def check_data_and_label_splitted():
+    """
+    Placeholder without any checks implemented; calling it does nothing
+    """
+    return None