Commit e708c342 authored by Andres Martinez Mora

Allow processing input instance segmentation labels with float values

parent d26ba9a9
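
Note: the only behavioral change in this commit is in `load_case_cropped`, where the segmentation channel is now rounded with `np.rint` before the cast to `np.int32`; the rest of the diff is formatting. Rounding matters because a plain `astype` truncates toward zero, so a float label such as 0.9999999 (e.g. produced by resampling or a float npz export) would silently become background. A minimal standalone sketch of the difference, not part of the commit:

import numpy as np

# instance labels stored as floats, e.g. after resampling
seg = np.array([0.0, 0.9999999, 2.0000001, 3.0])

print(seg.astype(np.int32))           # [0 0 2 3] -> 0.9999999 truncates to 0
print(np.rint(seg).astype(np.int32))  # [0 1 2 3] -> rounds to the nearest id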
@@ -34,14 +34,23 @@ from loguru import logger
 from nndet.io.paths import subfiles, Pathlike
 
-__all__ = ["load_case_cropped", "load_case_from_list",
-           "load_properties_of_cropped", "npy_dataset",
-           "load_pickle", "load_json", "save_json", "save_pickle",
-           "save_yaml", "load_npz_looped",
-           ]
+__all__ = [
+    "load_case_cropped",
+    "load_case_from_list",
+    "load_properties_of_cropped",
+    "npy_dataset",
+    "load_pickle",
+    "load_json",
+    "save_json",
+    "save_pickle",
+    "save_yaml",
+    "load_npz_looped",
+]
 
 
-def load_case_from_list(data_files, seg_file=None) -> Tuple[np.ndarray, np.ndarray, dict]:
+def load_case_from_list(
+    data_files, seg_file=None
+) -> Tuple[np.ndarray, np.ndarray, dict]:
     """
     Load data and label of one case from list of paths
@@ -88,7 +97,9 @@ def load_case_from_list(data_files, seg_file=None) -> Tuple[np.ndarray, np.ndarr
         # cast instances to correct type
         properties_json["instances"] = {
-            str(key): int(item) for key, item in properties_json["instances"].items()}
+            str(key): int(item)
+            for key, item in properties_json["instances"].items()
+        }
         properties.update(properties_json)
     else:
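
Relatedly, the comprehension above normalizes the `instances` mapping read from the properties JSON to `str` keys and `int` values, which keeps downstream lookups consistent when ids arrive as floats. A small standalone illustration:

# ids deserialized from JSON may arrive as floats
instances = {"1": 1.0, "2": 2.0}
print({str(key): int(item) for key, item in instances.items()})  # {'1': 1, '2': 2}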
@@ -100,52 +111,62 @@ def load_properties_of_cropped(path: Path):
     """
     Load property file of after cropping was performed
     (files are name after case id and .pkl ending)
 
     Args:
         path (Path): path to file (if .pkl is missing, it will be added automatically)
 
     Returns:
         Dict: loaded properties
     """
-    if not path.suffix == '.pkl':
-        path = Path(str(path) + '.pkl')
-    with open(path, 'rb') as f:
+    if not path.suffix == ".pkl":
+        path = Path(str(path) + ".pkl")
+    with open(path, "rb") as f:
         properties = pickle.load(f)
     return properties
 
 
-def load_case_cropped(folder: Path, case_id: str) -> Tuple[np.ndarray, np.ndarray, dict]:
+def load_case_cropped(
+    folder: Path, case_id: str
+) -> Tuple[np.ndarray, np.ndarray, dict]:
     """
     Load single case after cropping
 
     Args:
         folder (Path): path to folder where cases are located
         case_id (str): case identifier
 
     Returns:
         np.ndarray: data
         np.ndarray: segmentation
         dict: additional properties
     """
-    stack = load_npz_looped(os.path.join(folder, case_id) + ".npz",
-                            keys=["data"], num_tries=3,
-                            )["data"]
+    stack = load_npz_looped(
+        os.path.join(folder, case_id) + ".npz",
+        keys=["data"],
+        num_tries=3,
+    )["data"]
     data = stack[:-1]
     seg = stack[-1]
 
     with open(os.path.join(folder, case_id) + ".pkl", "rb") as f:
         props = pickle.load(f)
 
-    assert data.shape[1:] == seg.shape, (f"Data and segmentation need to have same dim (except first). "
-                                         f"Found data {data.shape} and "
-                                         f"mask {seg.shape} for case {case_id}")
-    return data.astype(np.float32), seg.astype(np.int32), props
+    assert data.shape[1:] == seg.shape, (
+        f"Data and segmentation need to have same dim (except first). "
+        f"Found data {data.shape} and "
+        f"mask {seg.shape} for case {case_id}"
+    )
+    return data.astype(np.float32), np.rint(seg).astype(np.int32), props
 
 
 @contextmanager
-def npy_dataset(folder: str, processes: int,
-                unpack: bool = True, delete_npy: bool = True,
-                delete_npz: bool = False):
+def npy_dataset(
+    folder: str,
+    processes: int,
+    unpack: bool = True,
+    delete_npy: bool = True,
+    delete_npz: bool = False,
+):
     """
     Automatically unpacks the npz dataset and deletes npy data after completion
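
For orientation, `npy_dataset` is a context manager: per its docstring it unpacks the `.npz` dataset on entry and removes the `.npy` files again afterwards. A hypothetical usage sketch (the folder path is invented):

# hypothetical usage; "./preprocessed" is an invented path
with npy_dataset("./preprocessed", processes=4, unpack=True, delete_npy=True):
    ...  # train / read the unpacked .npy files here
# on exit the temporary .npy files are deleted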
@@ -165,9 +186,7 @@ def npy_dataset(folder: str, processes: int,
         del_npy(Path(folder))
 
 
-def unpack_dataset(folder: Pathlike,
-                   processes: int,
-                   delete_npz: bool = False):
+def unpack_dataset(folder: Pathlike, processes: int, delete_npz: bool = False):
     """
     unpacks all npz files in a folder to npy
     (whatever you want to have unpacked must be saved under key)
@@ -181,7 +200,7 @@ def unpack_dataset(folder: Pathlike,
     logger.info("Unpacking dataset")
     npz_files = subfiles(Path(folder), identifier="*.npz", join=True)
     if not npz_files:
-        logger.warning(f'No paths found in {Path(folder)} matching *.npz')
+        logger.warning(f"No paths found in {Path(folder)} matching *.npz")
         return
     with Pool(processes) as p:
         p.starmap(npz2npy, zip(npz_files, repeat(delete_npz)))
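
The `zip(npz_files, repeat(delete_npz))` idiom pairs every file with the same flag so `Pool.starmap` can fan `npz2npy` out over the worker processes; a quick standalone check of the pairing:

from itertools import repeat

npz_files = ["a.npz", "b.npz"]  # invented file names
print(list(zip(npz_files, repeat(False))))  # [('a.npz', False), ('b.npz', False)]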
@@ -255,7 +274,7 @@ def load_json(path: Path, **kwargs) -> Any:
     """
     if isinstance(path, str):
         path = Path(path)
-    if not(".json" == path.suffix):
+    if not (".json" == path.suffix):
         path = str(path) + ".json"
 
     with open(path, "r") as f:
@@ -275,7 +294,7 @@ def save_json(data: Any, path: Pathlike, indent: int = 4, **kwargs):
     """
     if isinstance(path, str):
         path = Path(path)
-    if not(".json" == path.suffix):
+    if not (".json" == path.suffix):
         path = Path(str(path) + ".json")
 
     with open(path, "w") as f:
@@ -333,7 +352,7 @@ def save_yaml(data: Any, path: Path, **kwargs):
     """
     if isinstance(path, str):
         path = Path(path)
-    if not(".yaml" == path.suffix):
+    if not (".yaml" == path.suffix):
         path = str(path) + ".yaml"
 
     with open(path, "w") as f:
@@ -351,7 +370,7 @@ def save_txt(data: str, path: Path, **kwargs):
     """
     if isinstance(path, str):
         path = Path(path)
-    if not(".txt" == path.suffix):
+    if not (".txt" == path.suffix):
         path = str(path) + ".txt"
 
     with open(path, "a") as f:
@@ -359,12 +378,12 @@ def save_txt(data: str, path: Path, **kwargs):
 
 
 def load_npz_looped(
-        p: Pathlike,
-        keys: Sequence[str],
-        *args,
-        num_tries: int = 3,
-        **kwargs,
-        ) -> Union[np.ndarray, dict]:
+    p: Pathlike,
+    keys: Sequence[str],
+    *args,
+    num_tries: int = 3,
+    **kwargs,
+) -> Union[np.ndarray, dict]:
     """
     Try | Except loop to load numpy files
     (especially large numpy files can fail with BadZipFile Errors)
@@ -380,7 +399,9 @@
         dict: loaded data
     """
     if num_tries <= 0:
-        raise ValueError(f"Num tires needs to be larger than 0, found {num_tries} tries.")
+        raise ValueError(
+            f"Num tires needs to be larger than 0, found {num_tries} tries."
+        )
 
     for i in range(num_tries):  # try reading the file 3 times
         try:
@@ -391,5 +412,5 @@
             if i == num_tries - 1:
                 logger.error(f"Could not unpack {p}")
                 return None
-            time.sleep(5.)
+            time.sleep(5.0)
     return data
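
As the loop shows, `load_npz_looped` retries loading the file up to `num_tries` times with a 5-second pause between attempts and returns `None` only after the last failure; callers such as `load_case_cropped` index the returned dict directly. A hedged usage sketch (the file name is invented):

# hypothetical call; "case_000.npz" is an invented file name
loaded = load_npz_looped("case_000.npz", keys=["data"], num_tries=3)
if loaded is not None:
    stack = loaded["data"]  # same access pattern as in load_case_cropped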