Commit 3f162f5e authored by mibaumgartner's avatar mibaumgartner
Browse files

move num_processed from config to argparse

parent 20e9e513
......@@ -270,7 +270,7 @@ nnDetectionV0 requires a GPU with approximately the same amount of VRAM you are
Future releases aim at improving this process...
```bash
nndet_prep [tasks] [-o / --overwrites] [--full_check]
nndet_prep [tasks] [-o / --overwrites] [-np / --num_processes] [-npp / --num_processes_preprocessing] [--full_check]
# Example
nndet_prep 000
......@@ -279,7 +279,7 @@ nndet_prep 000
# /scripts/preprocess.py - main()
```
`-o` option can be used to overwrite parameters for planning and preprocessing (refer to the config files to see all parameters). A typical use case is to increase or decrease `prep.num_processes` (number of processes used for cropping) and `prep.num_processes_processing` (number of processes used for resampling) depending on the size/number of modalities of the data and available RAM. The current values are fairly safe if 64GB of RAM is available.
`-o` option can be used to overwrite parameters for planning and preprocessing (refer to the config files to see all parameters). The number of processes used for cropping and analysis can be adjusted by using `-np` and the number of processes used for resampling can be set via `-npp`. The current values are fairly safe if 64GB of RAM is available.
The `--full_check` will iterate over the data before starting any preprocessing and check correct formatting of the data and labels.
If any problems occur during preprocessing please run the full check version to make sure that the format is correct.
......@@ -374,14 +374,13 @@ nndet_consolidate 000 RetinaUNetV001_D3V001_3d --sweep_boxes
For the final test set predictions simply select the best model according to the validation scores and run the prediction command below.
Data which is located in `raw_splitted/imagesTs` will be automatically preprocessed and predicted by running the following command:
```bash
nndet_predict [task] [model] [--fold] [--num_models] [--num_tta] [--no_preprocess]
nndet_predict [task] [model] [--fold] [--num_tta] [--no_preprocess] [--check] [-npp / --num_processes_preprocessing] [--force_args]
# Example
nndet_predict 000 RetinaUNetV001_D3V001_3d --fold -1
# Script
# /scripts/predict.py - main()
# Note: --num_models is not supported by default
```
If a self-made test set was used, evaluation can be performed by invoking `nndet_eval` as described above.
......
# parameters which are used to prepare the data and plan later training
# define number of processes used for preprocessing
num_processes: 6
num_processes_processing: 3
# set this to 1 if you want to override cropped data
overwrite: False
......
# parameters which are used to prepare the data and plan later training
# define number of processes used for preprocessing
num_processes: 6
num_processes_processing: 3
# set this to 1 if you want to override cropped data
overwrite: False
......
# parameters which are used to prepare the data and plan later training
# define number of processes used for preprocessing
num_processes: 6
num_processes_processing: 3
# set this to 1 if you want to override cropped data
overwrite: False
......
import argparse
import os
import sys
import traceback
......@@ -18,7 +19,7 @@ from nndet.io.load import save_pickle, save_json, save_yaml, load_json
from nndet.utils.check import env_guard
def create_masks(source: Path, target: Path, df: pd.DataFrame):
def create_masks(source: Path, target: Path, df: pd.DataFrame, num_processes: int):
files = []
split = {}
for i in range(10):
......@@ -50,7 +51,7 @@ def create_masks(source: Path, target: Path, df: pd.DataFrame):
rads.append(r)
assert len(files) == len(centers) == len(rads)
with Pool(processes=6) as p:
with Pool(processes=num_processes) as p:
p.starmap(_create_mask, zip(files, repeat(target), centers, rads))
# for t in zip(files, repeat(target), centers, rads):
# _create_mask(*t)
......@@ -89,13 +90,13 @@ def create_splits(source, target):
save_pickle(splits, target)
def convert_data(source: Path, target: Path):
def convert_data(source: Path, target: Path, num_processes: int):
for subset_dir in source.glob('subset*'):
subset_dir = Path(subset_dir)
if not subset_dir.is_dir():
continue
with Pool(processes=6) as p:
with Pool(processes=num_processes) as p:
p.starmap(_convert_data, zip(subset_dir.glob('*.mhd'), repeat(target)))
......@@ -110,6 +111,12 @@ def _convert_data(f, target):
@env_guard
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--num_processes', type=int, default=4, required=False,
help="Number of processes to use for preparation.")
args = parser.parse_args()
num_processes = args.num_processes
det_data_dir = Path(os.getenv('det_data'))
task_data_dir = det_data_dir / "Task016_Luna"
source_data_dir = task_data_dir / "raw"
......@@ -152,10 +159,10 @@ def main():
# prepare data and labels
csv = source_data_dir / "annotations.csv"
convert_data(source_data_dir, target_data_dir)
convert_data(source_data_dir, target_data_dir, num_processes=num_processes)
df = pd.read_csv(csv, index_col='seriesuid')
create_masks(source_data_dir, target_label_dir, df)
create_masks(source_data_dir, target_label_dir, df, num_processes=num_processes)
# generate split
logger.info("Generating luna splits... ")
......
......@@ -147,13 +147,15 @@ if __name__ == '__main__':
help="Create a ranking of instances based on their volume",
action='store_true',
)
parser.add_argument('--num_processes', type=int, default=4, required=False,
help="Number of processes to use for conversion.")
args = parser.parse_args()
tasks = args.tasks
ov = args.overwrites
overwrite = args.overwrite
do_volume_ranking = args.volume_ranking
num_processes = args.num_processes
initialize_config_module(config_module="nndet.conf")
for task in tasks:
......@@ -183,7 +185,7 @@ if __name__ == '__main__':
# min_vol=cfg["data"].get("min_vol", 0),
# )
with Pool(processes=6) as p:
with Pool(processes=num_processes) as p:
p.starmap(prepare_detection_label, zip(
case_ids,
repeat(label_dir),
......
......@@ -39,6 +39,7 @@ def run(cfg: dict,
num_models: int = None,
num_tta_transforms: int = None,
test_split: bool = False,
num_processes: int = 3,
):
"""
Run inference pipeline
......@@ -71,7 +72,7 @@ def run(cfg: dict,
preprocessed_output_dir=preprocessed_output_dir,
splitted_4d_output_dir=cfg["host"]["splitted_4d_output_dir"],
plan=plan,
num_processes=cfg["prep"]["num_processes_processing"],
num_processes=num_processes,
)
prediction_dir.mkdir(parents=True, exist_ok=True)
......@@ -169,7 +170,11 @@ def main():
parser.add_argument('--check',
help="Run check of the test data before predicting",
action='store_true',
)
)
parser.add_argument('-npp', '--num_processes_preprocessing',
type=int, default=3, required=False,
help="Number of processes to use for resampling.",
)
args = parser.parse_args()
model = args.model
......@@ -181,6 +186,7 @@ def main():
force_args = args.force_args
test_split = args.test_split
check = args.check
num_processes = args.num_processes_preprocessing
task_name = get_task(task, name=True)
task_model_dir = Path(os.getenv("det_models"))
......@@ -219,6 +225,7 @@ def main():
num_models=num_models,
num_tta_transforms=num_tta_transforms,
test_split=test_split,
num_processes=num_processes,
)
......
......@@ -364,7 +364,10 @@ def check_case(case_npz: Path,
return case_id, True
def run(cfg, instances_from_seg):
def run(cfg,
num_processes: int,
num_processes_preprocessing: int,
):
"""
Python interface for script
......@@ -375,7 +378,6 @@ def run(cfg, instances_from_seg):
logger.remove()
logger.add(sys.stdout, level="INFO")
logger.add(Path(cfg["host"]["data_dir"]) / "logging.log", level="DEBUG")
logger.info(f"Running instances_from_seg: {instances_from_seg}")
data_info = cfg["data"]
if cfg["prep"]["crop"]:
......@@ -384,7 +386,7 @@ def run(cfg, instances_from_seg):
splitted_4d_output_dir=Path(cfg["host"]["splitted_4d_output_dir"]),
data_info=data_info,
overwrite=cfg["prep"]["overwrite"],
num_processes=cfg["prep"]["num_processes"],
num_processes=num_processes,
)
if cfg["prep"]["analyze"]:
......@@ -392,7 +394,7 @@ def run(cfg, instances_from_seg):
run_dataset_analysis(cropped_output_dir=Path(cfg["host"]["cropped_output_dir"]),
preprocessed_output_dir=Path(cfg["host"]["preprocessed_output_dir"]),
data_info=data_info,
num_processes=cfg["prep"]["num_processes"],
num_processes=num_processes,
intensity_properties=True,
overwrite=cfg["prep"]["overwrite"],
)
......@@ -407,7 +409,7 @@ def run(cfg, instances_from_seg):
dim=data_info["dim"],
model_name=cfg["module"],
model_cfg=cfg["model_cfg"],
num_processes=cfg["prep"]["num_processes_processing"],
num_processes=num_processes_preprocessing,
run_preprocessing=cfg["prep"]["process"],
)
......@@ -429,11 +431,21 @@ def main():
help="Skip basic check.",
action='store_true',
)
parser.add_argument('-np', '--num_processes',
type=int, default=4, required=False,
                        help="Number of processes to use for cropping.",
)
parser.add_argument('-npp', '--num_processes_preprocessing',
type=int, default=3, required=False,
help="Number of processes to use for resampling.",
)
args = parser.parse_args()
tasks = args.tasks
ov = args.overwrites
full_check = args.full_check
no_check = args.no_check
num_processes = args.num_processes
num_processes_preprocessing = args.num_processes_preprocessing
initialize_config_module(config_module="nndet.conf")
# perform preprocessing checks first
......@@ -460,8 +472,10 @@ def main():
for task in tasks:
_ov = copy.deepcopy(ov) if ov is not None else []
cfg = compose(task, "config.yaml", overrides=_ov)
instances_from_seg = cfg.data.get("instances_from_seg", False)
run(OmegaConf.to_container(cfg, resolve=True), instances_from_seg)
run(OmegaConf.to_container(cfg, resolve=True),
num_processes=num_processes,
num_processes_preprocessing=num_processes_preprocessing,
)
if __name__ == '__main__':
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment