Commit 3f162f5e authored by mibaumgartner's avatar mibaumgartner
Browse files

move num_processed from config to argparse

parent 20e9e513
......@@ -270,7 +270,7 @@ nnDetectionV0 requires a GPU with approximately the same amount of VRAM you are
Future releases aim at improving this process...
```bash
nndet_prep [tasks] [-o / --overwrites] [--full_check]
nndet_prep [tasks] [-o / --overwrites] [-np / --num_processes] [-npp / --num_processes_preprocessing] [--full_check]
# Example
nndet_prep 000
......@@ -279,7 +279,7 @@ nndet_prep 000
# /scripts/preprocess.py - main()
```
`-o` option can be used to overwrite parameters for planning and preprocessing (refer to the config files to see all parameters). A typical use case is to increase or decrease `prep.num_processes` (number of processes used for cropping) and `prep.num_processes_processing` (number of processes used for resampling) depending on the size/number of modalities of the data and available RAM. The current values are fairly safe if 64GB of RAM is available.
`-o` option can be used to overwrite parameters for planning and preprocessing (refer to the config files to see all parameters). The number of processes used for cropping and analysis can be adjusted by using `-np` and the number of processes used for resampling can be set via `-npp`. The current values are fairly safe if 64GB of RAM is available.
The `--full_check` will iterate over the data before starting any preprocessing and check correct formatting of the data and labels.
If any problems occur during preprocessing please run the full check version to make sure that the format is correct.
......@@ -374,14 +374,13 @@ nndet_consolidate 000 RetinaUNetV001_D3V001_3d --sweep_boxes
For the final test set predictions simply select the best model according to the validation scores and run the prediction command below.
Data which is located in `raw_splitted/imagesTs` will be automatically preprocessed and predicted by running the following command:
```bash
nndet_predict [task] [model] [--fold] [--num_models] [--num_tta] [--no_preprocess]
nndet_predict [task] [model] [--fold] [--num_tta] [--no_preprocess] [--check] [-npp / --num_processes_preprocessing] [--force_args]
# Example
nndet_predict 000 RetinaUNetV001_D3V001_3d --fold -1
# Script
# /scripts/predict.py - main()
# Note: --num_models is not supported by default
```
If a self-made test set was used, evaluation can be performed by invoking `nndet_eval` as described above.
......
# parameters which are used to prepare the data and plan later training
# define number of processes used for preprocessing
num_processes: 6
num_processes_processing: 3
# set this to 1 if you want to override cropped data
overwrite: False
......
# parameters which are used to prepare the data and plan later training
# define number of processes used for preprocessing
num_processes: 6
num_processes_processing: 3
# set this to 1 if you want to override cropped data
overwrite: False
......
# parameters which are used to prepare the data and plan later training
# define number of processes used for preprocessing
num_processes: 6
num_processes_processing: 3
# set this to 1 if you want to override cropped data
overwrite: False
......
import argparse
import os
import sys
import traceback
......@@ -18,7 +19,7 @@ from nndet.io.load import save_pickle, save_json, save_yaml, load_json
from nndet.utils.check import env_guard
def create_masks(source: Path, target: Path, df: pd.DataFrame):
def create_masks(source: Path, target: Path, df: pd.DataFrame, num_processes: int):
files = []
split = {}
for i in range(10):
......@@ -50,7 +51,7 @@ def create_masks(source: Path, target: Path, df: pd.DataFrame):
rads.append(r)
assert len(files) == len(centers) == len(rads)
with Pool(processes=6) as p:
with Pool(processes=num_processes) as p:
p.starmap(_create_mask, zip(files, repeat(target), centers, rads))
# for t in zip(files, repeat(target), centers, rads):
# _create_mask(*t)
......@@ -89,13 +90,13 @@ def create_splits(source, target):
save_pickle(splits, target)
def convert_data(source: Path, target: Path):
def convert_data(source: Path, target: Path, num_processes: int):
for subset_dir in source.glob('subset*'):
subset_dir = Path(subset_dir)
if not subset_dir.is_dir():
continue
with Pool(processes=6) as p:
with Pool(processes=num_processes) as p:
p.starmap(_convert_data, zip(subset_dir.glob('*.mhd'), repeat(target)))
......@@ -110,6 +111,12 @@ def _convert_data(f, target):
@env_guard
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--num_processes', type=int, default=4, required=False,
help="Number of processes to use for preparation.")
args = parser.parse_args()
num_processes = args.num_processes
det_data_dir = Path(os.getenv('det_data'))
task_data_dir = det_data_dir / "Task016_Luna"
source_data_dir = task_data_dir / "raw"
......@@ -152,10 +159,10 @@ def main():
# prepare data and labels
csv = source_data_dir / "annotations.csv"
convert_data(source_data_dir, target_data_dir)
convert_data(source_data_dir, target_data_dir, num_processes=num_processes)
df = pd.read_csv(csv, index_col='seriesuid')
create_masks(source_data_dir, target_label_dir, df)
create_masks(source_data_dir, target_label_dir, df, num_processes=num_processes)
# generate split
logger.info("Generating luna splits... ")
......
......@@ -147,13 +147,15 @@ if __name__ == '__main__':
help="Create a ranking of instances based on their volume",
action='store_true',
)
parser.add_argument('--num_processes', type=int, default=4, required=False,
help="Number of processes to use for conversion.")
args = parser.parse_args()
tasks = args.tasks
ov = args.overwrites
overwrite = args.overwrite
do_volume_ranking = args.volume_ranking
num_processes = args.num_processes
initialize_config_module(config_module="nndet.conf")
for task in tasks:
......@@ -183,7 +185,7 @@ if __name__ == '__main__':
# min_vol=cfg["data"].get("min_vol", 0),
# )
with Pool(processes=6) as p:
with Pool(processes=num_processes) as p:
p.starmap(prepare_detection_label, zip(
case_ids,
repeat(label_dir),
......
......@@ -39,6 +39,7 @@ def run(cfg: dict,
num_models: int = None,
num_tta_transforms: int = None,
test_split: bool = False,
num_processes: int = 3,
):
"""
Run inference pipeline
......@@ -71,7 +72,7 @@ def run(cfg: dict,
preprocessed_output_dir=preprocessed_output_dir,
splitted_4d_output_dir=cfg["host"]["splitted_4d_output_dir"],
plan=plan,
num_processes=cfg["prep"]["num_processes_processing"],
num_processes=num_processes,
)
prediction_dir.mkdir(parents=True, exist_ok=True)
......@@ -169,7 +170,11 @@ def main():
parser.add_argument('--check',
help="Run check of the test data before predicting",
action='store_true',
)
)
parser.add_argument('-npp', '--num_processes_preprocessing',
type=int, default=3, required=False,
help="Number of processes to use for resampling.",
)
args = parser.parse_args()
model = args.model
......@@ -181,6 +186,7 @@ def main():
force_args = args.force_args
test_split = args.test_split
check = args.check
num_processes = args.num_processes_preprocessing
task_name = get_task(task, name=True)
task_model_dir = Path(os.getenv("det_models"))
......@@ -219,6 +225,7 @@ def main():
num_models=num_models,
num_tta_transforms=num_tta_transforms,
test_split=test_split,
num_processes=num_processes,
)
......
......@@ -364,7 +364,10 @@ def check_case(case_npz: Path,
return case_id, True
def run(cfg, instances_from_seg):
def run(cfg,
num_processes: int,
num_processes_preprocessing: int,
):
"""
Python interface for script
......@@ -375,7 +378,6 @@ def run(cfg, instances_from_seg):
logger.remove()
logger.add(sys.stdout, level="INFO")
logger.add(Path(cfg["host"]["data_dir"]) / "logging.log", level="DEBUG")
logger.info(f"Running instances_from_seg: {instances_from_seg}")
data_info = cfg["data"]
if cfg["prep"]["crop"]:
......@@ -384,7 +386,7 @@ def run(cfg, instances_from_seg):
splitted_4d_output_dir=Path(cfg["host"]["splitted_4d_output_dir"]),
data_info=data_info,
overwrite=cfg["prep"]["overwrite"],
num_processes=cfg["prep"]["num_processes"],
num_processes=num_processes,
)
if cfg["prep"]["analyze"]:
......@@ -392,7 +394,7 @@ def run(cfg, instances_from_seg):
run_dataset_analysis(cropped_output_dir=Path(cfg["host"]["cropped_output_dir"]),
preprocessed_output_dir=Path(cfg["host"]["preprocessed_output_dir"]),
data_info=data_info,
num_processes=cfg["prep"]["num_processes"],
num_processes=num_processes,
intensity_properties=True,
overwrite=cfg["prep"]["overwrite"],
)
......@@ -407,7 +409,7 @@ def run(cfg, instances_from_seg):
dim=data_info["dim"],
model_name=cfg["module"],
model_cfg=cfg["model_cfg"],
num_processes=cfg["prep"]["num_processes_processing"],
num_processes=num_processes_preprocessing,
run_preprocessing=cfg["prep"]["process"],
)
......@@ -429,11 +431,21 @@ def main():
help="Skip basic check.",
action='store_true',
)
parser.add_argument('-np', '--num_processes',
type=int, default=4, required=False,
                        help="Number of processes to use for cropping.",
)
parser.add_argument('-npp', '--num_processes_preprocessing',
type=int, default=3, required=False,
help="Number of processes to use for resampling.",
)
args = parser.parse_args()
tasks = args.tasks
ov = args.overwrites
full_check = args.full_check
no_check = args.no_check
num_processes = args.num_processes
num_processes_preprocessing = args.num_processes_preprocessing
initialize_config_module(config_module="nndet.conf")
# perform preprocessing checks first
......@@ -460,8 +472,10 @@ def main():
for task in tasks:
_ov = copy.deepcopy(ov) if ov is not None else []
cfg = compose(task, "config.yaml", overrides=_ov)
instances_from_seg = cfg.data.get("instances_from_seg", False)
run(OmegaConf.to_container(cfg, resolve=True), instances_from_seg)
run(OmegaConf.to_container(cfg, resolve=True),
num_processes=num_processes,
num_processes_preprocessing=num_processes_preprocessing,
)
if __name__ == '__main__':
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment