predict_utils.py 12.6 KB
Newer Older
Sugon_ldc's avatar
Sugon_ldc committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
# Implementation of this model is borrowed and modified
# (from torch to paddle) from here:
# https://github.com/MIC-DKFZ/nnUNet

# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import ast
import shutil
import json
import pickle
import numpy as np
from copy import deepcopy
from typing import Tuple, Union

parent_path = os.path.abspath(os.path.join(__file__, *(['..'] * 2)))
sys.path.insert(0, parent_path)

import SimpleITK as sitk
from multiprocessing import Process, Queue, Pool

from nnunet.utils import load_remove_save
from nnunet.transforms import resize_segmentation
from .utils import save_segmentation_nifti_from_softmax
from tools.preprocess_utils import get_do_separate_z, get_lowres_axis, resample_data_or_seg


def load_postprocessing(json_path):
    """Read the postprocessing settings stored in a JSON file.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        A tuple ``(for_which_classes, min_valid_object_sizes)``.
        ``for_which_classes`` is the value stored under that key.
        ``min_valid_object_sizes`` is the Python literal parsed from the
        string stored under that key, or ``None`` when the key is absent.

    Raises:
        KeyError: If the file has no ``'for_which_classes'`` entry.
    """
    with open(json_path, 'rb') as handle:
        settings = json.load(handle)

    # The sizes are stored as the string form of a Python literal, so they
    # have to be parsed back; the entry is optional.
    min_valid_object_sizes = None
    if 'min_valid_object_sizes' in settings.keys():
        min_valid_object_sizes = ast.literal_eval(
            settings['min_valid_object_sizes'])

    return settings['for_which_classes'], min_valid_object_sizes


def to_one_hot(seg, all_seg_labels=None):
    """Convert a label map into a stack of binary masks, one per label.

    Args:
        seg: Array of labels.
        all_seg_labels: Labels to create a channel for, in channel order.
            When ``None``, the sorted unique values found in ``seg`` are used.

    Returns:
        An array of shape ``(len(labels), *seg.shape)`` with the dtype of
        ``seg``. Channel ``k`` is 1 where ``seg`` equals the k-th label and 0
        everywhere else.
    """
    labels = np.unique(seg) if all_seg_labels is None else all_seg_labels
    encoded = np.zeros((len(labels), ) + tuple(seg.shape), dtype=seg.dtype)
    for channel, label in enumerate(labels):
        encoded[channel][seg == label] = 1
    return encoded


def preprocess_save_to_queue(preprocess_fn, q, list_of_lists, output_files,
                             segs_from_prev_stage, classes, transpose_forward):
    """Preprocess cases one after another and put the results on a queue.

    For every case ``(output_file, (data, properties))`` is put on ``q``. A
    case that raises an exception is reported, remembered and skipped so that
    the remaining cases are still processed. After the last case the sentinel
    string ``"end"`` is put on ``q`` and a summary is printed.

    Args:
        preprocess_fn: Callable taking one entry of ``list_of_lists`` and
            returning a 3-tuple; the first element is the preprocessed data
            array, the second is ignored and the third is passed through
            unchanged next to the data.
        q: Queue-like object with a ``put`` method.
        list_of_lists: One entry per case; each entry is passed to
            ``preprocess_fn``. The first item of an entry is read as an image
            file when a previous-stage segmentation is given.
        output_files: Output file name for each case. The last seven
            characters are stripped (``".nii.gz"`` is assumed) to build the
            name of the temporary ``.npy`` file for oversized data.
        segs_from_prev_stage: For each case either ``None`` or the path of a
            ``.nii.gz`` segmentation that is one-hot encoded and stacked onto
            the data.
        classes: Labels passed to ``to_one_hot`` for the previous-stage
            segmentation.
        transpose_forward: Axis order applied to the previous-stage
            segmentation before it is resized.
    """
    errors_in = []
    for i, l in enumerate(list_of_lists):
        try:
            output_file = output_files[i]
            print("preprocessing", output_file)
            d, _, dct = preprocess_fn(l)

            prev_seg_file = segs_from_prev_stage[i]
            if prev_seg_file is not None:
                assert os.path.isfile(prev_seg_file) and prev_seg_file.endswith(
                    ".nii.gz"), "segs_from_prev_stage" \
                                " must point to a " \
                                "segmentation file"
                seg_prev = sitk.GetArrayFromImage(sitk.ReadImage(prev_seg_file))
                # The previous-stage segmentation must have the same voxel
                # grid as the raw image it belongs to.
                img = sitk.GetArrayFromImage(sitk.ReadImage(l[0]))
                assert all(a == b for a, b in zip(seg_prev.shape, img.shape)), \
                    "image and segmentation from previous " \
                    "stage don't have the same pixel array " \
                    "shape! image: %s, seg_prev: %s" % (l[0], prev_seg_file)
                seg_prev = seg_prev.transpose(transpose_forward)
                seg_reshaped = resize_segmentation(
                    seg_prev, d.shape[1:], order=1)
                seg_reshaped = to_one_hot(seg_reshaped, classes)
                d = np.vstack((d, seg_reshaped)).astype(np.float32)

            # Objects larger than about 2 GB cannot be sent between Python
            # processes (the length field of the pickled message is too
            # small). Oversized data is therefore written to a .npy file and
            # only the file name is put on the queue; the consumer is
            # expected to load and delete that file.
            print(d.shape)
            # 4 bytes per float32 value; *0.85 is a safety margin.
            if np.prod(d.shape) > (2e9 / 4 * 0.85):
                print(
                    "This output is too large for python process-process communication. "
                    "Saving output temporarily to disk")
                np.save(output_file[:-7] + ".npy", d)
                d = output_file[:-7] + ".npy"
            q.put((output_file, (d, dct)))
        except KeyboardInterrupt:
            # Bare raise keeps the original traceback.
            raise
        except Exception as e:
            # Best effort: one broken case must not stop the remaining ones,
            # but it has to show up in the summary below.
            print("error in", l)
            print(e)
            errors_in.append(l)
    q.put("end")
    if len(errors_in) > 0:
        print("There were some errors in the following cases:", errors_in)
        print("These cases were ignored.")
    else:
        print("This worker has ended successfully, no errors to report")


def preprocess_multithreaded(predictor,
                             list_of_lists,
                             output_files,
                             num_processes=2,
                             segs_from_prev_stage=None):
    """Preprocess cases in background processes and yield the results.

    The cases are distributed round-robin over the worker processes; every
    worker runs ``preprocess_save_to_queue`` and feeds a shared queue that
    holds at most one item. This generator yields each item taken from the
    queue until every worker has sent its ``"end"`` sentinel. When the
    generator finishes or is closed, workers that are still alive are
    terminated, all workers are joined and the queue is closed.

    Args:
        predictor: Object providing ``preprocess_patient``, ``num_classes``
            and ``plans['transpose_forward']``.
        list_of_lists: One entry per case, handed to the workers.
        output_files: Output file name for each case.
        num_processes: Upper bound for the number of worker processes; never
            more workers than cases are started.
        segs_from_prev_stage: Optional list with one entry per case, handed
            to the workers. ``None`` is replaced by a list of ``None``.

    Yields:
        The items the workers put on the queue, except the ``"end"``
        sentinels.
    """
    if segs_from_prev_stage is None:
        segs_from_prev_stage = [None] * len(list_of_lists)

    worker_count = min(len(list_of_lists), num_processes)
    # Every label except 0.
    foreground_labels = list(range(1, predictor.num_classes))
    result_queue = Queue(1)

    workers = []
    for offset in range(worker_count):
        worker = Process(
            target=preprocess_save_to_queue,
            args=(predictor.preprocess_patient, result_queue,
                  list_of_lists[offset::worker_count],
                  output_files[offset::worker_count],
                  segs_from_prev_stage[offset::worker_count],
                  foreground_labels,
                  predictor.plans['transpose_forward']))
        worker.start()
        workers.append(worker)

    try:
        workers_done = 0
        while workers_done != worker_count:
            message = result_queue.get()
            if message == "end":
                workers_done += 1
            else:
                yield message
    finally:
        for worker in workers:
            if worker.is_alive():
                worker.terminate()
            worker.join()
        result_queue.close()


def _prepare_output_files(output_filenames):
    """Create the output folders and make every file name end in '.nii.gz'."""
    cleaned = []
    for out_path in output_filenames:
        folder, fname = os.path.split(out_path)
        if len(folder) > 0:
            os.makedirs(folder, exist_ok=True)
        if not fname.endswith(".nii.gz"):
            fname, _ = os.path.splitext(fname)
            fname = fname + ".nii.gz"
        cleaned.append(os.path.join(folder, fname))
    return cleaned


def _get_export_params(predictor, segmentation_export_kwargs):
    """Return (force_separate_z, interpolation_order, interpolation_order_z)."""
    if segmentation_export_kwargs is not None:
        params = segmentation_export_kwargs
    elif 'segmentation_export_params' in predictor.plans.keys():
        params = predictor.plans['segmentation_export_params']
    else:
        # Defaults used when neither the caller nor the plans specify them.
        return None, 1, 0
    return (params['force_separate_z'], params['interpolation_order'],
            params['interpolation_order_z'])


def predict_cases(predictor,
                  list_of_lists,
                  output_filenames,
                  save_npz,
                  num_threads_preprocessing,
                  num_threads_nifti_save,
                  segs_from_prev_stage=None,
                  do_tta=True,
                  mixed_precision=True,
                  overwrite_existing=False,
                  all_in_gpu=False,
                  step_size=0.5,
                  disable_postprocessing=False,
                  segmentation_export_kwargs: dict=None,
                  postprocessing_json_path=None):
    """Predict a list of cases and export the segmentations.

    Cases are preprocessed in background processes, predicted one by one with
    ``predictor`` and exported asynchronously by a pool of worker processes.
    Unless disabled, the exported files are postprocessed afterwards using the
    settings in ``postprocessing_json_path``.

    Args:
        predictor: Object providing ``plans``, ``data_aug_params`` and
            ``multi_folds_predict_preprocessed_data_return_seg_and_softmax``
            (plus whatever ``preprocess_multithreaded`` needs). An optional
            ``regions_class_order`` attribute is forwarded to the export.
        list_of_lists: One entry per case, passed to the preprocessing.
        output_filenames: One output path per case. Missing folders are
            created and names not ending in ``".nii.gz"`` get their extension
            replaced by it.
        save_npz: If true, an ``.npz`` path next to each output file is passed
            to the export, and a case whose ``.npz`` is missing counts as not
            yet predicted.
        num_threads_preprocessing: Maximum number of preprocessing processes.
        num_threads_nifti_save: Number of processes in the export pool.
        segs_from_prev_stage: Optional list with one entry per case, passed to
            the preprocessing.
        do_tta: Forwarded to the predictor as ``do_mirroring``.
        mixed_precision: Forwarded to the predictor.
        overwrite_existing: If false, cases whose output file (and ``.npz``
            file when ``save_npz`` is set) already exists are skipped.
        all_in_gpu: If true, 2 instead of 4 bytes per value are assumed when
            deciding whether the softmax output is too large to be sent to
            the export pool directly.
        step_size: Forwarded to the predictor.
        disable_postprocessing: If true, the postprocessing step is skipped.
        segmentation_export_kwargs: Optional dict with the keys
            ``'force_separate_z'``, ``'interpolation_order'`` and
            ``'interpolation_order_z'``. When ``None``, the values are taken
            from ``predictor.plans['segmentation_export_params']`` if present
            and otherwise default to ``None``, ``1`` and ``0``.
        postprocessing_json_path: Path of the postprocessing JSON file. When
            it is ``None`` or not an existing file, a warning is printed and
            postprocessing is skipped.

    Raises:
        AssertionError: If the number of inputs, outputs and previous-stage
            segmentations do not match.
    """
    assert len(list_of_lists) == len(
        output_filenames
    ), "The number of input files and output files is not same. Please check your input folder."
    if segs_from_prev_stage is not None:
        assert len(segs_from_prev_stage) == len(
            output_filenames
        ), "The cascade lowres predict results is not the same with the number of output files."

    pool = Pool(num_threads_nifti_save)
    try:
        # Names under which the exports are written, one per requested case.
        all_cleaned_output_files = _prepare_output_files(output_filenames)
        cleaned_output_files = all_cleaned_output_files

        if not overwrite_existing:
            print("number of cases:", len(list_of_lists))
            not_done_idx = [
                idx for idx, out_file in enumerate(cleaned_output_files)
                if (not os.path.isfile(out_file)) or
                (save_npz and not os.path.isfile(out_file[:-7] + '.npz'))
            ]

            cleaned_output_files = [
                cleaned_output_files[idx] for idx in not_done_idx
            ]
            list_of_lists = [list_of_lists[idx] for idx in not_done_idx]
            if segs_from_prev_stage is not None:
                segs_from_prev_stage = [
                    segs_from_prev_stage[idx] for idx in not_done_idx
                ]

            print("number of cases that still need to be predicted:",
                  len(cleaned_output_files))

        force_separate_z, interpolation_order, interpolation_order_z = \
            _get_export_params(predictor, segmentation_export_kwargs)

        print("starting preprocessing generator")
        print("file list: ", list_of_lists)
        print("preprocess output files: ", cleaned_output_files)
        preprocessing = preprocess_multithreaded(
            predictor, list_of_lists, cleaned_output_files,
            num_threads_preprocessing, segs_from_prev_stage)
        print("starting prediction...")

        export_results = []
        for output_filename, (d, dct) in preprocessing:
            if isinstance(d, str):
                # Oversized data arrives as the name of a temporary .npy file.
                npy_path = d
                d = np.load(npy_path)
                os.remove(npy_path)

            print("predicting", output_filename)
            softmax = predictor.multi_folds_predict_preprocessed_data_return_seg_and_softmax(
                data=d,
                do_mirroring=do_tta,
                mirror_axes=predictor.data_aug_params['mirror_axes'],
                use_sliding_window=True,
                step_size=step_size,
                use_gaussian=True,
                mixed_precision=mixed_precision, )

            # Undo the axis permutation applied during preprocessing; axis 0
            # of the softmax stays in place.
            transpose_forward = predictor.plans.get('transpose_forward')
            if transpose_forward is not None:
                transpose_backward = predictor.plans.get('transpose_backward')
                softmax = softmax.transpose(
                    [0] + [axis + 1 for axis in transpose_backward])

            npz_file = output_filename[:-7] + ".npz" if save_npz else None
            region_class_order = getattr(predictor, 'regions_class_order',
                                         None)

            # Arrays above roughly 2 GB cannot be sent to the pool workers, so
            # they are handed over through a temporary .npy file instead.
            # NOTE(review): 2 bytes per value with all_in_gpu presumably
            # reflects a half-precision softmax - confirm.
            bytes_per_voxel = 2 if all_in_gpu else 4
            if np.prod(softmax.shape) > (2e9 / bytes_per_voxel * 0.85):
                print(
                    "This output is too large for python process-process communication. Saving output temporarily to disk"
                )
                np.save(output_filename[:-7] + ".npy", softmax)
                softmax = output_filename[:-7] + ".npy"

            export_results.append(
                pool.starmap_async(save_segmentation_nifti_from_softmax, (
                    (softmax, output_filename, dct, interpolation_order,
                     region_class_order, None, None, npz_file, None,
                     force_separate_z, interpolation_order_z), )))

        print(
            "inference done. Now waiting for the segmentation export to finish...")
        for pending in export_results:
            pending.get()

        if not disable_postprocessing:
            pp_file = postprocessing_json_path
            # os.path.isfile(None) raises TypeError, so rule out None first.
            if pp_file is not None and os.path.isfile(pp_file):
                print("postprocessing...")
                shutil.copy(pp_file,
                            os.path.abspath(
                                os.path.dirname(output_filenames[0])))
                for_which_classes, min_valid_obj_size = load_postprocessing(
                    pp_file)
                # Postprocess the files in place under the names they were
                # actually exported with (the normalized '.nii.gz' names).
                n_files = len(all_cleaned_output_files)
                pool.starmap_async(
                    load_remove_save,
                    zip(all_cleaned_output_files, all_cleaned_output_files,
                        [for_which_classes] * n_files,
                        [min_valid_obj_size] * n_files)).get()
            else:
                print(
                    "WARNING! Cannot run postprocessing because the postprocessing file is missing. Make sure to run "
                    "consolidate_folds in the output folder of the model first!")
    except BaseException:
        # Do not leave export workers behind when prediction fails.
        pool.terminate()
        raise
    else:
        pool.close()
    finally:
        pool.join()