Commit 0c5c3a77 authored by shizhiw, committed by Taylor Robie

Replace multiprocess pool with popen_helper.get_pool() in data_preprocessing. (#5512)

* Use data_dir instead of flags.FLAGS.data_dir in data_preprocessing.py.

* Use data_dir instead of flags.FLAGS.data_dir in data_preprocessing.py.

* Replace multiprocess pool with popen_helper.get_pool() in data_preprocessing.
parent b88da6ee
@@ -333,8 +333,7 @@ def generate_train_eval_data(df, approx_num_shards, num_items, cache_paths,
   map_args = [(shards[i], i, num_items, cache_paths, process_seeds[i],
                match_mlperf)
               for i in range(approx_num_shards)]
-  with contextlib.closing(
-      multiprocessing.Pool(multiprocessing.cpu_count())) as pool:
+  with popen_helper.get_pool(multiprocessing.cpu_count()) as pool:
     test_shards = pool.map(_train_eval_map_fn, map_args)  # pylint: disable=no-member
   tf.logging.info("Merging test shards...")
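For context, the diff assumes that popen_helper.get_pool(n) returns a worker pool that can be used directly in a with statement, replacing the explicit contextlib.closing(multiprocessing.Pool(...)) wrapper. Below is a minimal sketch of such a helper; the function body and the usage example are illustrative assumptions, not the actual contents of popen_helper.py, which may construct its workers differently.

  # Hypothetical sketch of a pool factory in the spirit of popen_helper.get_pool().
  # Not the real popen_helper implementation: it simply wraps multiprocessing.Pool
  # in contextlib.closing so that a plain `with` statement calls close() on exit
  # (the pool's own __exit__ would call terminate() instead).
  import contextlib
  import multiprocessing


  def get_pool(num_workers):
    """Returns a context-managed pool with `num_workers` worker processes."""
    return contextlib.closing(multiprocessing.Pool(processes=num_workers))


  def _square(x):  # worker functions must be module-level so they can be pickled
    return x * x


  if __name__ == "__main__":
    with get_pool(multiprocessing.cpu_count()) as pool:
      print(pool.map(_square, range(8)))  # [0, 1, 4, 9, 16, 25, 36, 49]

With a wrapper like this, the call site in generate_train_eval_data keeps the same close-on-exit behavior as the contextlib.closing block the diff removes, while hiding the pool-construction details behind a single helper.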