Commit 70c10caa authored by thomwolf

add option mentioned in #940

parent 077ad693
@@ -247,6 +247,9 @@ def evaluate(args, model, tokenizer, prefix=""):
 
 def load_and_cache_examples(args, task, tokenizer, evaluate=False):
+    if args.local_rank not in [-1, 0]:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset, and the others will use the cache
     processor = processors[task]()
     output_mode = output_modes[task]
     # Load data features from cache or dataset file
@@ -273,6 +276,9 @@ def load_and_cache_examples(args, task, tokenizer, evaluate=False):
         logger.info("Saving features into cached file %s", cached_features_file)
         torch.save(features, cached_features_file)
+    if args.local_rank == 0:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset, and the others will use the cache
     # Convert to Tensors and build dataset
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
...
@@ -272,6 +272,9 @@ def evaluate(args, model, tokenizer, prefix=""):
 
 def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
+    if args.local_rank not in [-1, 0]:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset, and the others will use the cache
     # Load data features from cache or dataset file
     input_file = args.predict_file if evaluate else args.train_file
     cached_features_file = os.path.join(os.path.dirname(input_file), 'cached_{}_{}_{}'.format(
@@ -296,6 +299,9 @@ def load_and_cache_examples(args, tokenizer, evaluate=False, output_examples=False):
         logger.info("Saving features into cached file %s", cached_features_file)
         torch.save(features, cached_features_file)
+    if args.local_rank == 0:
+        torch.distributed.barrier()  # Make sure only the first process in distributed training processes the dataset, and the others will use the cache
     # Convert to Tensors and build dataset
     all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
     all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
...