Commit 3c24e4be authored by VictorSanh

Multi-Gpu loss - Cleaning

parent 5de1517d
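The substantive change in this commit is in the training loop: `loss.backward()` becomes `loss.mean().backward()`. When the model is wrapped in torch.nn.DataParallel (the usual multi-GPU setup for these scripts), each replica returns its own loss and the wrapper gathers them into a vector with one entry per GPU; calling backward() on that non-scalar tensor raises an error, while averaging it first restores a scalar. Below is a minimal, self-contained sketch of that behaviour; ToyQAModel, the DataParallel wrap, and the tensor shapes are hypothetical illustrations, not code from this repository.

import torch
import torch.nn as nn

class ToyQAModel(nn.Module):
    """Hypothetical stand-in for a model that computes its own loss in forward()."""
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(8, 2)

    def forward(self, x):
        # Each replica reduces its share of the batch to a single loss value.
        return self.linear(x).pow(2).mean()

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ToyQAModel().to(device)
if torch.cuda.device_count() > 1:
    # DataParallel splits the batch across GPUs and gathers each replica's
    # scalar loss, so `loss` comes back as a vector with one entry per GPU.
    model = nn.DataParallel(model)

batch = torch.randn(16, 8, device=device)
loss = model(batch)
model.zero_grad()
# .backward() needs a scalar; taking the mean collapses the per-GPU losses
# (and is a no-op on a single device), which is what the new line does.
loss.mean().backward()

On a single GPU the old loss.backward() was fine; once the loss is gathered from several replicas it is no longer a scalar, hence the switch to loss.mean().backward() below.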
@@ -27,6 +27,7 @@ import math
 import os
 from tqdm import tqdm, trange
 import random
+import numpy as np
 import torch
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
@@ -718,23 +719,6 @@ def main():
     parser.add_argument("--max_answer_length", default=30, type=int,
                         help="The maximum length of an answer that can be generated. This is needed because the start "
                              "and end predictions are not conditioned on one another.")
-    ### BEGIN - TO DELETE EVENTUALLY --> NO SENSE IN PYTORCH ###
-    # parser.add_argument("--use_tpu", default=False, action='store_true', help="Whether to use TPU or GPU/CPU.")
-    # parser.add_argument("--tpu_name", default=None, type=str,
-    #                     help="The Cloud TPU to use for training. This should be either the name used when creating the "
-    #                     "Cloud TPU, or a grpc://ip.address.of.tpu:8470 url.")
-    # parser.add_argument("--tpu_zone", default=None, type=str,
-    #                     help="[Optional] GCE zone where the Cloud TPU is located in. If not specified, we will attempt "
-    #                     "to automatically detect the GCE project from metadata.")
-    # parser.add_argument("--gcp_project", default=None, type=str,
-    #                     help="[Optional] Project name for the Cloud TPU-enabled project. If not specified, we will attempt "
-    #                     "to automatically detect the GCE project from metadata.")
-    # parser.add_argument("--master", default=None, type=str, help="[Optional] TensorFlow master URL.")
-    # parser.add_argument("--num_tpu_cores", default=8, type=int, help="Only used if `use_tpu` is True. "
-    #                     "Total number of TPU cores to use.")
-    ### END - TO DELETE EVENTUALLY --> NO SENSE IN PYTORCH ###
     parser.add_argument("--verbose_logging", default=False, action='store_true',
                         help="If true, all of the warnings related to data processing will be printed. "
                              "A number of warnings are expected for a normal SQuAD evaluation.")
@@ -836,16 +820,12 @@ def main():
         logger.info("  Batch size = %d", args.train_batch_size)
         logger.info("  Num steps = %d", num_train_steps)
-        logger.info("HHHHH Loading data")
         all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long)
         all_input_mask = torch.tensor([f.input_mask for f in train_features], dtype=torch.long)
         all_segment_ids = torch.tensor([f.segment_ids for f in train_features], dtype=torch.long)
-        #all_label_ids = torch.tensor([f.label_id for f in train_features], dtype=torch.long)
         all_start_positions = torch.tensor([f.start_position for f in train_features], dtype=torch.long)
         all_end_positions = torch.tensor([f.end_position for f in train_features], dtype=torch.long)
-        logger.info("HHHHH Creating dataset")
-        #train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)
         train_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_start_positions, all_end_positions)
         if args.local_rank == -1:
             train_sampler = RandomSampler(train_data)
@@ -869,15 +849,11 @@ def main():
                 start_positions = start_positions.view(-1, 1)
                 end_positions = end_positions.view(-1, 1)
-                logger.info("HHHHH Forward")
                 loss, _ = model(input_ids, segment_ids, input_mask, start_positions, end_positions)
                 model.zero_grad()
-                logger.info("HHHHH Backward")
-                loss.backward()
-                logger.info("HHHHH Loading data")
+                loss.mean().backward()
                 optimizer.step()
                 global_step += 1
-                logger.info("Done %s steps", global_step)

     if args.do_predict:
         eval_examples = read_squad_examples(
@@ -898,10 +874,8 @@ def main():
         all_input_ids = torch.tensor([f.input_ids for f in eval_features], dtype=torch.long)
         all_input_mask = torch.tensor([f.input_mask for f in eval_features], dtype=torch.long)
         all_segment_ids = torch.tensor([f.segment_ids for f in eval_features], dtype=torch.long)
-        #all_label_ids = torch.tensor([f.label_id for f in eval_features], dtype=torch.long)
         all_example_index = torch.arange(all_input_ids.size(0), dtype=torch.long)
-        #eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids, all_example_index)
         eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_example_index)
         if args.local_rank == -1:
             eval_sampler = SequentialSampler(eval_data)
@@ -912,7 +886,6 @@ def main():
         model.eval()
         all_results = []
         logger.info("Start evaluating")
-        #for input_ids, input_mask, segment_ids, label_ids, example_index in eval_dataloader:
         for input_ids, input_mask, segment_ids, example_index in eval_dataloader:
             if len(all_results) % 1000 == 0:
                 logger.info("Processing example: %d" % (len(all_results)))
@@ -924,9 +897,7 @@ def main():
             start_logits, end_logits = model(input_ids, segment_ids, input_mask)
             unique_id = [int(eval_features[e.item()].unique_id) for e in example_index]
-            #start_logits = [x.item() for x in start_logits]
             start_logits = [x.view(-1).detach().cpu().numpy() for x in start_logits]
-            #end_logits = [x.item() for x in end_logits]
             end_logits = [x.view(-1).detach().cpu().numpy() for x in end_logits]
             for idx, i in enumerate(unique_id):
                 s = [float(x) for x in start_logits[idx]]
@@ -938,11 +909,6 @@ def main():
                         end_logits=e
                     )
                 )
-            # all_results.append(
-            #     RawResult(
-            #         unique_id=unique_id,
-            #         start_logits=start_logits,
-            #         end_logits=end_logits))
     output_prediction_file = os.path.join(args.output_dir, "predictions.json")
     output_nbest_file = os.path.join(args.output_dir, "nbest_predictions.json")
...
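For reference, here is a rough standalone sketch of what the evaluation hunks above do with the batched logits: build one RawResult per feature, with that example's logits moved to the CPU and stored as plain floats. The shapes and ids below are made up, and RawResult is re-declared here only so the snippet runs on its own; the script defines its own version.

import collections
import torch

# Re-declared with the fields the diff uses; the script has its own definition.
RawResult = collections.namedtuple("RawResult", ["unique_id", "start_logits", "end_logits"])

# Hypothetical batch of QA logits: one row per feature, one column per token.
batch_size, seq_len = 2, 6
start_logits = torch.randn(batch_size, seq_len)
end_logits = torch.randn(batch_size, seq_len)
unique_ids = [1000000000, 1000000001]  # made-up feature ids

all_results = []
for idx, uid in enumerate(unique_ids):
    # Move each example's logits to the CPU and keep them as plain floats,
    # mirroring the per-example conversion in the evaluation loop above.
    s = [float(x) for x in start_logits[idx].detach().cpu().numpy()]
    e = [float(x) for x in end_logits[idx].detach().cpu().numpy()]
    all_results.append(RawResult(unique_id=uid, start_logits=s, end_logits=e))

print(len(all_results), all_results[0].unique_id)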