"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "593c0704351f35208e44dae1d85be8238209eb2a"
Commit c64c2fc4 authored by Matthew Carrigan

Fixed embarrassing indentation problem

parent 0540d360
@@ -241,8 +241,7 @@ def main():
             from apex.optimizers import FP16_Optimizer
             from apex.optimizers import FusedAdam
         except ImportError:
-            raise ImportError(
-                "Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
+            raise ImportError("Please install apex from https://www.github.com/nvidia/apex to use distributed and fp16 training.")
         optimizer = FusedAdam(optimizer_grouped_parameters,
                               lr=args.learning_rate,
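Note (context, not part of this commit's diff): in these fine-tuning examples the FusedAdam optimizer built above is wrapped in apex's legacy FP16_Optimizer, which is why the training loop in the next hunk calls optimizer.backward(loss) rather than loss.backward() when args.fp16 is set. A minimal sketch of that wrapping, assuming the script's existing optimizer_grouped_parameters and args and the keyword names of the old apex API:

# Sketch only: the keyword arguments follow the legacy apex API and the
# script's existing args; treat them as assumptions, not lines from this commit.
from apex.optimizers import FP16_Optimizer, FusedAdam

optimizer = FusedAdam(optimizer_grouped_parameters,
                      lr=args.learning_rate,
                      bias_correction=False,
                      max_grad_norm=1.0)
if args.loss_scale == 0:
    # Dynamic loss scaling lets apex pick and adjust the fp16 loss scale.
    optimizer = FP16_Optimizer(optimizer, dynamic_loss_scale=True)
else:
    optimizer = FP16_Optimizer(optimizer, static_loss_scale=args.loss_scale)
# FP16_Optimizer exposes backward(), hence optimizer.backward(loss) in the loop below.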
@@ -259,57 +258,57 @@ def main():
                              warmup=args.warmup_proportion,
                              t_total=num_train_optimization_steps)
     global_step = 0
     logging.info("***** Running training *****")
     logging.info(f" Num examples = {total_train_examples}")
     logging.info(" Batch size = %d", args.train_batch_size)
     logging.info(" Num steps = %d", num_train_optimization_steps)
     model.train()
     for epoch in range(args.epochs):
         epoch_dataset = PregeneratedDataset(epoch=epoch, training_path=args.pregenerated_data, tokenizer=tokenizer,
                                             num_data_epochs=num_data_epochs)
         if args.local_rank == -1:
             train_sampler = RandomSampler(epoch_dataset)
         else:
             train_sampler = DistributedSampler(epoch_dataset)
         train_dataloader = DataLoader(epoch_dataset, sampler=train_sampler, batch_size=args.train_batch_size)
         tr_loss = 0
         nb_tr_examples, nb_tr_steps = 0, 0
         with tqdm(total=len(train_dataloader), desc=f"Epoch {epoch}") as pbar:
             for step, batch in enumerate(train_dataloader):
                 batch = tuple(t.to(device) for t in batch)
                 input_ids, input_mask, segment_ids, lm_label_ids, is_next = batch
                 loss = model(input_ids, segment_ids, input_mask, lm_label_ids, is_next)
                 if n_gpu > 1:
                     loss = loss.mean() # mean() to average on multi-gpu.
                 if args.gradient_accumulation_steps > 1:
                     loss = loss / args.gradient_accumulation_steps
-                    if args.fp16:
-                        optimizer.backward(loss)
-                    else:
-                        loss.backward()
-                    tr_loss += loss.item()
-                    nb_tr_examples += input_ids.size(0)
-                    nb_tr_steps += 1
-                    pbar.update(1)
-                    mean_loss = tr_loss / nb_tr_steps
-                    pbar.set_postfix_str(f"Loss: {mean_loss:.5f}")
-                    if (step + 1) % args.gradient_accumulation_steps == 0:
-                        if args.fp16:
-                            # modify learning rate with special warm up BERT uses
-                            # if args.fp16 is False, BertAdam is used that handles this automatically
-                            lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
-                            for param_group in optimizer.param_groups:
-                                param_group['lr'] = lr_this_step
-                        optimizer.step()
-                        optimizer.zero_grad()
-                        global_step += 1
-
-        # Save a trained model
-        logging.info("** ** * Saving fine-tuned model ** ** * ")
-        model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
-        output_model_file = args.output_dir / "pytorch_model.bin"
-        torch.save(model_to_save.state_dict(), str(output_model_file))
+                if args.fp16:
+                    optimizer.backward(loss)
+                else:
+                    loss.backward()
+                tr_loss += loss.item()
+                nb_tr_examples += input_ids.size(0)
+                nb_tr_steps += 1
+                pbar.update(1)
+                mean_loss = tr_loss / nb_tr_steps
+                pbar.set_postfix_str(f"Loss: {mean_loss:.5f}")
+                if (step + 1) % args.gradient_accumulation_steps == 0:
+                    if args.fp16:
+                        # modify learning rate with special warm up BERT uses
+                        # if args.fp16 is False, BertAdam is used that handles this automatically
+                        lr_this_step = args.learning_rate * warmup_linear(global_step/num_train_optimization_steps, args.warmup_proportion)
+                        for param_group in optimizer.param_groups:
+                            param_group['lr'] = lr_this_step
+                    optimizer.step()
+                    optimizer.zero_grad()
+                    global_step += 1
+
+    # Save a trained model
+    logging.info("** ** * Saving fine-tuned model ** ** * ")
+    model_to_save = model.module if hasattr(model, 'module') else model  # Only save the model it-self
+    output_model_file = args.output_dir / "pytorch_model.bin"
+    torch.save(model_to_save.state_dict(), str(output_model_file))
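Why the indentation mattered: in the removed version, everything below the loss scaling sat inside the if args.gradient_accumulation_steps > 1: block, so a run with gradient_accumulation_steps == 1 never reached backward() or optimizer.step(). A minimal, self-contained sketch of the gradient-accumulation pattern the fix restores; the toy model, data, and values here are made up for illustration and are not from the repo:

import torch

# Hypothetical toy setup, for illustration only.
gradient_accumulation_steps = 2
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
batches = [(torch.randn(4, 10), torch.randn(4, 1)) for _ in range(8)]

for step, (x, y) in enumerate(batches):
    loss = torch.nn.functional.mse_loss(model(x), y)
    if gradient_accumulation_steps > 1:
        # Only the loss scaling belongs under this condition...
        loss = loss / gradient_accumulation_steps
    # ...the backward pass must run for every batch,
    loss.backward()
    # ...and the optimizer step once per accumulation window.
    if (step + 1) % gradient_accumulation_steps == 0:
        optimizer.step()
        optimizer.zero_grad()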