"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "4f08887053cae174d8a249ea410eb3655d5bc2eb"
Unverified Commit 0bf1e1ac authored by Kian Sierra McGettigan, committed by GitHub

Update no trainer examples for QA and Semantic Segmentation (#18474)

* swag_no_trainer updated with gather_for_metrics

* Removed unused variable samples_seen

* updated examples with gather_for_metrics
parent d2704c41
@@ -698,7 +698,7 @@ def main():
         step = 0
         # create a numpy array and fill it with -100.
         logits_concat = np.full((len(dataset), max_len), -100, dtype=np.float32)
-        # Now since we have create an array now we will populate it with the outputs gathered using accelerator.gather
+        # Now since we have create an array now we will populate it with the outputs gathered using accelerator.gather_for_metrics
         for i, output_logit in enumerate(start_or_end_logits):  # populate columns
             # We have to fill it such that we have to take the whole tensor and replace it on the newly created array
             # And after every iteration we have to change the step
@@ -876,11 +876,11 @@ def main():
                 end_top_index = accelerator.pad_across_processes(end_top_index, dim=1, pad_index=-100)
                 cls_logits = accelerator.pad_across_processes(cls_logits, dim=1, pad_index=-100)
-            all_start_top_log_probs.append(accelerator.gather(start_top_log_probs).cpu().numpy())
-            all_start_top_index.append(accelerator.gather(start_top_index).cpu().numpy())
-            all_end_top_log_probs.append(accelerator.gather(end_top_log_probs).cpu().numpy())
-            all_end_top_index.append(accelerator.gather(end_top_index).cpu().numpy())
-            all_cls_logits.append(accelerator.gather(cls_logits).cpu().numpy())
+            all_start_top_log_probs.append(accelerator.gather_for_metrics(start_top_log_probs).cpu().numpy())
+            all_start_top_index.append(accelerator.gather_for_metrics(start_top_index).cpu().numpy())
+            all_end_top_log_probs.append(accelerator.gather_for_metrics(end_top_log_probs).cpu().numpy())
+            all_end_top_index.append(accelerator.gather_for_metrics(end_top_index).cpu().numpy())
+            all_cls_logits.append(accelerator.gather_for_metrics(cls_logits).cpu().numpy())

         max_len = max([x.shape[1] for x in all_end_top_log_probs])  # Get the max_length of the tensor
@@ -936,11 +936,11 @@ def main():
                 end_top_index = accelerator.pad_across_processes(end_top_index, dim=1, pad_index=-100)
                 cls_logits = accelerator.pad_across_processes(cls_logits, dim=1, pad_index=-100)
-            all_start_top_log_probs.append(accelerator.gather(start_top_log_probs).cpu().numpy())
-            all_start_top_index.append(accelerator.gather(start_top_index).cpu().numpy())
-            all_end_top_log_probs.append(accelerator.gather(end_top_log_probs).cpu().numpy())
-            all_end_top_index.append(accelerator.gather(end_top_index).cpu().numpy())
-            all_cls_logits.append(accelerator.gather(cls_logits).cpu().numpy())
+            all_start_top_log_probs.append(accelerator.gather_for_metrics(start_top_log_probs).cpu().numpy())
+            all_start_top_index.append(accelerator.gather_for_metrics(start_top_index).cpu().numpy())
+            all_end_top_log_probs.append(accelerator.gather_for_metrics(end_top_log_probs).cpu().numpy())
+            all_end_top_index.append(accelerator.gather_for_metrics(end_top_index).cpu().numpy())
+            all_cls_logits.append(accelerator.gather_for_metrics(cls_logits).cpu().numpy())

         max_len = max([x.shape[1] for x in all_end_top_log_probs])  # Get the max_length of the tensor
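
For context, the hunks at lines 698/715 sit inside the helper that concatenates the per-batch gathered logits into one array. A minimal sketch of that fill loop, reconstructed from the context lines above (the helper name and the exact fill logic are assumptions; the example scripts may differ in detail):

import numpy as np

def create_and_fill_np_array(start_or_end_logits, num_examples, max_len):
    # Sketch reconstructed from the diff context; `create_and_fill_np_array`
    # is an assumed name. Each element of `start_or_end_logits` is one batch
    # of logits gathered with accelerator.gather_for_metrics, already
    # right-padded with -100.
    step = 0
    # create a numpy array and fill it with -100.
    logits_concat = np.full((num_examples, max_len), -100, dtype=np.float32)
    for output_logit in start_or_end_logits:  # populate rows batch by batch
        batch_size, cols = output_logit.shape
        if step + batch_size < num_examples:
            logits_concat[step : step + batch_size, :cols] = output_logit
        else:
            # the final gathered batch can overrun the dataset size; keep only real rows
            logits_concat[step:, :cols] = output_logit[: num_examples - step]
        step += batch_size
    return logits_concat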
...
@@ -715,7 +715,7 @@ def main():
         step = 0
         # create a numpy array and fill it with -100.
         logits_concat = np.full((len(dataset), max_len), -100, dtype=np.float64)
-        # Now since we have create an array now we will populate it with the outputs gathered using accelerator.gather
+        # Now since we have create an array now we will populate it with the outputs gathered using accelerator.gather_for_metrics
         for i, output_logit in enumerate(start_or_end_logits):  # populate columns
             # We have to fill it such that we have to take the whole tensor and replace it on the newly created array
             # And after every iteration we have to change the step
@@ -901,8 +901,8 @@ def main():
             start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
             end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)
-        all_start_logits.append(accelerator.gather(start_logits).cpu().numpy())
-        all_end_logits.append(accelerator.gather(end_logits).cpu().numpy())
+        all_start_logits.append(accelerator.gather_for_metrics(start_logits).cpu().numpy())
+        all_end_logits.append(accelerator.gather_for_metrics(end_logits).cpu().numpy())

     max_len = max([x.shape[1] for x in all_start_logits])  # Get the max_length of the tensor
@@ -940,8 +940,8 @@ def main():
             start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
             end_logits = accelerator.pad_across_processes(end_logits, dim=1, pad_index=-100)
-        all_start_logits.append(accelerator.gather(start_logits).cpu().numpy())
-        all_end_logits.append(accelerator.gather(end_logits).cpu().numpy())
+        all_start_logits.append(accelerator.gather_for_metrics(start_logits).cpu().numpy())
+        all_end_logits.append(accelerator.gather_for_metrics(end_logits).cpu().numpy())

     max_len = max([x.shape[1] for x in all_start_logits])  # Get the max_length of the tensor
     # concatenate the numpy array
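
The pad_across_processes calls in the hunks above exist because each process can produce logits with a different sequence length, and a cross-process gather needs equal shapes. A minimal runnable sketch of the pattern (toy tensor only; the real scripts gather model outputs):

import torch
from accelerate import Accelerator

accelerator = Accelerator()

# Toy logits; on a real multi-process run each process may have a different
# dim-1 length, so shapes must be equalized before gathering.
start_logits = torch.randn(4, 37, device=accelerator.device)
# Right-pad to the global max length with -100, the value the QA
# post-processing treats as "no logit"...
start_logits = accelerator.pad_across_processes(start_logits, dim=1, pad_index=-100)
# ...then gather; gather_for_metrics additionally drops samples duplicated
# to pad the last batch, which plain gather keeps.
all_start_logits = accelerator.gather_for_metrics(start_logits).cpu().numpy()
print(all_start_logits.shape)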
...
@@ -605,7 +605,6 @@ def main():
     logger.info("***** Running evaluation *****")
     model.eval()
-    samples_seen = 0
     for step, batch in enumerate(tqdm(eval_dataloader, disable=not accelerator.is_local_main_process)):
         with torch.no_grad():
             outputs = model(**batch)
@@ -615,15 +614,7 @@ def main():
         )

         predictions = upsampled_logits.argmax(dim=1)
-        predictions, references = accelerator.gather((predictions, batch["labels"]))
-        # If we are in a multiprocess environment, the last batch has duplicates
-        if accelerator.num_processes > 1:
-            if step == len(eval_dataloader) - 1:
-                predictions = predictions[: len(eval_dataloader.dataset) - samples_seen]
-                references = references[: len(eval_dataloader.dataset) - samples_seen]
-            else:
-                samples_seen += references.shape[0]
+        predictions, references = accelerator.gather_for_metrics((predictions, batch["labels"]))

         metric.add_batch(
             predictions=predictions,
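
The semantic segmentation hunk above is the clearest illustration of the change: gather_for_metrics gathers across processes and drops the samples that were duplicated to pad the last batch, which is exactly what the deleted samples_seen bookkeeping did by hand. A self-contained sketch of the resulting loop (toy model and data, not the example script):

import torch
from torch.utils.data import DataLoader, TensorDataset
from accelerate import Accelerator

accelerator = Accelerator()

# Toy setup: 10 samples with batch_size=4 leaves a ragged last batch,
# the case the deleted samples_seen logic used to handle manually.
inputs, labels = torch.randn(10, 3), torch.randint(0, 2, (10,))
model = torch.nn.Linear(3, 2)
loader = DataLoader(TensorDataset(inputs, labels), batch_size=4)
model, loader = accelerator.prepare(model, loader)

model.eval()
all_preds, all_refs = [], []
for xb, yb in loader:
    with torch.no_grad():
        logits = model(xb)
    # One call replaces gather + manual truncation of duplicated samples.
    preds, refs = accelerator.gather_for_metrics((logits.argmax(dim=-1), yb))
    all_preds.append(preds.cpu())
    all_refs.append(refs.cpu())
print(torch.cat(all_preds).shape)  # exactly 10 predictions, no duplicates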
......