Unverified commit 510ad0a8, authored by peter-sk, committed by GitHub
Browse files

gpt2 multi-gpu fix (#23149)


Co-authored-by: Prof. Peter Schneider-Kamp <jps@ordbogen.com>
parent adb0760b
......@@ -1670,9 +1670,9 @@ class GPT2ForQuestionAnswering(GPT2PreTrainedModel):
if start_positions is not None and end_positions is not None:
# If we are on multi-GPU, split add a dimension
if len(start_positions.size()) > 1:
-start_positions = start_positions.squeeze(-1)
+start_positions = start_positions.squeeze(-1).to(start_logits.device)
 if len(end_positions.size()) > 1:
-end_positions = end_positions.squeeze(-1)
+end_positions = end_positions.squeeze(-1).to(end_logits.device)
# sometimes the start/end positions are outside our model inputs, we ignore these terms
ignored_index = start_logits.size(1)
start_positions = start_positions.clamp(0, ignored_index)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment