Unverified Commit 21cd8c40 authored by Funtowicz Morgan, committed by GitHub

QA Pipelines fixes (#5429)



* Make the QA pipeline support models with more than 2 outputs, such as BART, assuming start/end are the first two outputs.
Signed-off-by: Morgan Funtowicz <funtowiczmo@gmail.com>
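
A minimal, self-contained sketch of what that slicing guards against (the dummy model and variable names below are made up for illustration; this is not the pipeline's actual code):

```python
import numpy as np

def dummy_qa_model(input_ids):
    """Stand-in for a model such as BART that returns more than two outputs."""
    seq_len = len(input_ids)
    start_logits = np.random.randn(seq_len)
    end_logits = np.random.randn(seq_len)
    extra_output = np.random.randn(seq_len, 4)  # e.g. extra hidden states / cache
    return start_logits, end_logits, extra_output

outputs = dummy_qa_model(list(range(10)))

# Unpacking the whole tuple (`start, end = outputs`) would raise a ValueError here;
# keeping only the first two outputs, assumed to be the start/end logits, works for
# both 2-output and multi-output models.
start, end = outputs[:2]
print(start.shape, end.shape)  # (10,) (10,)
```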

* When using the new padding/truncation paradigm, setting padding="max_length" + max_length=X actually pads the input up to max_length.

This results in every sample going through the QA pipeline being padded to size 384 whatever its actual input size, making the overall pipeline very slow.
Signed-off-by: Morgan Funtowicz <funtowiczmo@gmail.com>
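
A small tokenizer example of the behaviour described above (the checkpoint name and sentences are placeholders, not taken from the pipeline code): with padding="max_length", every encoding is padded to max_length tokens regardless of its real length.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")  # placeholder checkpoint

question, context = "Who fixed the pipeline?", "Morgan fixed the QA pipeline."

# Pads this tiny pair up to 384 tokens, so the model always runs on 384-token inputs.
padded = tokenizer(question, context, padding="max_length", max_length=384)
print(len(padded["input_ids"]))  # 384

# Without max-length padding, the encoding keeps its natural length.
unpadded = tokenizer(question, context)
print(len(unpadded["input_ids"]))  # just a handful of tokens
```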

* Mask padding & question before applying softmax. Softmax has been refactored to operate in log space for speed and stability.
Signed-off-by: default avatarMorgan Funtowicz <funtowiczmo@gmail.com>
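
A self-contained sketch of that masking-then-normalizing step, mirroring the refactored loop body in the diff below (the logit and p_mask values here are made up; in the pipeline, p_mask comes from each feature):

```python
import numpy as np

# Fake start logits and a p_mask where 1 marks question/padding positions.
start_logits = np.array([1.5, 0.2, 3.0, 2.1, -0.5])
p_mask = np.array([1, 1, 0, 0, 1])

# Mask question/padding tokens before normalizing.
start_ = start_logits * np.abs(p_mask - 1)

# Softmax computed in log space (log-sum-exp), as in the refactored code.
start_ = np.exp(start_ - np.log(np.sum(np.exp(start_), axis=-1, keepdims=True)))

print(start_.round(3), start_.sum())  # probabilities summing to 1
```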

* Format.
Signed-off-by: Morgan Funtowicz <funtowiczmo@gmail.com>

* Use PaddingStrategy.LONGEST instead of DO_NOT_PAD
Signed-off-by: Morgan Funtowicz <funtowiczmo@gmail.com>
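
For illustration only, the string equivalent of that strategy when calling a tokenizer directly (checkpoint and inputs are placeholders): "longest" pads a batch only up to its longest member, instead of leaving samples unpadded or padding to a fixed maximum.

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")  # placeholder checkpoint

batch = ["A short question?", "A noticeably longer question with several extra words in it?"]

# padding="longest" (PaddingStrategy.LONGEST): pad to the longest sample in the batch.
encoded = tokenizer(batch, padding="longest")
print([len(ids) for ids in encoded["input_ids"]])  # both equal to the longest length

# padding=False (PaddingStrategy.DO_NOT_PAD): ragged lengths, one per sample.
ragged = tokenizer(batch, padding=False)
print([len(ids) for ids in ragged["input_ids"]])
```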

* Revert "When using the new padding/truncation paradigm setting padding="max_length" + max_length=X actually pads the input up to max_length."

This reverts commit 1b00a9a2
Signed-off-by: Morgan Funtowicz <funtowiczmo@gmail.com>

* Trigger CI after unattended failure

* Trigger CI
parent 8438bab3
@@ -1272,33 +1272,34 @@ class QuestionAnsweringPipeline(Pipeline):
         with self.device_placement():
             if self.framework == "tf":
                 fw_args = {k: tf.constant(v) for (k, v) in fw_args.items()}
-                start, end = self.model(fw_args)
+                start, end = self.model(fw_args)[:2]
                 start, end = start.numpy(), end.numpy()
             else:
                 with torch.no_grad():
                     # Retrieve the score for the context tokens only (removing question tokens)
                     fw_args = {k: torch.tensor(v, device=self.device) for (k, v) in fw_args.items()}
-                    start, end = self.model(**fw_args)
+                    start, end = self.model(**fw_args)[:2]
                     start, end = start.cpu().numpy(), end.cpu().numpy()

         min_null_score = 1000000  # large and positive
         answers = []
         for (feature, start_, end_) in zip(features, start, end):
-            # Normalize logits and spans to retrieve the answer
-            start_ = np.exp(start_) / np.sum(np.exp(start_))
-            end_ = np.exp(end_) / np.sum(np.exp(end_))
             # Mask padding and question
             start_, end_ = (
                 start_ * np.abs(np.array(feature.p_mask) - 1),
                 end_ * np.abs(np.array(feature.p_mask) - 1),
             )
             # Mask CLS
             start_[0] = end_[0] = 0
+            # Normalize logits and spans to retrieve the answer
+            start_ = np.exp(start_ - np.log(np.sum(np.exp(start_), axis=-1, keepdims=True)))
+            end_ = np.exp(end_ - np.log(np.sum(np.exp(end_), axis=-1, keepdims=True)))
             if kwargs["handle_impossible_answer"]:
                 min_null_score = min(min_null_score, (start_[0] * end_[0]).item())
             start_[0] = end_[0] = 0
             starts, ends, scores = self.decode(start_, end_, kwargs["topk"], kwargs["max_answer_len"])
             char_to_word = np.array(example.char_to_word_offset)
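
As a usage illustration only (the default checkpoint is whatever the library selects, and the question/context strings are placeholders), the pipeline these fixes target can be exercised like this:

```python
from transformers import pipeline

# The question-answering pipeline; any QA-capable model, including multi-output
# architectures such as BART variants, is expected to work after these fixes.
qa = pipeline("question-answering")

result = qa(
    question="Who authored the fixes?",
    context="Morgan Funtowicz authored the QA pipeline fixes in transformers.",
)
print(result)  # e.g. {'score': ..., 'start': ..., 'end': ..., 'answer': 'Morgan Funtowicz'}
```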