"vscode:/vscode.git/clone" did not exist on "c19d04623eacfbc2c452397a5eda0fde42db3fc5"
Unverified Commit d447c460 authored by Patrick Deutschmann, committed by GitHub
Browse files

Speed up TF token classification postprocessing by converting complete tensors to numpy (#19976)



* Speed up TF postprocessing by converting to numpy before

* Fix bug that was triggered when offset_mapping was None
Co-authored-by: Patrick Deutschmann <patrick.deutschmann@dedalus.com>
parent 06886d5a
...@@ -238,6 +238,10 @@ class TokenClassificationPipeline(Pipeline): ...@@ -238,6 +238,10 @@ class TokenClassificationPipeline(Pipeline):
shifted_exp = np.exp(logits - maxes) shifted_exp = np.exp(logits - maxes)
scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True) scores = shifted_exp / shifted_exp.sum(axis=-1, keepdims=True)
if self.framework == "tf":
input_ids = input_ids.numpy()
offset_mapping = offset_mapping.numpy() if offset_mapping is not None else None
pre_entities = self.gather_pre_entities( pre_entities = self.gather_pre_entities(
sentence, input_ids, scores, offset_mapping, special_tokens_mask, aggregation_strategy sentence, input_ids, scores, offset_mapping, special_tokens_mask, aggregation_strategy
) )
...@@ -276,9 +280,6 @@ class TokenClassificationPipeline(Pipeline): ...@@ -276,9 +280,6 @@ class TokenClassificationPipeline(Pipeline):
if self.framework == "pt": if self.framework == "pt":
start_ind = start_ind.item() start_ind = start_ind.item()
end_ind = end_ind.item() end_ind = end_ind.item()
else:
start_ind = int(start_ind.numpy())
end_ind = int(end_ind.numpy())
word_ref = sentence[start_ind:end_ind] word_ref = sentence[start_ind:end_ind]
if getattr(self.tokenizer._tokenizer.model, "continuing_subword_prefix", None): if getattr(self.tokenizer._tokenizer.model, "continuing_subword_prefix", None):
# This is a BPE, word aware tokenizer, there is a correct way # This is a BPE, word aware tokenizer, there is a correct way
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment