"git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "443bf5e9e25fdd6dfa31e35879485edacd848b4f"
Unverified commit 35b16032, authored by Balaji, committed by GitHub
Browse files

Fixes #17128. (#17356)



The VisibleDeprecationWarning is addressed by specifying dtype=object when creating the numpy array.
Update code based on review feedback.
Undo whitespace changes to tokenization_utils_base.py.
Co-authored-by: I like data <ilikedata@nym.hush.com>
parent b8809091
...@@ -279,7 +279,6 @@ class QuestionAnsweringPipeline(ChunkPipeline): ...@@ -279,7 +279,6 @@ class QuestionAnsweringPipeline(ChunkPipeline):
truncation="only_second" if question_first else "only_first", truncation="only_second" if question_first else "only_first",
max_length=max_seq_len, max_length=max_seq_len,
stride=doc_stride, stride=doc_stride,
return_tensors="np",
return_token_type_ids=True, return_token_type_ids=True,
return_overflowing_tokens=True, return_overflowing_tokens=True,
return_offsets_mapping=True, return_offsets_mapping=True,
...@@ -294,12 +293,10 @@ class QuestionAnsweringPipeline(ChunkPipeline): ...@@ -294,12 +293,10 @@ class QuestionAnsweringPipeline(ChunkPipeline):
# p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer) # p_mask: mask with 1 for token than cannot be in the answer (0 for token which can be in an answer)
# We put 0 on the tokens from the context and 1 everywhere else (question and special tokens) # We put 0 on the tokens from the context and 1 everywhere else (question and special tokens)
p_mask = np.asarray( p_mask = [
[ [tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)]
[tok != 1 if question_first else 0 for tok in encoded_inputs.sequence_ids(span_id)] for span_id in range(num_spans)
for span_id in range(num_spans) ]
]
)
features = [] features = []
for span_idx in range(num_spans): for span_idx in range(num_spans):
...@@ -316,8 +313,6 @@ class QuestionAnsweringPipeline(ChunkPipeline): ...@@ -316,8 +313,6 @@ class QuestionAnsweringPipeline(ChunkPipeline):
for cls_index in cls_indices: for cls_index in cls_indices:
p_mask[span_idx][cls_index] = 0 p_mask[span_idx][cls_index] = 0
submask = p_mask[span_idx] submask = p_mask[span_idx]
if isinstance(submask, np.ndarray):
submask = submask.tolist()
features.append( features.append(
SquadFeatures( SquadFeatures(
input_ids=input_ids_span_idx, input_ids=input_ids_span_idx,
...@@ -344,7 +339,7 @@ class QuestionAnsweringPipeline(ChunkPipeline): ...@@ -344,7 +339,7 @@ class QuestionAnsweringPipeline(ChunkPipeline):
for i, feature in enumerate(features): for i, feature in enumerate(features):
fw_args = {} fw_args = {}
others = {} others = {}
model_input_names = self.tokenizer.model_input_names + ["p_mask"] model_input_names = self.tokenizer.model_input_names + ["p_mask", "token_type_ids"]
for k, v in feature.__dict__.items(): for k, v in feature.__dict__.items():
if k in model_input_names: if k in model_input_names:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment