"next_docs/en/vscode:/vscode.git/clone" did not exist on "a9dea5f0f8471ad5c03b574154f6590a1cab676b"
Commit cefd51c5 authored by James Betker's avatar James Betker Committed by Lysandre Debut
Browse files

Fix glue processor failing on tf datasets

parent ca6ce304
...@@ -80,11 +80,15 @@ def glue_convert_examples_to_features( ...@@ -80,11 +80,15 @@ def glue_convert_examples_to_features(
features = [] features = []
for (ex_index, example) in enumerate(examples): for (ex_index, example) in enumerate(examples):
if ex_index % 10000 == 0: len_examples = 0
logger.info("Writing example %d/%d" % (ex_index, len(examples)))
if is_tf_dataset: if is_tf_dataset:
example = processor.get_example_from_tensor_dict(example) example = processor.get_example_from_tensor_dict(example)
example = processor.tfds_map(example) example = processor.tfds_map(example)
len_examples = tf.data.experimental.cardinality(examples)
else:
len_examples = len(examples)
if ex_index % 10000 == 0:
logger.info("Writing example %d/%d" % (ex_index, len_examples))
inputs = tokenizer.encode_plus(example.text_a, example.text_b, add_special_tokens=True, max_length=max_length,) inputs = tokenizer.encode_plus(example.text_a, example.text_b, add_special_tokens=True, max_length=max_length,)
input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"] input_ids, token_type_ids = inputs["input_ids"], inputs["token_type_ids"]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment