Unverified commit c1b9a11d, authored by Matt, committed by GitHub
Browse files

Convert tokenizer outputs for Keras in doc example (#20732)

* Convert tokenizer outputs for Keras in doc example

* Das deutsche Beispiel auch korrigieren
parent 0ba94ace
...@@ -185,6 +185,8 @@ from transformers import AutoTokenizer ...@@ -185,6 +185,8 @@ from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased") tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True) tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True)
# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
tokenized_data = dict(tokenized_data)
labels = np.array(dataset["label"]) # Label is already an array of 0 and 1 labels = np.array(dataset["label"]) # Label is already an array of 0 and 1
``` ```
......
...@@ -185,6 +185,8 @@ from transformers import AutoTokenizer ...@@ -185,6 +185,8 @@ from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased") tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True) tokenized_data = tokenizer(dataset["text"], return_tensors="np", padding=True)
# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
tokenized_data = dict(tokenized_data)
labels = np.array(dataset["label"]) # Label is already an array of 0 and 1 labels = np.array(dataset["label"]) # Label is already an array of 0 and 1
``` ```
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment