"...git@developer.sourcefind.cn:chenpangpang/transformers.git" did not exist on "94a23487066fd4cf3d548716ade524aa2b71b06c"
Unverified commit d3c3e722, authored by Stefan Schweter and committed by GitHub
Browse files

[FLAX] Minor fixes in CLM example (#12914)

* readme: fix retrieval of vocab size for flax clm example

* examples: fix flax clm example when using training/evaluation files
parent 12e02e33
...@@ -211,7 +211,7 @@ from transformers import GPT2Config ...@@ -211,7 +211,7 @@ from transformers import GPT2Config
model_dir = "./norwegian-gpt2" # ${MODEL_DIR} model_dir = "./norwegian-gpt2" # ${MODEL_DIR}
config = GPT2Config.from_pretrained("gpt2", resid_pdrop=0.0, embd_pdrop=0.0, attn_pdrop=0.0, vocab_size=tokenizer.vocab_size) config = GPT2Config.from_pretrained("gpt2", resid_pdrop=0.0, embd_pdrop=0.0, attn_pdrop=0.0, vocab_size=tokenizer.get_vocab_size())
config.save_pretrained(model_dir) config.save_pretrained(model_dir)
``` ```
......
...@@ -308,14 +308,14 @@ def main(): ...@@ -308,14 +308,14 @@ def main():
extension = "text" extension = "text"
dataset = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir) dataset = load_dataset(extension, data_files=data_files, cache_dir=model_args.cache_dir)
if "validation" not in datasets.keys(): if "validation" not in dataset.keys():
datasets["validation"] = load_dataset( dataset["validation"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[:{data_args.validation_split_percentage}%]", split=f"train[:{data_args.validation_split_percentage}%]",
cache_dir=model_args.cache_dir, cache_dir=model_args.cache_dir,
) )
datasets["train"] = load_dataset( dataset["train"] = load_dataset(
extension, extension,
data_files=data_files, data_files=data_files,
split=f"train[{data_args.validation_split_percentage}%:]", split=f"train[{data_args.validation_split_percentage}%:]",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment