Examples: add Bloom support for token classification (#18632)

* examples: add Bloom support for token classification (FLAX, PyTorch and TensorFlow)
* examples: remove support for Bloom in token classification (FLAX and TensorFlow currently have no support for it)

Examples: add Bloom support for token classification (#18632)
* examples: add Bloom support for token classification (FLAX, PyTorch and TensorFlow)
* examples: remove support for Bloom in token classification (FLAX and TensorFlow currently have no support for it)
358478e7 · Stefan Schweter · GitHub · 6d175c11 · 358478e7 · 358478e7
Unverified Commit 358478e7 authored Aug 17, 2022 by Stefan Schweter Committed by GitHub Aug 17, 2022
2 changed files
--- a/examples/pytorch/token-classification/run_ner.py
+++ b/examples/pytorch/token-classification/run_ner.py
@@ -348,7 +348,7 @@ def main():
    )

    tokenizer_name_or_path = model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path
-    if config.model_type in {"gpt2", "roberta"}:
+    if config.model_type in {"bloom", "gpt2", "roberta"}:
        tokenizer = AutoTokenizer.from_pretrained(
            tokenizer_name_or_path,
            cache_dir=model_args.cache_dir,

--- a/examples/pytorch/token-classification/run_ner_no_trainer.py
+++ b/examples/pytorch/token-classification/run_ner_no_trainer.py
@@ -398,7 +398,7 @@ def main():
            "You can do it from another script, save it, and load it from here, using --tokenizer_name."
        )

-    if config.model_type in {"gpt2", "roberta"}:
+    if config.model_type in {"bloom", "gpt2", "roberta"}:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=True, add_prefix_space=True)
    else:
        tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=True)