Unverified commit 5b570754, authored by Lysandre Debut, committed by GitHub
Browse files

Add BlenderBot small tokenizer to the init (#13367)



* Add BlenderBot small tokenizer to the init

* Update src/transformers/__init__.py
Co-authored-by: Suraj Patil <surajp815@gmail.com>

* Style

* Bugfix
Co-authored-by: Suraj Patil <surajp815@gmail.com>
parent 9e0fd780
......@@ -354,7 +354,7 @@ Flax), PyTorch, and/or TensorFlow.
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| Blenderbot | ✅ | ❌ | ✅ | ✅ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| BlenderbotSmall | ✅ | ❌ | ✅ | ✅ | ❌ |
| BlenderbotSmall | ✅ | ✅ | ✅ | ✅ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
| CamemBERT | ✅ | ✅ | ✅ | ✅ | ❌ |
+-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
......
......@@ -57,6 +57,13 @@ BlenderbotSmallTokenizer
create_token_type_ids_from_sequences, save_vocabulary
BlenderbotSmallTokenizerFast
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. autoclass:: transformers.BlenderbotSmallTokenizerFast
:members:
BlenderbotSmallModel
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
......
......@@ -370,6 +370,7 @@ if is_tokenizers_available():
_import_structure["models.roformer"].append("RoFormerTokenizerFast")
_import_structure["models.clip"].append("CLIPTokenizerFast")
_import_structure["models.convbert"].append("ConvBertTokenizerFast")
_import_structure["models.blenderbot_small"].append("BlenderbotSmallTokenizerFast")
_import_structure["models.albert"].append("AlbertTokenizerFast")
_import_structure["models.bart"].append("BartTokenizerFast")
_import_structure["models.barthez"].append("BarthezTokenizerFast")
......@@ -2182,6 +2183,7 @@ if TYPE_CHECKING:
from .models.barthez import BarthezTokenizerFast
from .models.bert import BertTokenizerFast
from .models.big_bird import BigBirdTokenizerFast
from .models.blenderbot_small import BlenderbotSmallTokenizerFast
from .models.camembert import CamembertTokenizerFast
from .models.clip import CLIPTokenizerFast
from .models.convbert import ConvBertTokenizerFast
......
......@@ -17,7 +17,7 @@
# limitations under the License.
from typing import TYPE_CHECKING
from ...file_utils import _LazyModule, is_tf_available, is_torch_available
from ...file_utils import _LazyModule, is_tf_available, is_tokenizers_available, is_torch_available
_import_structure = {
......@@ -25,6 +25,9 @@ _import_structure = {
"tokenization_blenderbot_small": ["BlenderbotSmallTokenizer"],
}
if is_tokenizers_available():
_import_structure["tokenization_blenderbot_small_fast"] = ["BlenderbotSmallTokenizerFast"]
if is_torch_available():
_import_structure["modeling_blenderbot_small"] = [
"BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST",
......@@ -45,6 +48,9 @@ if TYPE_CHECKING:
from .configuration_blenderbot_small import BLENDERBOT_SMALL_PRETRAINED_CONFIG_ARCHIVE_MAP, BlenderbotSmallConfig
from .tokenization_blenderbot_small import BlenderbotSmallTokenizer
if is_tokenizers_available():
from .tokenization_blenderbot_small_fast import BlenderbotSmallTokenizerFast
if is_torch_available():
from .modeling_blenderbot_small import (
BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST,
......
......@@ -74,8 +74,8 @@ class BlenderbotSmallTokenizerFast(PreTrainedTokenizerFast):
):
super().__init__(
ByteLevelBPETokenizer(
vocab_file=vocab_file,
merges_file=merges_file,
vocab=vocab_file,
merges=merges_file,
add_prefix_space=add_prefix_space,
trim_offsets=trim_offsets,
),
......
......@@ -47,6 +47,15 @@ class BigBirdTokenizerFast:
requires_backends(cls, ["tokenizers"])
class BlenderbotSmallTokenizerFast:
    """Placeholder exposing the ``BlenderbotSmallTokenizerFast`` name.

    Both entry points ignore their arguments and only forward to
    ``requires_backends`` with the ``"tokenizers"`` backend name.
    NOTE(review): ``requires_backends`` is defined outside this excerpt;
    presumably it raises when the backend is not installed -- confirm.
    """

    def __init__(self, *args, **kwargs):
        # Arguments are accepted but never used; only the backend check runs.
        requires_backends(self, ["tokenizers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        # Same check as ``__init__``, but passing the class instead of an instance.
        requires_backends(cls, ["tokenizers"])
class CamembertTokenizerFast:
def __init__(self, *args, **kwargs):
requires_backends(self, ["tokenizers"])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment