Unverified commit 5b570754, authored by Lysandre Debut, committed by GitHub

Add BlenderBot small tokenizer to the init (#13367)



* Add BlenderBot small tokenizer to the init

* Update src/transformers/__init__.py
Co-authored-by: Suraj Patil <surajp815@gmail.com>

* Style

* Bugfix
Co-authored-by: Suraj Patil <surajp815@gmail.com>
parent 9e0fd780
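
With this change, the fast tokenizer is exported from the top-level `transformers` namespace. A minimal usage sketch, assuming the optional `tokenizers` backend is installed; the checkpoint name is used purely as an illustration and is not part of this commit:

```python
# Requires the optional `tokenizers` backend; without it, the dummy class
# added in this commit raises an informative error instead of importing.
from transformers import BlenderbotSmallTokenizerFast

# Hypothetical checkpoint name, for illustration only.
tokenizer = BlenderbotSmallTokenizerFast.from_pretrained("facebook/blenderbot_small-90M")
print(tokenizer("sample utterance for the bot").input_ids)
```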
@@ -354,7 +354,7 @@ Flax), PyTorch, and/or TensorFlow.
 +-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
 | Blenderbot                  | ✅             | ❌             | ✅              | ✅                 | ❌           |
 +-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
-| BlenderbotSmall             | ✅             | ❌             | ✅              | ✅                 | ❌           |
+| BlenderbotSmall             | ✅             | ✅             | ✅              | ✅                 | ❌           |
 +-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
 | CamemBERT                   | ✅             | ✅             | ✅              | ✅                 | ❌           |
 +-----------------------------+----------------+----------------+-----------------+--------------------+--------------+
@@ -57,6 +57,13 @@ BlenderbotSmallTokenizer
         create_token_type_ids_from_sequences, save_vocabulary


+BlenderbotSmallTokenizerFast
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. autoclass:: transformers.BlenderbotSmallTokenizerFast
+    :members:
+
+
 BlenderbotSmallModel
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -370,6 +370,7 @@ if is_tokenizers_available():
     _import_structure["models.roformer"].append("RoFormerTokenizerFast")
     _import_structure["models.clip"].append("CLIPTokenizerFast")
     _import_structure["models.convbert"].append("ConvBertTokenizerFast")
+    _import_structure["models.blenderbot_small"].append("BlenderbotSmallTokenizerFast")
     _import_structure["models.albert"].append("AlbertTokenizerFast")
     _import_structure["models.bart"].append("BartTokenizerFast")
     _import_structure["models.barthez"].append("BarthezTokenizerFast")

@@ -2182,6 +2183,7 @@ if TYPE_CHECKING:
         from .models.barthez import BarthezTokenizerFast
         from .models.bert import BertTokenizerFast
         from .models.big_bird import BigBirdTokenizerFast
+        from .models.blenderbot_small import BlenderbotSmallTokenizerFast
         from .models.camembert import CamembertTokenizerFast
         from .models.clip import CLIPTokenizerFast
         from .models.convbert import ConvBertTokenizerFast
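
The two hunks above follow the library's lazy-import convention: the name is registered in `_import_structure` for runtime resolution, while the `TYPE_CHECKING` branch gives static analyzers a real import. A simplified sketch of that idea (not the actual `_LazyModule` implementation) using a module-level `__getattr__`:

```python
import importlib
from typing import TYPE_CHECKING

# Map of submodule -> public names, mirroring _import_structure.
_import_structure = {"tokenization_blenderbot_small_fast": ["BlenderbotSmallTokenizerFast"]}

if TYPE_CHECKING:
    # Static type checkers see a normal import.
    from .tokenization_blenderbot_small_fast import BlenderbotSmallTokenizerFast
else:
    def __getattr__(name):
        # Resolve the attribute lazily on first access.
        for module_name, names in _import_structure.items():
            if name in names:
                module = importlib.import_module(f".{module_name}", __name__)
                return getattr(module, name)
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
```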
@@ -17,7 +17,7 @@
 # limitations under the License.
 from typing import TYPE_CHECKING

-from ...file_utils import _LazyModule, is_tf_available, is_torch_available
+from ...file_utils import _LazyModule, is_tf_available, is_tokenizers_available, is_torch_available


 _import_structure = {
@@ -25,6 +25,9 @@ _import_structure = {
     "tokenization_blenderbot_small": ["BlenderbotSmallTokenizer"],
 }

+if is_tokenizers_available():
+    _import_structure["tokenization_blenderbot_small_fast"] = ["BlenderbotSmallTokenizerFast"]
+
 if is_torch_available():
     _import_structure["modeling_blenderbot_small"] = [
         "BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST",
@@ -45,6 +48,9 @@ if TYPE_CHECKING:
     from .configuration_blenderbot_small import BLENDERBOT_SMALL_PRETRAINED_CONFIG_ARCHIVE_MAP, BlenderbotSmallConfig
     from .tokenization_blenderbot_small import BlenderbotSmallTokenizer

+    if is_tokenizers_available():
+        from .tokenization_blenderbot_small_fast import BlenderbotSmallTokenizerFast
+
     if is_torch_available():
         from .modeling_blenderbot_small import (
             BLENDERBOT_SMALL_PRETRAINED_MODEL_ARCHIVE_LIST,
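
The subpackage mirrors the same guard: the fast tokenizer is only registered and imported when the `tokenizers` backend is present. Downstream code can apply the same check before relying on it; a small sketch, with the checkpoint name used only as an illustration:

```python
# Sketch: fall back to the slow tokenizer when the `tokenizers` backend is absent.
from transformers.file_utils import is_tokenizers_available

if is_tokenizers_available():
    from transformers import BlenderbotSmallTokenizerFast as TokenizerCls
else:
    from transformers import BlenderbotSmallTokenizer as TokenizerCls

# Hypothetical checkpoint name, for illustration only.
tokenizer = TokenizerCls.from_pretrained("facebook/blenderbot_small-90M")
```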
@@ -74,8 +74,8 @@ class BlenderbotSmallTokenizerFast(PreTrainedTokenizerFast):
     ):
         super().__init__(
             ByteLevelBPETokenizer(
-                vocab_file=vocab_file,
-                merges_file=merges_file,
+                vocab=vocab_file,
+                merges=merges_file,
                 add_prefix_space=add_prefix_space,
                 trim_offsets=trim_offsets,
             ),
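
The bugfix aligns the wrapped backend call with the `tokenizers` library, whose `ByteLevelBPETokenizer` constructor takes `vocab` and `merges` rather than `vocab_file`/`merges_file` in recent releases. A standalone sketch of the corrected call, assuming a recent `tokenizers` release and local vocabulary files (the paths are placeholders):

```python
# Assumes local vocab/merges files; the paths are placeholders for illustration.
from tokenizers import ByteLevelBPETokenizer

backend_tokenizer = ByteLevelBPETokenizer(
    vocab="vocab.json",
    merges="merges.txt",
    add_prefix_space=True,
    trim_offsets=True,
)
print(backend_tokenizer.encode("hello there").tokens)
```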
@@ -47,6 +47,15 @@ class BigBirdTokenizerFast:
         requires_backends(cls, ["tokenizers"])


+class BlenderbotSmallTokenizerFast:
+    def __init__(self, *args, **kwargs):
+        requires_backends(self, ["tokenizers"])
+
+    @classmethod
+    def from_pretrained(cls, *args, **kwargs):
+        requires_backends(cls, ["tokenizers"])
+
+
 class CamembertTokenizerFast:
     def __init__(self, *args, **kwargs):
         requires_backends(self, ["tokenizers"])
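
The dummy class keeps `from transformers import BlenderbotSmallTokenizerFast` working even without the `tokenizers` backend; the failure is deferred until the class is actually used, at which point `requires_backends` raises with an installation hint. A simplified illustration of that pattern (hypothetical names, not the library's exact implementation):

```python
# Simplified stand-in for the dummy-object pattern: importing the name always
# succeeds, but using the class without the backend fails loudly.
def _requires_backends(obj, backends):
    name = obj.__name__ if isinstance(obj, type) else type(obj).__name__
    raise ImportError(f"{name} requires the following backends: {', '.join(backends)}")

class FastTokenizerPlaceholder:
    def __init__(self, *args, **kwargs):
        _requires_backends(self, ["tokenizers"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        _requires_backends(cls, ["tokenizers"])
```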