Commit f5a236c3 authored by Wietse de Vries, committed by Julien Chaumond
Browse files

Add Dutch pre-trained BERT model

parent 6b4c3ee2
...@@ -88,6 +88,10 @@ For a list that includes community-uploaded models, refer to `https://huggingfac ...@@ -88,6 +88,10 @@ For a list that includes community-uploaded models, refer to `https://huggingfac
| | ``bert-base-finnish-uncased-v1`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | | ``bert-base-finnish-uncased-v1`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. |
| | | | Trained on uncased Finnish text. | | | | | Trained on uncased Finnish text. |
| | | (see `details on turkunlp.org <http://turkunlp.org/FinBERT/>`__). | | | | (see `details on turkunlp.org <http://turkunlp.org/FinBERT/>`__). |
| +------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| | ``bert-base-dutch-cased`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. |
| | | | Trained on cased Dutch text. |
| | | (see `details on wietsedv repository <https://github.com/wietsedv/bertje/>`__). |
+-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+ +-------------------+------------------------------------------------------------+---------------------------------------------------------------------------------------------------------------------------------------+
| GPT | ``openai-gpt`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. | | GPT | ``openai-gpt`` | | 12-layer, 768-hidden, 12-heads, 110M parameters. |
| | | | OpenAI GPT English model | | | | | OpenAI GPT English model |
......
...@@ -45,6 +45,7 @@ BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = { ...@@ -45,6 +45,7 @@ BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
"bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-config.json", "bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-config.json",
"bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/config.json", "bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/config.json",
"bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/config.json", "bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/config.json",
"bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/config.json",
} }
......
...@@ -53,6 +53,7 @@ BERT_PRETRAINED_MODEL_ARCHIVE_MAP = { ...@@ -53,6 +53,7 @@ BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
"bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-pytorch_model.bin", "bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-pytorch_model.bin",
"bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/pytorch_model.bin", "bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/pytorch_model.bin",
"bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/pytorch_model.bin", "bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/pytorch_model.bin",
"bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/pytorch_model.bin",
} }
......
...@@ -49,6 +49,7 @@ TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP = { ...@@ -49,6 +49,7 @@ TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
"bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-tf_model.h5", "bert-base-japanese-char-whole-word-masking": "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-tf_model.h5",
"bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/tf_model.h5", "bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/tf_model.h5",
"bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/tf_model.h5", "bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/tf_model.h5",
"bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/tf_model.h5"
} }
......
...@@ -48,6 +48,7 @@ PRETRAINED_VOCAB_FILES_MAP = { ...@@ -48,6 +48,7 @@ PRETRAINED_VOCAB_FILES_MAP = {
"bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-vocab.txt", "bert-base-german-dbmdz-uncased": "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-vocab.txt",
"bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/vocab.txt", "bert-base-finnish-cased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-cased-v1/vocab.txt",
"bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/vocab.txt", "bert-base-finnish-uncased-v1": "https://s3.amazonaws.com/models.huggingface.co/bert/TurkuNLP/bert-base-finnish-uncased-v1/vocab.txt",
"bert-base-dutch-cased": "https://s3.amazonaws.com/models.huggingface.co/bert/wietsedv/bert-base-dutch-cased/vocab.txt",
} }
} }
...@@ -69,6 +70,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = { ...@@ -69,6 +70,7 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
"bert-base-german-dbmdz-uncased": 512, "bert-base-german-dbmdz-uncased": 512,
"bert-base-finnish-cased-v1": 512, "bert-base-finnish-cased-v1": 512,
"bert-base-finnish-uncased-v1": 512, "bert-base-finnish-uncased-v1": 512,
"bert-base-dutch-cased": 512,
} }
PRETRAINED_INIT_CONFIGURATION = { PRETRAINED_INIT_CONFIGURATION = {
...@@ -89,6 +91,7 @@ PRETRAINED_INIT_CONFIGURATION = { ...@@ -89,6 +91,7 @@ PRETRAINED_INIT_CONFIGURATION = {
"bert-base-german-dbmdz-uncased": {"do_lower_case": True}, "bert-base-german-dbmdz-uncased": {"do_lower_case": True},
"bert-base-finnish-cased-v1": {"do_lower_case": False}, "bert-base-finnish-cased-v1": {"do_lower_case": False},
"bert-base-finnish-uncased-v1": {"do_lower_case": True}, "bert-base-finnish-uncased-v1": {"do_lower_case": True},
"bert-base-dutch-cased": {'do_lower_case': False},
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment