Commit 8ac840ff authored by Antti Virtanen, committed by Julien Chaumond
Browse files

Adding Finnish BERT.

parent a0d38645
......@@ -45,7 +45,9 @@ BERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
'bert-base-japanese': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-config.json",
'bert-base-japanese-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking-config.json",
'bert-base-japanese-char': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-config.json",
'bert-base-japanese-char-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-config.json"
'bert-base-japanese-char-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-config.json",
'bert-base-finnish-cased-v1': "http://dl.turkunlp.org/finbert/torch-transformers/bert-base-finnish-cased-v1/config.json",
'bert-base-finnish-uncased-v1': "http://dl.turkunlp.org/finbert/torch-transformers/bert-base-finnish-uncased-v1/config.json",
}
......
......@@ -51,7 +51,9 @@ BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
'bert-base-japanese': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-pytorch_model.bin",
'bert-base-japanese-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking-pytorch_model.bin",
'bert-base-japanese-char': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-pytorch_model.bin",
'bert-base-japanese-char-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-pytorch_model.bin"
'bert-base-japanese-char-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-pytorch_model.bin",
'bert-base-finnish-cased-v1': "http://dl.turkunlp.org/finbert/torch-transformers/bert-base-finnish-cased-v1/pytorch_model.bin",
'bert-base-finnish-uncased-v1': "http://dl.turkunlp.org/finbert/torch-transformers/bert-base-finnish-uncased-v1/pytorch_model.bin",
}
......
......@@ -51,7 +51,9 @@ TF_BERT_PRETRAINED_MODEL_ARCHIVE_MAP = {
'bert-base-japanese': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-tf_model.h5",
'bert-base-japanese-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-whole-word-masking-tf_model.h5",
'bert-base-japanese-char': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-tf_model.h5",
'bert-base-japanese-char-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-tf_model.h5"
'bert-base-japanese-char-whole-word-masking': "https://s3.amazonaws.com/models.huggingface.co/bert/cl-tohoku/bert-base-japanese-char-whole-word-masking-tf_model.h5",
#'bert-base-finnish-cased-v1': "http://dl.turkunlp.org/finbert/torch-transformers/bert-base-finnish-cased-v1/pytorch_model.bin",
#'bert-base-finnish-uncased-v1': "http://dl.turkunlp.org/finbert/torch-transformers/bert-base-finnish-uncased-v1/pytorch_model.bin",
}
......
......@@ -46,6 +46,8 @@ PRETRAINED_VOCAB_FILES_MAP = {
'bert-base-cased-finetuned-mrpc': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-cased-finetuned-mrpc-vocab.txt",
'bert-base-german-dbmdz-cased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-cased-vocab.txt",
'bert-base-german-dbmdz-uncased': "https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-german-dbmdz-uncased-vocab.txt",
'bert-base-finnish-cased-v1': "http://dl.turkunlp.org/finbert/torch-transformers/bert-base-finnish-cased-v1/vocab.txt",
'bert-base-finnish-uncased-v1': "http://dl.turkunlp.org/finbert/torch-transformers/bert-base-finnish-uncased-v1/vocab.txt",
}
}
......@@ -65,6 +67,8 @@ PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
'bert-base-cased-finetuned-mrpc': 512,
'bert-base-german-dbmdz-cased': 512,
'bert-base-german-dbmdz-uncased': 512,
'bert-base-finnish-cased-v1': 512,
'bert-base-finnish-uncased-v1': 512,
}
PRETRAINED_INIT_CONFIGURATION = {
......@@ -83,6 +87,8 @@ PRETRAINED_INIT_CONFIGURATION = {
'bert-base-cased-finetuned-mrpc': {'do_lower_case': False},
'bert-base-german-dbmdz-cased': {'do_lower_case': False},
'bert-base-german-dbmdz-uncased': {'do_lower_case': True},
'bert-base-finnish-cased-v1': {'do_lower_case': False},
'bert-base-finnish-uncased-v1': {'do_lower_case': True},
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment