transformers · commit 4f3a54bf

Authored Oct 31, 2019 by Lysandre; committed by Lysandre Debut on Nov 26, 2019
Parent commit: c4403006

ALBERT can load pre-trained models. Doesn't inherit from BERT anymore.

Showing 4 changed files with 68 additions and 12 deletions (+68, -12)
transformers/__init__.py               +1  -1
transformers/configuration_albert.py   +9  -0
transformers/modeling_albert.py        +37 -7
transformers/tokenization_albert.py    +21 -4
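Taken together, the four diffs below are what let ALBERT go through the library's standard from_pretrained path. A rough usage sketch of the end result (not part of the commit; it assumes the 'albert-base' files referenced by the new archive maps are actually hosted at the S3 URLs):

    from transformers import AlbertModel

    # 'albert-base' is one of the shortcut names registered by the archive maps in this commit;
    # from_pretrained resolves it to a hosted config + weights and loads the model.
    model = AlbertModel.from_pretrained('albert-base')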
transformers/__init__.py

@@ -107,7 +107,7 @@ if is_torch_available():
                                          CAMEMBERT_PRETRAINED_MODEL_ARCHIVE_MAP)
     from .modeling_encoder_decoder import PreTrainedEncoderDecoder, Model2Model
-    from .modeling_albert import (AlbertModel, AlbertForMaskedLM)
+    from .modeling_albert import (AlbertModel, AlbertForMaskedLM, ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP)

     # Optimization
     from .optimization import (AdamW, get_constant_schedule, get_constant_schedule_with_warmup,
                                get_cosine_schedule_with_warmup,
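The only change here is re-exporting the ALBERT archive map next to the model classes. A minimal sketch of what that exposes (assuming this commit is installed):

    from transformers import AlbertModel, AlbertForMaskedLM, ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP

    # The map keys are the shortcut names from_pretrained accepts, e.g. 'albert-base'.
    print(sorted(ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP.keys()))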
transformers/configuration_albert.py

@@ -17,12 +17,21 @@
 from .configuration_utils import PretrainedConfig

+ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP = {
+    'albert-base': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-config.json",
+    'albert-large': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-config.json",
+    'albert-xlarge': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-config.json",
+    'albert-xxlarge': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-config.json",
+}
+
 class AlbertConfig(PretrainedConfig):
     """Configuration for `AlbertModel`.

     The default settings match the configuration of model `albert_xxlarge`.
     """
+    pretrained_config_archive_map = ALBERT_PRETRAINED_CONFIG_ARCHIVE_MAP

     def __init__(self,
                  vocab_size_or_config_json_file=30000,
                  embedding_size=128,
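With pretrained_config_archive_map set, AlbertConfig can resolve the shortcut names above on its own. A hedged sketch (the shortcut name is illustrative; the call is the standard PretrainedConfig.from_pretrained):

    from transformers.configuration_albert import AlbertConfig

    config = AlbertConfig.from_pretrained('albert-xxlarge')  # fetches albert-xxlarge-config.json
    local_config = AlbertConfig()                            # defaults match albert_xxlarge per the docstring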
transformers/modeling_albert.py

@@ -21,6 +21,7 @@ import logging
 import torch
 import torch.nn as nn
 from torch.nn import CrossEntropyLoss
+from transformers.modeling_utils import PreTrainedModel
 from transformers.configuration_albert import AlbertConfig
 from transformers.modeling_bert import BertEmbeddings, BertPreTrainedModel, BertModel, BertSelfAttention, prune_linear_layer, ACT2FN
 from .file_utils import add_start_docstrings
@@ -274,6 +275,29 @@ class AlbertTransformer(nn.Module):
         return outputs  # last-layer hidden state, (all hidden states), (all attentions)

+class AlbertPreTrainedModel(PreTrainedModel):
+    """ An abstract class to handle weights initialization and
+        a simple interface for dowloading and loading pretrained models.
+    """
+    config_class = AlbertConfig
+    pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
+    base_model_prefix = "albert"
+
+    def _init_weights(self, module):
+        """ Initialize the weights.
+        """
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            # Slightly different from the TF version which uses truncated_normal for initialization
+            # cf https://github.com/pytorch/pytorch/pull/5617
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+            if isinstance(module, (nn.Linear)) and module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+
 ALBERT_START_DOCSTRING = r""" The ALBERT model was proposed in
 `ALBERT: A Lite BERT for Self-supervised Learning of Language Representations`_
 by Zhenzhong Lan, Mingda Chen, Sebastian Goodman, Kevin Gimpel, Piyush Sharma, Radu Soricut. It presents
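For reference, this is what _init_weights does per module type when init_weights() (called in AlbertModel.__init__ below) walks the sub-modules; a standalone sketch with 0.02 standing in for config.initializer_range:

    import torch.nn as nn

    linear, norm = nn.Linear(8, 8), nn.LayerNorm(8)

    linear.weight.data.normal_(mean=0.0, std=0.02)  # Linear / Embedding weights
    linear.bias.data.zero_()                        # Linear bias
    norm.bias.data.zero_()                          # LayerNorm bias
    norm.weight.data.fill_(1.0)                     # LayerNorm weight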
@@ -338,7 +362,7 @@ ALBERT_INPUTS_DOCSTRING = r"""
 @add_start_docstrings("The bare ALBERT Model transformer outputting raw hidden-states without any specific head on top.",
                       ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING)
-class AlbertModel(BertModel):
+class AlbertModel(AlbertPreTrainedModel):
     r"""
     Outputs: `Tuple` comprising various elements depending on the configuration (config) and inputs:
         **last_hidden_state**: ``torch.FloatTensor`` of shape ``(batch_size, sequence_length, hidden_size)``

@@ -358,6 +382,12 @@ class AlbertModel(BertModel):
             list of ``torch.FloatTensor`` (one for each layer) of shape ``(batch_size, num_heads, sequence_length, sequence_length)``:
             Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
     """
+    config_class = AlbertConfig
+    pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
+    load_tf_weights = load_tf_weights_in_albert
+    base_model_prefix = "albert"
+
     def __init__(self, config):
         super(AlbertModel, self).__init__(config)
@@ -369,6 +399,11 @@ class AlbertModel(BertModel):
         self.init_weights()

+    def _resize_token_embeddings(self, new_num_tokens):
+        old_embeddings = self.embeddings.word_embeddings
+        new_embeddings = self._get_resized_embeddings(old_embeddings, new_num_tokens)
+        self.embeddings.word_embeddings = new_embeddings
+        return self.embeddings.word_embeddings
+
     def forward(self, input_ids, attention_mask=None, token_type_ids=None, position_ids=None, head_mask=None):
         if attention_mask is None:
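_resize_token_embeddings is the hook that the base class's public resize_token_embeddings calls, so vocabulary changes now work on ALBERT too. A hedged usage sketch (shortcut name and new vocabulary size are illustrative):

    from transformers import AlbertModel

    model = AlbertModel.from_pretrained('albert-base')
    # Grow the vocabulary, e.g. after adding tokens to the tokenizer; existing rows are copied,
    # new rows are freshly initialized by _get_resized_embeddings.
    model.resize_token_embeddings(30005)
    assert model.embeddings.word_embeddings.num_embeddings == 30005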
@@ -423,7 +458,7 @@ class AlbertMLMHead(nn.Module):
 @add_start_docstrings("Bert Model with a `language modeling` head on top.",
                       ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING)
-class AlbertForMaskedLM(BertPreTrainedModel):
+class AlbertForMaskedLM(AlbertPreTrainedModel):
     r"""
     **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
         Labels for computing the masked language modeling loss.

@@ -445,11 +480,6 @@ class AlbertForMaskedLM(BertPreTrainedModel):
             Attentions weights after the attention softmax, used to compute the weighted average in the self-attention heads.
     """
-    config_class = AlbertConfig
-    pretrained_model_archive_map = ALBERT_PRETRAINED_MODEL_ARCHIVE_MAP
-    load_tf_weights = load_tf_weights_in_albert
-    base_model_prefix = "albert"
-
     def __init__(self, config):
         super(AlbertForMaskedLM, self).__init__(config)
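With both heads now inheriting from AlbertPreTrainedModel, a masked-LM round trip looks like the sketch below (illustrative only; it assumes the 'albert-base' weights and SentencePiece model are hosted as mapped in this commit):

    import torch
    from transformers import AlbertForMaskedLM
    from transformers.tokenization_albert import AlbertTokenizer

    tokenizer = AlbertTokenizer.from_pretrained('albert-base')
    model = AlbertForMaskedLM.from_pretrained('albert-base')
    model.eval()

    input_ids = torch.tensor([tokenizer.encode("Hello, my dog is cute")])
    outputs = model(input_ids, masked_lm_labels=input_ids)
    loss, prediction_scores = outputs[:2]  # per the Outputs docstring when labels are given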
transformers/tokenization_albert.py

@@ -24,8 +24,25 @@ import os
 from shutil import copyfile

 logger = logging.getLogger(__name__)

 VOCAB_FILES_NAMES = {'vocab_file': 'spiece.model'}

+PRETRAINED_VOCAB_FILES_MAP = {
+    'vocab_file':
+    {
+        'albert-base': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-base-spiece.model",
+        'albert-large': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-large-spiece.model",
+        'albert-xlarge': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xlarge-spiece.model",
+        'albert-xxlarge': "https://s3.amazonaws.com/models.huggingface.co/bert/albert-xxlarge-spiece.model",
+    }
+}
+
+PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES = {
+    'albert-base': 512,
+    'albert-large': 512,
+    'albert-xlarge': 512,
+    'albert-xxlarge': 512,
+}
+
 SPIECE_UNDERLINE = u'▁'

 class AlbertTokenizer(PreTrainedTokenizer):

@@ -35,8 +52,8 @@ class AlbertTokenizer(PreTrainedTokenizer):
     - requires `SentencePiece <https://github.com/google/sentencepiece>`_
     """
     vocab_files_names = VOCAB_FILES_NAMES
-    # pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
-    # max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES
+    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+    max_model_input_sizes = PRETRAINED_POSITIONAL_EMBEDDINGS_SIZES

     def __init__(self, vocab_file,
                  do_lower_case=True, remove_space=True, keep_accents=False,
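Because the vocab-file map and max-length map are now active, the tokenizer also resolves shortcut names instead of requiring a local spiece.model path. A hedged sketch (the shortcut name is illustrative):

    from transformers.tokenization_albert import AlbertTokenizer

    tokenizer = AlbertTokenizer.from_pretrained('albert-xlarge')  # downloads albert-xlarge-spiece.model
    tokens = tokenizer.tokenize("SentencePiece marks word starts with ▁")
    ids = tokenizer.convert_tokens_to_ids(tokens)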