chenpangpang / transformers
"test/vscode:/vscode.git/clone" did not exist on "ae9560dabb462708fadcb9d5387c655e32a5399a"
Commit 1e5b31c3, authored Oct 30, 2019 by Lysandre, committed by Lysandre Debut on Nov 26, 2019

Several fixes and improvements

Parent: ee20201d
Showing 3 changed files with 19 additions and 19 deletions.
transformers/modeling_albert.py             +18 -18
transformers/tests/fixtures/spiece.model     +0  -0
transformers/tokenization_albert.py          +1  -1
transformers/modeling_albert.py
@@ -7,6 +7,7 @@ import torch.nn as nn
 from torch.nn import CrossEntropyLoss
 from transformers.configuration_albert import AlbertConfig
 from transformers.modeling_bert import BertEmbeddings, BertModel, BertSelfAttention, prune_linear_layer, gelu_new
+from transformers.modeling_utils import PreTrainedModel
 from .file_utils import add_start_docstrings
 
 logger = logging.getLogger(__name__)
@@ -37,18 +38,17 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
         print(name)
     for name, array in zip(names, arrays):
-        print(name)
-        og = name
+        original_name = name
         name = name.replace("ffn_1", "ffn")
         name = name.replace("ffn/intermediate/output", "ffn_output")
         name = name.replace("attention_1", "attention")
-        name = name.replace("cls/predictions/transform", "predictions")
-        name = name.replace("LayerNorm_1", "attention/LayerNorm")
+        name = name.replace("cls/predictions", "predictions")
+        name = name.replace("transform/", "")
+        name = name.replace("LayerNorm_1", "full_layer_layer_norm")
+        name = name.replace("LayerNorm", "attention/LayerNorm")
         name = name.replace("inner_group_", "albert_layers/")
         name = name.replace("group_", "albert_layer_groups/")
         name = name.split('/')
+        print(name)
         pointer = model
         for m_name in name:
             if re.fullmatch(r'[A-Za-z]+_\d+', m_name):
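As a side note on the hunk above (not part of the commit): the renaming is just a chain of plain str.replace calls followed by a split on '/'. A minimal, standalone sketch of how a checkpoint name flows through the new chain; the sample TF-style variable name is made up for illustration.

# Standalone sketch of the renaming chain shown in the hunk above; the input
# name is a hypothetical TF-style ALBERT variable name, used only to exercise
# the replace calls introduced in this commit.
def remap(name):
    name = name.replace("ffn_1", "ffn")
    name = name.replace("ffn/intermediate/output", "ffn_output")
    name = name.replace("attention_1", "attention")
    name = name.replace("cls/predictions", "predictions")
    name = name.replace("transform/", "")
    name = name.replace("LayerNorm_1", "full_layer_layer_norm")
    name = name.replace("LayerNorm", "attention/LayerNorm")
    name = name.replace("inner_group_", "albert_layers/")
    name = name.replace("group_", "albert_layer_groups/")
    return name.split('/')

print(remap("bert/encoder/transformer/group_0/inner_group_0/LayerNorm_1/gamma"))
# ['bert', 'encoder', 'transformer', 'albert_layer_groups', '0', 'albert_layers', '0',
#  'full_layer_layer_norm', 'gamma']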
@@ -78,13 +78,12 @@ def load_tf_weights_in_albert(model, config, tf_checkpoint_path):
                 pointer = getattr(pointer, 'weight')
             elif m_name == 'kernel':
                 array = np.transpose(array)
-                print("transposed")
         try:
             assert pointer.shape == array.shape
         except AssertionError as e:
             e.args += (pointer.shape, array.shape)
             raise
-        print("Initialize PyTorch weight {} from {}".format(name, og))
+        print("Initialize PyTorch weight {} from {}".format(name, original_name))
         pointer.data = torch.from_numpy(array)
 
     return model
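For context on the shape check kept above (not part of the commit): TF Dense kernels are stored as (in_features, out_features), while torch.nn.Linear.weight is (out_features, in_features), hence the np.transpose for 'kernel' entries. A self-contained sketch of that pattern with stand-in objects, not the transformers loader itself:

import numpy as np
import torch
import torch.nn as nn

# Illustrative only: a stand-in Linear layer and a fake TF "kernel" array.
layer = nn.Linear(4, 3)                       # weight shape: (3, 4)
array = np.zeros((4, 3), dtype=np.float32)    # TF kernel shape: (in, out)
array = np.transpose(array)                   # -> (3, 4), matching the Linear weight

pointer = getattr(layer, 'weight')
try:
    assert pointer.shape == array.shape
except AssertionError as e:
    e.args += (pointer.shape, array.shape)    # same error enrichment as in the diff
    raise
pointer.data = torch.from_numpy(array)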
@@ -177,9 +176,9 @@ class AlbertAttention(BertSelfAttention):
         b = self.dense.bias
 
         projected_context_layer = torch.einsum("bfnd,ndh->bfh", context_layer, w) + b
-        projected_context_layer = self.dropout(projected_context_layer)
-        layernormed_context_layer = self.LayerNorm(input_ids + projected_context_layer)
-        return layernormed_context_layer, projected_context_layer, reshaped_context_layer, context_layer, attention_scores, attention_probs, attention_mask
+        projected_context_layer_dropout = self.dropout(projected_context_layer)
+        layernormed_context_layer = self.LayerNorm(input_ids + projected_context_layer_dropout)
+        return layernormed_context_layer
 
 
 class AlbertLayer(nn.Module):
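To make the change above concrete (illustration only, not part of the commit): the pre-dropout projection keeps its own name, dropout produces a separate tensor, and the residual LayerNorm is applied to the dropped-out projection; only the layer-normed tensor is returned. A minimal sketch of that data flow with plain nn modules rather than the actual AlbertAttention class:

import torch
import torch.nn as nn

# Stand-ins; sizes are arbitrary and purely illustrative.
hidden_size = 8
dropout = nn.Dropout(0.1)
layer_norm = nn.LayerNorm(hidden_size)

hidden_states = torch.randn(2, 5, hidden_size)            # plays the role of input_ids in the hunk
projected_context_layer = torch.randn(2, 5, hidden_size)  # plays the role of the einsum projection + bias
projected_context_layer_dropout = dropout(projected_context_layer)
layernormed_context_layer = layer_norm(hidden_states + projected_context_layer_dropout)
print(layernormed_context_layer.shape)                    # torch.Size([2, 5, 8])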
@@ -187,17 +186,17 @@ class AlbertLayer(nn.Module):
         super(AlbertLayer, self).__init__()
 
         self.config = config
-        self.LayerNorm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
+        self.full_layer_layer_norm = nn.LayerNorm(config.hidden_size, eps=config.layer_norm_eps)
         self.attention = AlbertAttention(config)
         self.ffn = nn.Linear(config.hidden_size, config.intermediate_size)
         self.ffn_output = nn.Linear(config.intermediate_size, config.hidden_size)
 
     def forward(self, hidden_states, attention_mask=None, head_mask=None):
-        attention_output = self.attention(hidden_states, attention_mask)[0]
+        attention_output = self.attention(hidden_states, attention_mask)
         ffn_output = self.ffn(attention_output)
         ffn_output = gelu_new(ffn_output)
         ffn_output = self.ffn_output(ffn_output)
-        hidden_states = self.LayerNorm(ffn_output + attention_output)
+        hidden_states = self.full_layer_layer_norm(ffn_output + attention_output)
 
         return hidden_states
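A short aside on the hunk above (not part of the commit): since AlbertAttention now returns a single tensor, the layer no longer indexes [0], and the second residual LayerNorm is renamed full_layer_layer_norm so the TF name remapping earlier in this diff can tell it apart from the attention LayerNorm. A hedged sketch of the feed-forward path with plain modules, with gelu_new approximated here by torch.nn.functional.gelu:

import torch
import torch.nn as nn
import torch.nn.functional as F

hidden_size, intermediate_size = 8, 32
ffn = nn.Linear(hidden_size, intermediate_size)
ffn_output_proj = nn.Linear(intermediate_size, hidden_size)
full_layer_layer_norm = nn.LayerNorm(hidden_size)

attention_output = torch.randn(2, 5, hidden_size)   # stand-in for self.attention(...)
ffn_output = ffn(attention_output)
ffn_output = F.gelu(ffn_output)                      # the real code uses gelu_new
ffn_output = ffn_output_proj(ffn_output)
hidden_states = full_layer_layer_norm(ffn_output + attention_output)
print(hidden_states.shape)                           # torch.Size([2, 5, 8])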
@@ -352,16 +351,17 @@ class AlbertModel(BertModel):
         encoder_outputs = self.encoder(embedding_output,
                                        extended_attention_mask,
                                        head_mask=head_mask)
 
         sequence_output = encoder_outputs[0]
         pooled_output = self.pooler_activation(self.pooler(sequence_output[:, 0]))
 
-        outputs = (sequence_output, pooled_output,) + encoder_outputs[1:]  # add hidden_states and attentions if they are here
+        outputs = (sequence_output, pooled_output) + encoder_outputs[1:]  # add hidden_states and attentions if they are here
         return outputs
 
 
 @add_start_docstrings("Bert Model with a `language modeling` head on top.",
                       ALBERT_START_DOCSTRING, ALBERT_INPUTS_DOCSTRING)
-class AlbertForMaskedLM(nn.Module):
+class AlbertForMaskedLM(PreTrainedModel):
     r"""
     **masked_lm_labels**: (`optional`) ``torch.LongTensor`` of shape ``(batch_size, sequence_length)``:
         Labels for computing the masked language modeling loss.
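Note (not part of the commit): the only edit to the outputs line above is dropping the trailing comma inside the tuple, which is purely stylistic; both spellings build the same tuple.

# Trailing commas before a closing parenthesis do not change the tuple's value.
print((1, 2,) == (1, 2))   # True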
@@ -384,7 +384,7 @@ class AlbertForMaskedLM(nn.Module):
     """
 
     def __init__(self, config):
-        super(AlbertForMaskedLM, self).__init__()
+        super(AlbertForMaskedLM, self).__init__(config)
         self.config = config
 
         self.bert = AlbertModel(config)
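For context on the last two hunks (illustration only, not part of the commit): switching the base class from nn.Module to PreTrainedModel is what makes forwarding config in super().__init__(config) necessary; in transformers, PreTrainedModel keeps the config and provides the save_pretrained / from_pretrained machinery. A minimal, library-free sketch of the constructor-forwarding pattern; Base and Child are hypothetical names, not transformers classes:

# Hypothetical stand-ins showing why the super().__init__ call gains a config argument.
class Base:
    def __init__(self, config):
        self.config = config      # the base class owns the config, as PreTrainedModel does

class Child(Base):
    def __init__(self, config):
        super(Child, self).__init__(config)   # config must be forwarded, as in the hunk above

child = Child({"hidden_size": 768})
print(child.config["hidden_size"])   # 768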
transformers/tests/fixtures/30k-clean.model → transformers/tests/fixtures/spiece.model (file moved)
transformers/tokenization_albert.py
@@ -8,7 +8,7 @@ from shutil import copyfile
 logger = logging.getLogger(__name__)
 
-VOCAB_FILES_NAMES = {'vocab_file': '30k-clean.model'}
+VOCAB_FILES_NAMES = {'vocab_file': 'spiece.model'}
 
 SPIECE_UNDERLINE = u'▁'
 
 class AlbertTokenizer(PreTrainedTokenizer):
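The fixture rename above keeps the test data consistent with the filename the tokenizer expects: VOCAB_FILES_NAMES maps the logical 'vocab_file' key to the on-disk name looked up inside a pretrained directory. A small illustrative sketch (not part of the commit; the directory path is just an example):

import os

VOCAB_FILES_NAMES = {'vocab_file': 'spiece.model'}

pretrained_dir = "transformers/tests/fixtures"   # example directory holding the fixture
vocab_path = os.path.join(pretrained_dir, VOCAB_FILES_NAMES['vocab_file'])
print(vocab_path)   # transformers/tests/fixtures/spiece.model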