chenpangpang/transformers · Commit f15f0871 (unverified)

Authored Dec 21, 2019 by Thomas Wolf, committed by GitHub on Dec 21, 2019

Merge pull request #1764 from DomHudson/bug-fix-1761

Bug-fix: Roberta Embeddings Not Masked

Parents: fae4d1c2 3e52915f
Changes: 2 changed files, 84 additions and 12 deletions (+84, -12)

  transformers/modeling_roberta.py               +32  -12
  transformers/tests/modeling_roberta_test.py    +52   -0
transformers/modeling_roberta.py

@@ -51,24 +51,44 @@ class RobertaEmbeddings(BertEmbeddings):
                                                 padding_idx=self.padding_idx)
 
     def forward(self, input_ids=None, token_type_ids=None, position_ids=None, inputs_embeds=None):
-        if input_ids is not None:
-            input_shape = input_ids.size()
-        else:
-            input_shape = inputs_embeds.size()[:-1]
-
-        seq_length = input_shape[1]
-        device = input_ids.device if input_ids is not None else inputs_embeds.device
-        if position_ids is None:
-            # Position numbers begin at padding_idx+1. Padding symbols are ignored.
-            # cf. fairseq's `utils.make_positions`
-            position_ids = torch.arange(self.padding_idx + 1, seq_length + self.padding_idx + 1, dtype=torch.long, device=device)
-            position_ids = position_ids.unsqueeze(0).expand(input_shape)
+        if position_ids is None:
+            if input_ids is not None:
+                # Create the position ids from the input token ids. Any padded tokens remain padded.
+                position_ids = self.create_position_ids_from_input_ids(input_ids).to(input_ids.device)
+            else:
+                position_ids = self.create_position_ids_from_inputs_embeds(inputs_embeds)
 
         return super(RobertaEmbeddings, self).forward(input_ids,
                                                       token_type_ids=token_type_ids,
                                                       position_ids=position_ids,
                                                       inputs_embeds=inputs_embeds)
 
+    def create_position_ids_from_input_ids(self, x):
+        """ Replace non-padding symbols with their position numbers. Position numbers begin at
+        padding_idx+1. Padding symbols are ignored. This is modified from fairseq's
+        `utils.make_positions`.
+
+        :param torch.Tensor x:
+        :return torch.Tensor:
+        """
+        mask = x.ne(self.padding_idx).long()
+        incremental_indicies = torch.cumsum(mask, dim=1) * mask
+        return incremental_indicies + self.padding_idx
+
+    def create_position_ids_from_inputs_embeds(self, inputs_embeds):
+        """ We are provided embeddings directly. We cannot infer which are padded so just generate
+        sequential position ids.
+
+        :param torch.Tensor inputs_embeds:
+        :return torch.Tensor:
+        """
+        input_shape = inputs_embeds.size()[:-1]
+        sequence_length = input_shape[1]
+
+        position_ids = torch.arange(self.padding_idx + 1, sequence_length + self.padding_idx + 1,
+                                    dtype=torch.long, device=inputs_embeds.device)
+        return position_ids.unsqueeze(0).expand(input_shape)
 
 
 ROBERTA_START_DOCSTRING = r"""    The RoBERTa model was proposed in
     `RoBERTa: A Robustly Optimized BERT Pretraining Approach`_
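For readers following the change above, here is a minimal standalone sketch of the cumulative-sum masking trick that the new create_position_ids_from_input_ids method relies on. It assumes RoBERTa's padding index of 1 and reuses the illustrative token ids from the regression test added in this commit; it is a sketch of the idea, not code from the commit itself.

import torch

# Assumed values for illustration: padding_idx = 1 (RoBERTa's convention) and a
# single sequence whose last token is padding, mirroring the regression test.
padding_idx = 1
input_ids = torch.as_tensor([[12, 31, 13, padding_idx]])

# 1 for real tokens, 0 for padding tokens.
mask = input_ids.ne(padding_idx).long()

# Cumulative count of real tokens yields 1, 2, 3, ... at non-padding positions;
# multiplying by the mask zeroes out the padding positions again.
incremental_indices = torch.cumsum(mask, dim=1) * mask

# Shifting by padding_idx makes real positions start at padding_idx + 1 while
# padding positions land exactly on padding_idx.
position_ids = incremental_indices + padding_idx
print(position_ids)  # tensor([[2, 3, 4, 1]])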
transformers/tests/modeling_roberta_test.py

@@ -24,6 +24,7 @@ if is_torch_available():
     import torch
 
     from transformers import (RobertaConfig, RobertaModel, RobertaForMaskedLM,
                               RobertaForSequenceClassification, RobertaForTokenClassification)
+    from transformers.modeling_roberta import RobertaEmbeddings
     from transformers.modeling_roberta import ROBERTA_PRETRAINED_MODEL_ARCHIVE_MAP
     from .modeling_common_test import (CommonTestCases, ids_tensor)

@@ -202,6 +203,57 @@ class RobertaModelTest(CommonTestCases.CommonModelTester):
             model = RobertaModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
             self.assertIsNotNone(model)
 
+    def test_create_position_ids_respects_padding_index(self):
+        """ Ensure that the default position ids only assign a sequential . This is a regression
+        test for https://github.com/huggingface/transformers/issues/1761
+
+        The position ids should be masked with the embedding object's padding index. Therefore, the
+        first available non-padding position index is RobertaEmbeddings.padding_idx + 1
+        """
+        config = self.model_tester.prepare_config_and_inputs()[0]
+        model = RobertaEmbeddings(config=config)
+
+        input_ids = torch.as_tensor([[12, 31, 13, model.padding_idx]])
+        expected_positions = torch.as_tensor([[
+            0 + model.padding_idx + 1,
+            1 + model.padding_idx + 1,
+            2 + model.padding_idx + 1,
+            model.padding_idx]])
+
+        position_ids = model.create_position_ids_from_input_ids(input_ids)
+        self.assertEqual(position_ids.shape, expected_positions.shape)
+        self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))
+
+    def test_create_position_ids_from_inputs_embeds(self):
+        """ Ensure that the default position ids only assign a sequential . This is a regression
+        test for https://github.com/huggingface/transformers/issues/1761
+
+        The position ids should be masked with the embedding object's padding index. Therefore, the
+        first available non-padding position index is RobertaEmbeddings.padding_idx + 1
+        """
+        config = self.model_tester.prepare_config_and_inputs()[0]
+        embeddings = RobertaEmbeddings(config=config)
+
+        inputs_embeds = torch.Tensor(2, 4, 30)
+        expected_single_positions = [
+            0 + embeddings.padding_idx + 1,
+            1 + embeddings.padding_idx + 1,
+            2 + embeddings.padding_idx + 1,
+            3 + embeddings.padding_idx + 1,
+        ]
+        expected_positions = torch.as_tensor([expected_single_positions, expected_single_positions])
+        position_ids = embeddings.create_position_ids_from_inputs_embeds(inputs_embeds)
+        self.assertEqual(position_ids.shape, expected_positions.shape)
+        self.assertTrue(torch.all(torch.eq(position_ids, expected_positions)))
+
 
 class RobertaModelIntegrationTest(unittest.TestCase):
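To see why these regression tests were needed, the sketch below contrasts the position ids produced by the removed torch.arange logic with those produced by the new padding-aware logic for the same padded input. It is illustrative only: the padding index of 1 and the token ids are assumptions mirroring the test above, not further code from the commit.

import torch

padding_idx = 1  # assumed RoBERTa padding index, as in the test above
input_ids = torch.as_tensor([[12, 31, 13, padding_idx]])
seq_length = input_ids.size(1)

# Old behaviour (removed in this commit): purely sequential position ids, so the
# trailing padding token received a real position (5) and a real position embedding.
old_position_ids = torch.arange(padding_idx + 1, seq_length + padding_idx + 1,
                                dtype=torch.long).unsqueeze(0).expand(input_ids.size())

# New behaviour: padding tokens are pinned to padding_idx, as the tests assert.
mask = input_ids.ne(padding_idx).long()
new_position_ids = torch.cumsum(mask, dim=1) * mask + padding_idx

print(old_position_ids)  # tensor([[2, 3, 4, 5]])
print(new_position_ids)  # tensor([[2, 3, 4, 1]])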