Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
dc5df92f
Commit
dc5df92f
authored
Jan 08, 2019
by
thomwolf
Browse files
added LM head for OpenAI
parent
3cf12b23
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
45 additions
and
18 deletions
+45
-18
pytorch_pretrained_bert/__init__.py
pytorch_pretrained_bert/__init__.py
+2
-1
pytorch_pretrained_bert/modeling_openai.py
pytorch_pretrained_bert/modeling_openai.py
+8
-9
tests/modeling_openai_test.py
tests/modeling_openai_test.py
+35
-8
No files found.
pytorch_pretrained_bert/__init__.py
View file @
dc5df92f
...
...
@@ -5,7 +5,8 @@ from .modeling import (BertConfig, BertModel, BertForPreTraining,
BertForMaskedLM
,
BertForNextSentencePrediction
,
BertForSequenceClassification
,
BertForMultipleChoice
,
BertForTokenClassification
,
BertForQuestionAnswering
)
from
.modeling_openai
import
OpenAIGPTConfig
,
OpenAIGPTModel
,
OpenAIGPTDoubleHeadsModel
from
.modeling_openai
import
(
OpenAIGPTConfig
,
OpenAIGPTModel
,
OpenAIGPTLMHeadModel
,
OpenAIGPTDoubleHeadsModel
)
from
.optimization
import
BertAdam
from
.optimization_openai
import
OpenAIAdam
from
.file_utils
import
PYTORCH_PRETRAINED_BERT_CACHE
pytorch_pretrained_bert/modeling_openai.py
View file @
dc5df92f
...
...
@@ -267,11 +267,11 @@ class OpenAIGPTMultipleChoiceHead(nn.Module):
nn
.
init
.
normal_
(
self
.
linear
.
weight
,
std
=
0.02
)
nn
.
init
.
normal_
(
self
.
linear
.
bias
,
0
)
def
forward
(
self
,
hidden_states
,
classification
_token_mask
):
def
forward
(
self
,
hidden_states
,
multiple_choice
_token_mask
):
# Classification logits
# hidden_states = hidden_states.view(-1, self.n_embd)
#
classification_token_mask = classification
_token_mask.view(-1, 1).expand_as(hidden_states)
multiple_choice_h
=
hidden_states
*
classification
_token_mask
.
unsqueeze
(
-
1
)
#
multiple_choice_token_mask = multiple_choice
_token_mask.view(-1, 1).expand_as(hidden_states)
multiple_choice_h
=
hidden_states
*
multiple_choice
_token_mask
.
unsqueeze
(
-
1
)
multiple_choice_h
=
multiple_choice_h
.
sum
(
dim
=-
2
)
# flat = x[..., 0].contiguous().view(-1)
# multiple_choice_h = multiple_choice_h[flat == self.multiple_choice_token, :]
...
...
@@ -496,8 +496,8 @@ class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
hidden_states
=
self
.
transformer
(
input_ids
,
position_ids
,
token_type_ids
)
lm_logits
=
self
.
lm_head
(
hidden_states
)
if
lm_labels
is
not
None
:
loss_fct
=
CrossEntropyLoss
()
loss
=
loss_fct
(
lm_logits
,
lm_labels
)
loss_fct
=
CrossEntropyLoss
(
ignore_index
=-
1
)
loss
=
loss_fct
(
lm_logits
.
view
(
-
1
,
lm_logits
.
size
(
-
1
))
,
lm_labels
.
view
(
-
1
)
)
return
loss
return
lm_logits
...
...
@@ -515,15 +515,14 @@ class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
self
.
transformer
.
set_num_special_tokens
(
num_special_tokens
)
self
.
lm_head
.
set_embeddings_weights
(
self
.
transformer
.
embed
.
weight
)
def
forward
(
self
,
input_ids
,
classification
_token_mask
,
position_ids
=
None
,
token_type_ids
=
None
,
def
forward
(
self
,
input_ids
,
multiple_choice
_token_mask
,
position_ids
=
None
,
token_type_ids
=
None
,
lm_labels
=
None
,
multiple_choice_labels
=
None
):
"""
input_ids as to be of shape B x C x S
""" input_ids should be of shape B x C x S
lm_labels can be masked using the -1 value
"""
hidden_states
=
self
.
transformer
(
input_ids
,
position_ids
,
token_type_ids
)
lm_logits
=
self
.
lm_head
(
hidden_states
)
multiple_choice_logits
=
self
.
multiple_choice_head
(
hidden_states
,
classification
_token_mask
)
multiple_choice_logits
=
self
.
multiple_choice_head
(
hidden_states
,
multiple_choice
_token_mask
)
losses
=
[]
if
lm_labels
is
not
None
:
loss_fct
=
CrossEntropyLoss
(
ignore_index
=-
1
)
...
...
tests/modeling_openai_test.py
View file @
dc5df92f
...
...
@@ -22,7 +22,8 @@ import random
import
torch
from
pytorch_pretrained_bert
import
(
OpenAIGPTConfig
,
OpenAIGPTModel
,
OpenAIGPTDoubleHeadsModel
)
from
pytorch_pretrained_bert
import
(
OpenAIGPTConfig
,
OpenAIGPTModel
,
OpenAIGPTLMHeadModel
,
OpenAIGPTDoubleHeadsModel
)
class
OpenAIGPTModelTest
(
unittest
.
TestCase
):
...
...
@@ -89,11 +90,11 @@ class OpenAIGPTModelTest(unittest.TestCase):
multiple_choice_labels
=
None
lm_labels
=
None
classification
_token_mask
=
None
multiple_choice
_token_mask
=
None
if
self
.
use_labels
:
multiple_choice_labels
=
OpenAIGPTModelTest
.
ids_tensor
([
self
.
batch_size
],
self
.
type_sequence_label_size
)
lm_labels
=
OpenAIGPTModelTest
.
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
],
self
.
num_labels
)
classification
_token_mask
=
OpenAIGPTModelTest
.
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
],
2
).
float
()
multiple_choice
_token_mask
=
OpenAIGPTModelTest
.
ids_tensor
([
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
],
2
).
float
()
config
=
OpenAIGPTConfig
(
vocab_size_or_config_json_file
=
self
.
vocab_size
,
...
...
@@ -109,10 +110,10 @@ class OpenAIGPTModelTest(unittest.TestCase):
initializer_range
=
self
.
initializer_range
)
return
(
config
,
input_ids
,
token_type_ids
,
position_ids
,
multiple_choice_labels
,
lm_labels
,
classification
_token_mask
)
multiple_choice_labels
,
lm_labels
,
multiple_choice
_token_mask
)
def
create_openai_model
(
self
,
config
,
input_ids
,
token_type_ids
,
position_ids
,
multiple_choice_labels
,
lm_labels
,
classification
_token_mask
):
multiple_choice_labels
,
lm_labels
,
multiple_choice
_token_mask
):
model
=
OpenAIGPTModel
(
config
)
hidden_states
=
model
(
input_ids
,
position_ids
,
token_type_ids
)
outputs
=
{
...
...
@@ -126,12 +127,34 @@ class OpenAIGPTModelTest(unittest.TestCase):
[
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
,
self
.
n_embd
])
def
create_openai_lm_head
(
self
,
config
,
input_ids
,
token_type_ids
,
position_ids
,
multiple_choice_labels
,
lm_labels
,
multiple_choice_token_mask
):
model
=
OpenAIGPTLMHeadModel
(
config
)
loss
=
model
(
input_ids
,
position_ids
,
token_type_ids
,
lm_labels
)
lm_logits
=
model
(
input_ids
,
position_ids
,
token_type_ids
)
outputs
=
{
"loss"
:
loss
,
"lm_logits"
:
lm_logits
,
}
return
outputs
def
check_openai_lm_head_output
(
self
,
result
):
total_voc
=
self
.
n_ctx
+
self
.
n_special
+
self
.
vocab_size
self
.
parent
.
assertListEqual
(
list
(
result
[
"lm_logits"
].
size
()),
[
self
.
batch_size
,
self
.
n_choices
,
self
.
seq_length
,
total_voc
])
def
check_openai_lm_head_loss_output
(
self
,
result
):
self
.
parent
.
assertListEqual
(
list
(
result
[
"loss"
].
size
()),
[])
def
create_openai_double_heads
(
self
,
config
,
input_ids
,
token_type_ids
,
position_ids
,
multiple_choice_labels
,
lm_labels
,
classification
_token_mask
):
multiple_choice_labels
,
lm_labels
,
multiple_choice
_token_mask
):
model
=
OpenAIGPTDoubleHeadsModel
(
config
)
loss
=
model
(
input_ids
,
classification
_token_mask
,
position_ids
,
loss
=
model
(
input_ids
,
multiple_choice
_token_mask
,
position_ids
,
token_type_ids
,
lm_labels
,
multiple_choice_labels
)
lm_logits
,
multiple_choice_logits
=
model
(
input_ids
,
classification
_token_mask
,
position_ids
,
token_type_ids
)
lm_logits
,
multiple_choice_logits
=
model
(
input_ids
,
multiple_choice
_token_mask
,
position_ids
,
token_type_ids
)
outputs
=
{
"loss"
:
loss
,
"lm_logits"
:
lm_logits
,
...
...
@@ -167,6 +190,10 @@ class OpenAIGPTModelTest(unittest.TestCase):
output_result
=
tester
.
create_openai_model
(
*
config_and_inputs
)
tester
.
check_openai_model_output
(
output_result
)
output_result
=
tester
.
create_openai_lm_head
(
*
config_and_inputs
)
tester
.
check_openai_lm_head_output
(
output_result
)
tester
.
check_openai_lm_head_loss_output
(
output_result
)
output_result
=
tester
.
create_openai_double_heads
(
*
config_and_inputs
)
tester
.
check_openai_double_heads_output
(
output_result
)
tester
.
check_openai_double_heads_loss_output
(
output_result
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment