chenpangpang / transformers / Commits

Commit 3cf12b23
authored Jan 08, 2019 by thomwolf
parent eed51c5b

added tests + fixed losses

Showing 4 changed files with 479 additions and 220 deletions:

    pytorch_pretrained_bert/modeling.py              +1    -1
    pytorch_pretrained_bert/modeling_openai.py       +228  -187
    pytorch_pretrained_bert/tokenization_openai.py   +58   -32
    tests/modeling_openai_test.py                    +192  -0
pytorch_pretrained_bert/modeling.py

@@ -549,7 +549,7 @@ class BertPreTrainedModel(nn.Module):
                 model.__class__.__name__, unexpected_keys))
         if len(error_msgs) > 0:
             raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
-                self.__class__.__name__, "\n\t".join(error_msgs)))
+                model.__class__.__name__, "\n\t".join(error_msgs)))
         if tempdir:
             # Clean up temp dir
             shutil.rmtree(tempdir)
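Editor's note: this one-line fix matters because `from_pretrained` is a `@classmethod` (see the OpenAI GPT version of the same method below), so there is no `self` in scope; whenever `error_msgs` was non-empty, the old code raised a NameError instead of the intended RuntimeError. A minimal sketch of the failure mode, with hypothetical names:

    class Example(object):
        @classmethod
        def from_pretrained(cls):
            model = cls()
            error_msgs = ["size mismatch for decoder.weight"]  # hypothetical message
            if len(error_msgs) > 0:
                # Referencing `self` here raises NameError inside a classmethod,
                # masking the real loading error; `model` is what is in scope.
                raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
                    model.__class__.__name__, "\n\t".join(error_msgs)))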
pytorch_pretrained_bert/modeling_openai.py

@@ -48,12 +48,10 @@ class OpenAIGPTConfig(object):
                  n_embd=768,
                  n_layer=12,
                  n_head=12,
-                 intermediate_size=3072,
                  afn="gelu",
                  resid_pdrop=0.1,
                  embd_pdrop=0.1,
                  attn_pdrop=0.1,
-                 type_vocab_size=2,
                  initializer_range=0.02):
         """Constructs OpenAIGPTConfig.
@@ -65,8 +63,6 @@ class OpenAIGPTConfig(object):
             n_layer: Number of hidden layers in the Transformer encoder.
             n_head: Number of attention heads for each attention layer in
                 the Transformer encoder.
-            intermediate_size: The size of the "intermediate" (i.e., feed-forward)
-                layer in the Transformer encoder.
             afn: The non-linear activation function (function or string) in the
                 encoder and pooler. If string, "gelu", "relu" and "swish" are supported.
             resid_pdrop: The dropout probability for all fully connected
@@ -74,8 +70,6 @@ class OpenAIGPTConfig(object):
             attn_pdrop: The dropout ratio for the attention
                 probabilities.
             embd_pdrop: The dropout ratio for the embeddings.
-            type_vocab_size: The vocabulary size of the `token_type_ids` passed into
-                `OpenAIGPTModel`.
             initializer_range: The stddev of the truncated_normal_initializer for
                 initializing all weight matrices.
         """
@@ -92,11 +86,9 @@ class OpenAIGPTConfig(object):
             self.n_layer = n_layer
             self.n_head = n_head
             self.afn = afn
-            self.intermediate_size = intermediate_size
             self.resid_pdrop = resid_pdrop
             self.embd_pdrop = embd_pdrop
             self.attn_pdrop = attn_pdrop
-            self.type_vocab_size = type_vocab_size
             self.initializer_range = initializer_range
         else:
             raise ValueError("First argument must be either a vocabulary size (int)"
@@ -133,143 +125,6 @@ class OpenAIGPTConfig(object):
         """Serializes this instance to a JSON string."""
         return json.dumps(self.to_dict(), indent=2, sort_keys=True) + "\n"
-
-
-class OpenAIGPTPreTrainedModel(nn.Module):
-    """ An abstract class to handle weights initialization and
-        a simple interface for downloading and loading pretrained models.
-    """
-    def __init__(self, config, *inputs, **kwargs):
-        super(OpenAIGPTPreTrainedModel, self).__init__()
-        if not isinstance(config, OpenAIGPTConfig):
-            raise ValueError(
-                "Parameter config in `{}(config)` should be an instance of class `OpenAIGPTConfig`. "
-                "To create a model from a Google pretrained model use "
-                "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format(
-                    self.__class__.__name__, self.__class__.__name__))
-        self.config = config
-
-    def init_weights(self, module):
-        """ Initialize the weights.
-        """
-        if isinstance(module, (nn.Linear, nn.Embedding)):
-            # Slightly different from the TF version which uses truncated_normal for initialization
-            # cf https://github.com/pytorch/pytorch/pull/5617
-            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
-        elif isinstance(module, LayerNorm):
-            module.bias.data.zero_()
-            module.weight.data.fill_(1.0)
-        if isinstance(module, nn.Linear) and module.bias is not None:
-            module.bias.data.zero_()
-
-    def post_loading(self):
-        pass
-
-    @classmethod
-    def from_pretrained(cls, pretrained_model_name, state_dict=None, cache_dir=None, *inputs, **kwargs):
-        """
-        Instantiate a OpenAIGPTPreTrainedModel from a pre-trained model file or a pytorch state dict.
-        Download and cache the pre-trained model file if needed.
-
-        Params:
-            pretrained_model_name: either:
-                - a str with the name of a pre-trained model to load selected in the list of:
-                    . `openai-gpt`
-                - a path or url to a pretrained model archive containing:
-                    . `openai_gpt_config.json` a configuration file for the model
-                    . `pytorch_model.bin` a PyTorch dump of a OpenAIGPTModel instance
-            cache_dir: an optional path to a folder in which the pre-trained models will be cached.
-            state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of Google pre-trained models
-            *inputs, **kwargs: additional input for the specific Bert class
-                (ex: num_labels for BertForSequenceClassification)
-        """
-        if pretrained_model_name in PRETRAINED_MODEL_ARCHIVE_MAP:
-            archive_file = PRETRAINED_MODEL_ARCHIVE_MAP[pretrained_model_name]
-        else:
-            archive_file = pretrained_model_name
-        # redirect to the cache, if necessary
-        try:
-            resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir)
-        except FileNotFoundError:
-            logger.error(
-                "Model name '{}' was not found in model name list ({}). "
-                "We assumed '{}' was a path or url but couldn't find any file "
-                "associated to this path or url.".format(
-                    pretrained_model_name,
-                    ', '.join(PRETRAINED_MODEL_ARCHIVE_MAP.keys()),
-                    archive_file))
-            return None
-        if resolved_archive_file == archive_file:
-            logger.info("loading archive file {}".format(archive_file))
-        else:
-            logger.info("loading archive file {} from cache at {}".format(
-                archive_file, resolved_archive_file))
-        tempdir = None
-        if os.path.isdir(resolved_archive_file):
-            serialization_dir = resolved_archive_file
-        else:
-            # Extract archive to temp dir
-            tempdir = tempfile.mkdtemp()
-            logger.info("extracting archive file {} to temp dir {}".format(
-                resolved_archive_file, tempdir))
-            with tarfile.open(resolved_archive_file, 'r:gz') as archive:
-                archive.extractall(tempdir)
-            serialization_dir = tempdir
-        # Load config
-        config_file = os.path.join(serialization_dir, CONFIG_NAME)
-        config = OpenAIGPTConfig.from_json_file(config_file)
-        logger.info("Model config {}".format(config))
-        # Instantiate model.
-        model = cls(config, *inputs, **kwargs)
-        if state_dict is None:
-            weights_path = os.path.join(serialization_dir, WEIGHTS_NAME)
-            state_dict = torch.load(weights_path)
-
-        old_keys = []
-        new_keys = []
-        for key in state_dict.keys():
-            new_key = None
-            if 'gamma' in key:
-                new_key = key.replace('gamma', 'weight')
-            if 'beta' in key:
-                new_key = key.replace('beta', 'bias')
-            if new_key:
-                old_keys.append(key)
-                new_keys.append(new_key)
-        for old_key, new_key in zip(old_keys, new_keys):
-            state_dict[new_key] = state_dict.pop(old_key)
-
-        missing_keys = []
-        unexpected_keys = []
-        error_msgs = []
-        # copy state_dict so _load_from_state_dict can modify it
-        metadata = getattr(state_dict, '_metadata', None)
-        state_dict = state_dict.copy()
-        if metadata is not None:
-            state_dict._metadata = metadata
-
-        def load(module, prefix=''):
-            local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
-            module._load_from_state_dict(
-                state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
-            for name, child in module._modules.items():
-                if child is not None:
-                    load(child, prefix + name + '.')
-        load(model.transformer if hasattr(model, 'transformer') else model, prefix='')
-        if len(missing_keys) > 0:
-            logger.info("Weights of {} not initialized from pretrained model: {}".format(
-                model.__class__.__name__, missing_keys))
-        if len(unexpected_keys) > 0:
-            logger.info("Weights from pretrained model not used in {}: {}".format(
-                model.__class__.__name__, unexpected_keys))
-        if len(error_msgs) > 0:
-            raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
-                self.__class__.__name__, "\n\t".join(error_msgs)))
-        model.post_loading()
-        if tempdir:
-            # Clean up temp dir
-            shutil.rmtree(tempdir)
-        return model
-
-
 class Conv1D(nn.Module):
     def __init__(self, nf, rf, nx):
         super(Conv1D, self).__init__()
@@ -312,7 +167,11 @@ class Attention(nn.Module):
         w = torch.matmul(q, k)
         if self.scale:
             w = w / math.sqrt(v.size(-1))
-        w = w * self.b + -1e9 * (1 - self.b)  # TF implem method: mask_attn_weights
+        # w = w * self.b + -1e9 * (1 - self.b)  # TF implem method: mask_attn_weights
+        # XD: self.b may be larger than w, so we need to crop it
+        b = self.b[:, :, :w.size(-2), :w.size(-1)]
+        w = w * b + -1e9 * (1 - b)
+
         w = nn.Softmax(dim=-1)(w)
         w = self.attn_dropout(w)
         return torch.matmul(w, v)
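Editor's note: the new masking lines crop the causal bias `self.b`, which is registered for the full `n_ctx` context, down to the size of the current score matrix so the broadcast stays aligned. A standalone sketch of the idea (the shapes and the tril construction are assumptions, not taken from this diff):

    import torch

    n_ctx, seq_len = 512, 7
    b = torch.tril(torch.ones(n_ctx, n_ctx)).view(1, 1, n_ctx, n_ctx)  # full-context causal mask
    w = torch.randn(2, 4, seq_len, seq_len)                            # scores for a shorter sequence

    b = b[:, :, :w.size(-2), :w.size(-1)]  # crop the mask to the actual sequence length
    w = w * b + -1e9 * (1 - b)             # masked positions get a large negative score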
@@ -388,41 +247,184 @@ class OpenAIGPTLMHead(nn.Module):
         self.decoder = nn.Linear(embed_shape[1], embed_shape[0], bias=False)
         self.decoder.weight = model_embeddings_weights  # Tied weights

-    def forward(self, h):
+    def forward(self, hidden_state):
         # Truncated Language modeling logits (we remove the last token)
-        h_trunc = h[:, :-1].contiguous().view(-1, self.n_embd)
-        lm_logits = self.decoder(h_trunc)
+        # h_trunc = h[:, :-1].contiguous().view(-1, self.n_embd)
+        lm_logits = self.decoder(hidden_state)
         return lm_logits


-class OpenAIGPTClfHead(nn.Module):
+class OpenAIGPTMultipleChoiceHead(nn.Module):
     """ Classifier Head for the transformer """

-    def __init__(self, clf_token, cfg):
-        super(OpenAIGPTClfHead, self).__init__()
+    def __init__(self, cfg):
+        super(OpenAIGPTMultipleChoiceHead, self).__init__()
         self.n_embd = cfg.n_embd
-        self.clf_token = clf_token
+        # self.multiple_choice_token = multiple_choice_token
         self.dropout = nn.Dropout2d(cfg.resid_pdrop)  # To reproduce the noise_shape parameter of TF implementation
         self.linear = nn.Linear(cfg.n_embd, 1)

         nn.init.normal_(self.linear.weight, std=0.02)
         nn.init.normal_(self.linear.bias, 0)

-    def forward(self, h, x):
+    def forward(self, hidden_states, classification_token_mask):
         # Classification logits
-        clf_h = h.view(-1, self.n_embd)
-        flat = x[..., 0].contiguous().view(-1)
-        clf_h = clf_h[flat == self.clf_token, :]
-        clf_h = clf_h.view(-1, x.size(1), self.n_embd, 1)
-        # This double transposition is there to replicate the behavior
-        # of the noise_shape argument in the tensorflow
-        # implementation.  For more details, see
-        # https://github.com/huggingface/pytorch-openai-transformer-lm/issues/11
-        clf_h = self.dropout(clf_h.transpose(1, 2)).transpose(1, 2)
-        clf_h = clf_h.contiguous().view(-1, self.n_embd)
-        clf_logits = self.linear(clf_h)
-        return clf_logits.view(-1, x.size(1))
+        # hidden_states = hidden_states.view(-1, self.n_embd)
+        # classification_token_mask = classification_token_mask.view(-1, 1).expand_as(hidden_states)
+        multiple_choice_h = hidden_states * classification_token_mask.unsqueeze(-1)
+        multiple_choice_h = multiple_choice_h.sum(dim=-2)
+        # flat = x[..., 0].contiguous().view(-1)
+        # multiple_choice_h = multiple_choice_h[flat == self.multiple_choice_token, :]
+        # multiple_choice_h = multiple_choice_h.view(-1, x.size(1), self.n_embd, 1)
+        # # This double transposition is there to replicate the behavior
+        # # of the noise_shape argument in the tensorflow
+        # # implementation.  For more details, see
+        # # https://github.com/huggingface/pytorch-openai-transformer-lm/issues/11
+        # multiple_choice_h = self.dropout(multiple_choice_h.transpose(1, 2)).transpose(1, 2)
+        # multiple_choice_h = multiple_choice_h.contiguous().view(-1, self.n_embd)
+        multiple_choice_logits = self.linear(multiple_choice_h).squeeze(-1)
+        return multiple_choice_logits
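Editor's note: instead of selecting hidden states by comparing token ids against a special `clf_token`, the rewritten head zeroes out every position except the classification token and sums over the sequence. A self-contained sketch of that gather-by-mask trick (shapes are arbitrary):

    import torch

    batch, n_choices, seq_len, n_embd = 2, 3, 5, 8
    hidden_states = torch.randn(batch, n_choices, seq_len, n_embd)
    # One-hot over the sequence: 1.0 at the classification token's position
    classification_token_mask = torch.zeros(batch, n_choices, seq_len)
    classification_token_mask[:, :, -1] = 1.0  # assume it is the last token

    # Multiply-and-sum picks out the classification token's hidden state per choice
    h = hidden_states * classification_token_mask.unsqueeze(-1)  # zero elsewhere
    h = h.sum(dim=-2)                                            # (batch, n_choices, n_embd)
    assert torch.allclose(h, hidden_states[:, :, -1])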
+class OpenAIGPTPreTrainedModel(nn.Module):
+    """ An abstract class to handle weights initialization and
+        a simple interface for downloading and loading pretrained models.
+    """
+    def __init__(self, config, *inputs, **kwargs):
+        super(OpenAIGPTPreTrainedModel, self).__init__()
+        if not isinstance(config, OpenAIGPTConfig):
+            raise ValueError(
+                "Parameter config in `{}(config)` should be an instance of class `OpenAIGPTConfig`. "
+                "To create a model from a pretrained model use "
+                "`model = {}.from_pretrained(PRETRAINED_MODEL_NAME)`".format(
+                    self.__class__.__name__, self.__class__.__name__))
+        self.config = config
+
+    def init_weights(self, module):
+        """ Initialize the weights.
+        """
+        if isinstance(module, (nn.Linear, nn.Embedding)):
+            # Slightly different from the TF version which uses truncated_normal for initialization
+            # cf https://github.com/pytorch/pytorch/pull/5617
+            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
+        elif isinstance(module, LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+        if isinstance(module, nn.Linear) and module.bias is not None:
+            module.bias.data.zero_()
+
+    def set_num_special_tokens(self, num_special_tokens):
+        pass
+
+    @classmethod
+    def from_pretrained(cls, pretrained_model_name, num_special_tokens=0, state_dict=None, cache_dir=None, *inputs, **kwargs):
+        """
+        Instantiate a OpenAIGPTPreTrainedModel from a pre-trained model file or a pytorch state dict.
+        Download and cache the pre-trained model file if needed.
+
+        Params:
+            pretrained_model_name: either:
+                - a str with the name of a pre-trained model to load selected in the list of:
+                    . `openai-gpt`
+                - a path or url to a pretrained model archive containing:
+                    . `openai_gpt_config.json` a configuration file for the model
+                    . `pytorch_model.bin` a PyTorch dump of a OpenAIGPTModel instance
+            cache_dir: an optional path to a folder in which the pre-trained models will be cached.
+            state_dict: an optional state dictionary (collections.OrderedDict object) to use instead of pre-trained models
+            *inputs, **kwargs: additional input for the specific Bert class
+                (ex: num_labels for BertForSequenceClassification)
+        """
+        if pretrained_model_name in PRETRAINED_MODEL_ARCHIVE_MAP:
+            archive_file = PRETRAINED_MODEL_ARCHIVE_MAP[pretrained_model_name]
+        else:
+            archive_file = pretrained_model_name
+        # redirect to the cache, if necessary
+        try:
+            resolved_archive_file = cached_path(archive_file, cache_dir=cache_dir)
+        except FileNotFoundError:
+            logger.error(
+                "Model name '{}' was not found in model name list ({}). "
+                "We assumed '{}' was a path or url but couldn't find any file "
+                "associated to this path or url.".format(
+                    pretrained_model_name,
+                    ', '.join(PRETRAINED_MODEL_ARCHIVE_MAP.keys()),
+                    archive_file))
+            return None
+        if resolved_archive_file == archive_file:
+            logger.info("loading archive file {}".format(archive_file))
+        else:
+            logger.info("loading archive file {} from cache at {}".format(
+                archive_file, resolved_archive_file))
+        tempdir = None
+        if os.path.isdir(resolved_archive_file):
+            serialization_dir = resolved_archive_file
+        else:
+            # Extract archive to temp dir
+            tempdir = tempfile.mkdtemp()
+            logger.info("extracting archive file {} to temp dir {}".format(
+                resolved_archive_file, tempdir))
+            with tarfile.open(resolved_archive_file, 'r:gz') as archive:
+                archive.extractall(tempdir)
+            serialization_dir = tempdir
+        # Load config
+        config_file = os.path.join(serialization_dir, CONFIG_NAME)
+        config = OpenAIGPTConfig.from_json_file(config_file)
+        logger.info("Model config {}".format(config))
+        # Instantiate model.
+        model = cls(config, *inputs, **kwargs)
+        if state_dict is None:
+            weights_path = os.path.join(serialization_dir, WEIGHTS_NAME)
+            state_dict = torch.load(weights_path)
+
+        old_keys = []
+        new_keys = []
+        for key in state_dict.keys():
+            new_key = None
+            if 'gamma' in key:
+                new_key = key.replace('gamma', 'weight')
+            if 'beta' in key:
+                new_key = key.replace('beta', 'bias')
+            if new_key:
+                old_keys.append(key)
+                new_keys.append(new_key)
+        for old_key, new_key in zip(old_keys, new_keys):
+            state_dict[new_key] = state_dict.pop(old_key)
+
+        missing_keys = []
+        unexpected_keys = []
+        error_msgs = []
+        # copy state_dict so _load_from_state_dict can modify it
+        metadata = getattr(state_dict, '_metadata', None)
+        state_dict = state_dict.copy()
+        if metadata is not None:
+            state_dict._metadata = metadata
+
+        def load(module, prefix=''):
+            local_metadata = {} if metadata is None else metadata.get(prefix[:-1], {})
+            module._load_from_state_dict(
+                state_dict, prefix, local_metadata, True, missing_keys, unexpected_keys, error_msgs)
+            for name, child in module._modules.items():
+                if child is not None:
+                    load(child, prefix + name + '.')
+        load(model.transformer if hasattr(model, 'transformer') else model, prefix='')
+        if len(missing_keys) > 0:
+            logger.info("Weights of {} not initialized from pretrained model: {}".format(
+                model.__class__.__name__, missing_keys))
+        if len(unexpected_keys) > 0:
+            logger.info("Weights from pretrained model not used in {}: {}".format(
+                model.__class__.__name__, unexpected_keys))
+        if len(error_msgs) > 0:
+            raise RuntimeError('Error(s) in loading state_dict for {}:\n\t{}'.format(
+                model.__class__.__name__, "\n\t".join(error_msgs)))
+        # Add additional embeddings for special tokens if needed
+        if num_special_tokens != config.n_special:
+            model.set_num_special_tokens(num_special_tokens)
+        if tempdir:
+            # Clean up temp dir
+            shutil.rmtree(tempdir)
+        return model


 class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
@@ -440,6 +442,7 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
         # nn.init.normal_(self.embed.weight, std=0.02)

     def set_num_special_tokens(self, num_special_tokens):
+        " Update input embeddings with new embedding matrix "
         # Update config
         self.config.n_special = num_special_tokens
         # # Build new embeddings and initialize
@@ -451,45 +454,83 @@ class OpenAIGPTModel(OpenAIGPTPreTrainedModel):
         self.embed.weight.data[:self.config.vocab_size, :] = old_embed.weight.data[:self.config.vocab_size, :]
         self.embed.weight.data[-self.config.n_ctx:, :] = old_embed.weight.data[-self.config.n_ctx:, :]

-    def forward(self, x):
-        x = x.view(-1, x.size(-2), x.size(-1))
-        e = self.embed(x)
+    def forward(self, input_ids, position_ids=None, token_type_ids=None):
+        if position_ids is None:
+            start = self.config.vocab_size + self.config.n_special
+            end = start + input_ids.size(-1)
+            position_ids = torch.arange(start, end, dtype=torch.long, device=input_ids.device)
+            position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
+
+        input_shape = input_ids.size()
+        input_ids = input_ids.view(-1, input_ids.size(-1))
+        position_ids = position_ids.view(-1, position_ids.size(-1))
+
+        inputs_embeds = self.embed(input_ids)
+        position_embeds = self.embed(position_ids)
+        if token_type_ids is not None:
+            token_type_ids = token_type_ids.view(-1, token_type_ids.size(-1))
+            token_type_embeds = self.embed(token_type_ids)
+        else:
+            token_type_embeds = 0
         # Add the position information to the input embeddings
-        h = e.sum(dim=2)
+        # h = e.sum(dim=2)
+        hidden_states = inputs_embeds + position_embeds + token_type_embeds
         for block in self.h:
-            h = block(h)
-        return h
+            hidden_states = block(hidden_states)
+        return hidden_states.view(*input_shape, hidden_states.size(-1))
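Editor's note: the model uses a single embedding matrix for everything, which is why the default position ids are offset. Token ids occupy [0, vocab_size), special tokens take the next n_special slots, and positions take the final n_ctx slots (this layout is also what `set_num_special_tokens` above preserves when it copies the head and tail of the old weights). A sketch of the offset (the sizes are assumptions):

    import torch

    vocab_size, n_special, n_ctx = 40478, 1, 512
    seq_len = 7
    input_ids = torch.randint(0, vocab_size, (1, seq_len))

    # Positions live at the tail of the shared embedding matrix,
    # right after the regular vocabulary and the special tokens.
    start = vocab_size + n_special
    position_ids = torch.arange(start, start + seq_len, dtype=torch.long)
    position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
    # Both id tensors now index the same nn.Embedding of size
    # vocab_size + n_special + n_ctx.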
+class OpenAIGPTLMHeadModel(OpenAIGPTPreTrainedModel):
+    """ OpenAI GPT model with language model and classification heads """
+    def __init__(self, cfg):
+        super(OpenAIGPTLMHeadModel, self).__init__(cfg)
+        self.transformer = OpenAIGPTModel(cfg)
+        self.lm_head = OpenAIGPTLMHead(self.transformer.embed.weight, cfg)
+        self.apply(self.init_weights)
+
+    def set_num_special_tokens(self, num_special_tokens):
+        " Update input and output embeddings with new embedding matrix "
+        self.transformer.set_num_special_tokens(num_special_tokens)
+        self.lm_head.set_embeddings_weights(self.transformer.embed.weight)
+
+    def forward(self, input_ids, position_ids=None, token_type_ids=None, lm_labels=None):
+        hidden_states = self.transformer(input_ids, position_ids, token_type_ids)
+        lm_logits = self.lm_head(hidden_states)
+        if lm_labels is not None:
+            loss_fct = CrossEntropyLoss()
+            loss = loss_fct(lm_logits, lm_labels)
+            return loss
+        return lm_logits
+
+
 class OpenAIGPTDoubleHeadsModel(OpenAIGPTPreTrainedModel):
     """ OpenAI GPT model with language model and classification heads """
-    def __init__(self, cfg, clf_token='[CLS]'):
+    def __init__(self, cfg):
         super(OpenAIGPTDoubleHeadsModel, self).__init__(cfg)
         self.transformer = OpenAIGPTModel(cfg)
         self.lm_head = OpenAIGPTLMHead(self.transformer.embed.weight, cfg)
-        self.clf_head = OpenAIGPTClfHead(clf_token, cfg)
+        self.multiple_choice_head = OpenAIGPTMultipleChoiceHead(cfg)
         self.apply(self.init_weights)

-    def post_loading(self):
-        " Set the number of special tokens to 1 (for the [CLS] token) "
-        self.set_num_special_tokens(1)
-
     def set_num_special_tokens(self, num_special_tokens):
         " Update input and output embeddings with new embedding matrix "
         self.transformer.set_num_special_tokens(num_special_tokens)
         self.lm_head.set_embeddings_weights(self.transformer.embed.weight)

-    def forward(self, x, lm_labels=None, clf_labels=None):
-        h = self.transformer(x)
-        lm_logits = self.lm_head(h)
-        clf_logits = self.clf_head(h, x)
+    def forward(self, input_ids, classification_token_mask, position_ids=None, token_type_ids=None,
+                lm_labels=None, multiple_choice_labels=None):
+        """
+        input_ids has to be of shape B x C x S
+        lm_labels can be masked using the -1 value
+        """
+        hidden_states = self.transformer(input_ids, position_ids, token_type_ids)
+        lm_logits = self.lm_head(hidden_states)
+        multiple_choice_logits = self.multiple_choice_head(hidden_states, classification_token_mask)
         losses = []
         if lm_labels is not None:
-            loss_fct = CrossEntropyLoss()
-            losses.append(loss_fct(lm_logits, lm_labels))
-        if clf_labels is not None:
-            loss_fct = CrossEntropyLoss()
-            losses.append(loss_fct(clf_logits, clf_labels))
+            loss_fct = CrossEntropyLoss(ignore_index=-1)
+            losses.append(loss_fct(lm_logits.view(-1, lm_logits.size(-1)), lm_labels.view(-1)))
+        if multiple_choice_labels is not None:
+            loss_fct = CrossEntropyLoss()
+            losses.append(loss_fct(multiple_choice_logits, multiple_choice_labels.view(-1)))
         if losses:
             return losses
-        return lm_logits, clf_logits
+        return lm_logits, multiple_choice_logits
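Editor's note: the new multiple-choice interface, mirrored from the test file added below, takes B x C x S inputs and marks the classification token with a float mask. A minimal usage sketch (the hyperparameters are arbitrary, kept small like in the tests):

    import torch
    from pytorch_pretrained_bert import OpenAIGPTConfig, OpenAIGPTDoubleHeadsModel

    config = OpenAIGPTConfig(vocab_size_or_config_json_file=99, n_special=1, n_ctx=33,
                             n_embd=32, n_layer=2, n_head=4)
    model = OpenAIGPTDoubleHeadsModel(config)
    # With pretrained weights one would presumably call instead:
    # model = OpenAIGPTDoubleHeadsModel.from_pretrained('openai-gpt', num_special_tokens=1)

    batch, n_choices, seq_len = 2, 3, 7
    input_ids = torch.randint(0, 99, (batch, n_choices, seq_len))
    classification_token_mask = torch.zeros(batch, n_choices, seq_len)
    classification_token_mask[:, :, -1] = 1.0  # classification token assumed last

    # Without labels: the two logits tensors are returned
    lm_logits, multiple_choice_logits = model(input_ids, classification_token_mask)

    # With labels: a list holding the LM loss and the multiple-choice loss
    lm_labels = torch.randint(0, 99, (batch, n_choices, seq_len))
    multiple_choice_labels = torch.randint(0, n_choices, (batch,))
    losses = model(input_ids, classification_token_mask, None, None,
                   lm_labels, multiple_choice_labels)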
pytorch_pretrained_bert/tokenization_openai.py

@@ -67,19 +67,17 @@ class OpenAIGPTTokenizer(object):
     mostly a wrapper for a public python bpe tokenizer
     """
     @classmethod
-    def from_pretrained(cls, pretrained_model_name, cache_dir=None, *inputs, **kwargs):
+    def from_pretrained(cls, pretrained_model_name_or_path, cache_dir=None, *inputs, **kwargs):
         """
         Instantiate a PreTrainedBertModel from a pre-trained model file.
         Download and cache the pre-trained model file if needed.
         """
-        if pretrained_model_name in PRETRAINED_VOCAB_ARCHIVE_MAP:
-            vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name]
-            merges_file = PRETRAINED_MERGES_ARCHIVE_MAP[pretrained_model_name]
+        if pretrained_model_name_or_path in PRETRAINED_VOCAB_ARCHIVE_MAP:
+            vocab_file = PRETRAINED_VOCAB_ARCHIVE_MAP[pretrained_model_name_or_path]
+            merges_file = PRETRAINED_MERGES_ARCHIVE_MAP[pretrained_model_name_or_path]
         else:
-            vocab_file = pretrained_model_name
-            if os.path.isdir(vocab_file):
-                vocab_file = os.path.join(vocab_file, VOCAB_NAME)
-                merges_file = os.path.join(vocab_file, MERGES_NAME)
+            vocab_file = os.path.join(pretrained_model_name_or_path, VOCAB_NAME)
+            merges_file = os.path.join(pretrained_model_name_or_path, MERGES_NAME)
         # redirect to the cache, if necessary
         try:
             resolved_vocab_file = cached_path(vocab_file, cache_dir=cache_dir)
@@ -87,11 +85,12 @@ class OpenAIGPTTokenizer(object):
         except FileNotFoundError:
             logger.error(
                 "Model name '{}' was not found in model name list ({}). "
-                "We assumed '{}' was a path or url but couldn't find any file "
-                "associated to this path or url.".format(
-                    pretrained_model_name,
+                "We assumed '{}' was a path or url but couldn't find files {} and {} "
+                "at this path or url.".format(
+                    pretrained_model_name_or_path,
                     ', '.join(PRETRAINED_VOCAB_ARCHIVE_MAP.keys()),
-                    vocab_file))
+                    pretrained_model_name_or_path,
+                    vocab_file, merges_file))
             return None
         if resolved_vocab_file == vocab_file and resolved_merges_file == merges_file:
             logger.info("loading vocabulary file {}".format(vocab_file))
@@ -101,29 +100,38 @@ class OpenAIGPTTokenizer(object):
                 vocab_file, resolved_vocab_file))
             logger.info("loading merges file {} from cache at {}".format(
                 merges_file, resolved_merges_file))
-        if pretrained_model_name in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP:
+        if pretrained_model_name_or_path in PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP:
             # if we're using a pretrained model, ensure the tokenizer won't index sequences longer
             # than the number of positional embeddings
-            max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name]
+            max_len = PRETRAINED_VOCAB_POSITIONAL_EMBEDDINGS_SIZE_MAP[pretrained_model_name_or_path]
             kwargs['max_len'] = min(kwargs.get('max_len', int(1e12)), max_len)
         # Instantiate tokenizer.
         tokenizer = cls(resolved_vocab_file, resolved_merges_file, *inputs, **kwargs)
         return tokenizer

-    def __init__(self, vocab_file, merges_file):
+    def __init__(self, vocab_file, merges_file, special_tokens=None, max_len=None):
         try:
             import ftfy
             import spacy
         except ImportError:
             raise ImportError("Please install ftfy and spacy to use OpenAI GPT tokenizer.")
+        self.max_len = max_len if max_len is not None else int(1e12)
         self.nlp = spacy.load('en', disable=['parser', 'tagger', 'ner', 'textcat'])
+        self.fix_text = ftfy.fix_text
         self.encoder = json.load(open(vocab_file))
         self.decoder = {v: k for k, v in self.encoder.items()}
         merges = open(merges_file, encoding='utf-8').read().split('\n')[1:-1]
         merges = [tuple(merge.split()) for merge in merges]
         self.bpe_ranks = dict(zip(merges, range(len(merges))))
         self.cache = {}
+        if not special_tokens:
+            self.special_tokens = {}
+        else:
+            self.special_tokens = dict((tok, len(self.encoder) + i) for i, tok in enumerate(special_tokens))
+
+    def set_special_tokens(self, special_tokens):
+        self.special_tokens = dict((tok, len(self.encoder) + i) for i, tok in enumerate(special_tokens))

     def bpe(self, token):
         word = tuple(token[:-1]) + (token[-1] + '</w>',)
@@ -168,20 +176,38 @@ class OpenAIGPTTokenizer(object):
         self.cache[token] = word
         return word

-    def tokenize(self, texts, verbose=True):
-        texts_tokens = []
-        if verbose:
-            for text in tqdm(texts, ncols=80, leave=False):
-                text = self.nlp(text_standardize(ftfy.fix_text(text)))
-                text_tokens = []
-                for token in text:
-                    text_tokens.extend([self.encoder.get(t, 0) for t in self.bpe(token.text.lower()).split(' ')])
-                texts_tokens.append(text_tokens)
-        else:
-            for text in texts:
-                text = self.nlp(text_standardize(ftfy.fix_text(text)))
-                text_tokens = []
-                for token in text:
-                    text_tokens.extend([self.encoder.get(t, 0) for t in self.bpe(token.text.lower()).split(' ')])
-                texts_tokens.append(text_tokens)
-        return texts_tokens
+    def tokenize(self, text):
+        split_tokens = []
+        text = self.nlp(text_standardize(self.fix_text(text)))
+        for token in text:
+            split_tokens.extend([t for t in self.bpe(token.text.lower()).split(' ')])
+        return split_tokens
+
+    def convert_tokens_to_ids(self, tokens):
+        """Converts a sequence of tokens into ids using the vocab."""
+        ids = []
+        for token in tokens:
+            if token in self.special_tokens:
+                ids.append(self.special_tokens[token])
+            else:
+                ids.append(self.encoder.get(token, 0))
+        if len(ids) > self.max_len:
+            raise ValueError(
+                "Token indices sequence length is longer than the specified maximum "
+                " sequence length for this BERT model ({} > {}). Running this"
+                " sequence through BERT will result in indexing errors".format(len(ids), self.max_len)
+            )
+        return ids
+
+    def convert_ids_to_tokens(self, ids):
+        """Converts a sequence of ids in BPE tokens using the vocab."""
+        tokens = []
+        for i in ids:
+            tokens.append(self.decoder[i])
+        return tokens
+
+    def decode(self, ids):
+        """Converts a sequence of ids in a string."""
+        tokens = self.convert_ids_to_tokens(ids)
+        out_string = ''.join(tokens).replace('</w>', ' ')
+        return out_string
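Editor's note: a sketch of the reworked tokenizer API. tokenize() now takes a single string and returns BPE tokens, with id conversion and decoding as separate steps (assumes ftfy and spacy are installed; the special token is illustrative):

    from pytorch_pretrained_bert.tokenization_openai import OpenAIGPTTokenizer

    tokenizer = OpenAIGPTTokenizer.from_pretrained('openai-gpt', special_tokens=['[CLS]'])
    tokens = tokenizer.tokenize("Hello world!")    # BPE tokens as strings
    ids = tokenizer.convert_tokens_to_ids(tokens)  # special tokens map past the vocab
    text = tokenizer.decode(ids)                   # '</w>' markers become spaces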
tests/modeling_openai_test.py  (new file, 0 → 100644)

# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import unittest
import json
import random

import torch

from pytorch_pretrained_bert import (OpenAIGPTConfig, OpenAIGPTModel,
                                     OpenAIGPTDoubleHeadsModel)


class OpenAIGPTModelTest(unittest.TestCase):
    class OpenAIGPTModelTester(object):

        def __init__(self,
                     parent,
                     batch_size=13,
                     seq_length=7,
                     is_training=True,
                     use_position_ids=True,
                     use_token_type_ids=True,
                     use_labels=True,
                     vocab_size=99,
                     n_special=1,
                     n_ctx=33,
                     n_embd=32,
                     n_layer=5,
                     n_head=4,
                     n_choices=3,
                     afn="gelu",
                     resid_pdrop=0.1,
                     attn_pdrop=0.1,
                     embd_pdrop=0.1,
                     type_sequence_label_size=2,
                     initializer_range=0.02,
                     num_labels=3,
                     scope=None):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_position_ids = use_position_ids
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.n_special = n_special
            self.n_ctx = n_ctx
            self.n_embd = n_embd
            self.n_layer = n_layer
            self.n_head = n_head
            self.afn = afn
            self.n_choices = n_choices
            self.resid_pdrop = resid_pdrop
            self.attn_pdrop = attn_pdrop
            self.embd_pdrop = embd_pdrop
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = OpenAIGPTModelTest.ids_tensor(
                [self.batch_size, self.n_choices, self.seq_length], self.vocab_size)

            position_ids = None
            if self.use_position_ids:
                position_ids = OpenAIGPTModelTest.ids_tensor(
                    [self.batch_size, self.n_choices, self.seq_length], self.n_ctx)
                position_ids = position_ids + self.n_special + self.vocab_size

            token_type_ids = None
            if self.use_token_type_ids:
                total_voc = self.n_ctx + self.n_special + self.vocab_size
                token_type_ids = OpenAIGPTModelTest.ids_tensor(
                    [self.batch_size, self.n_choices, self.seq_length], total_voc)

            multiple_choice_labels = None
            lm_labels = None
            classification_token_mask = None
            if self.use_labels:
                multiple_choice_labels = OpenAIGPTModelTest.ids_tensor(
                    [self.batch_size], self.type_sequence_label_size)
                lm_labels = OpenAIGPTModelTest.ids_tensor(
                    [self.batch_size, self.n_choices, self.seq_length], self.num_labels)
                classification_token_mask = OpenAIGPTModelTest.ids_tensor(
                    [self.batch_size, self.n_choices, self.seq_length], 2).float()

            config = OpenAIGPTConfig(
                vocab_size_or_config_json_file=self.vocab_size,
                n_ctx=self.n_ctx,
                n_special=self.n_special,
                n_embd=self.n_embd,
                n_layer=self.n_layer,
                n_head=self.n_head,
                afn=self.afn,
                resid_pdrop=self.resid_pdrop,
                attn_pdrop=self.attn_pdrop,
                embd_pdrop=self.embd_pdrop,
                initializer_range=self.initializer_range)

            return (config, input_ids, token_type_ids, position_ids,
                    multiple_choice_labels, lm_labels, classification_token_mask)

        def create_openai_model(self, config, input_ids, token_type_ids, position_ids,
                                multiple_choice_labels, lm_labels, classification_token_mask):
            model = OpenAIGPTModel(config)
            hidden_states = model(input_ids, position_ids, token_type_ids)
            outputs = {
                "hidden_states": hidden_states,
            }
            return outputs

        def check_openai_model_output(self, result):
            self.parent.assertListEqual(
                list(result["hidden_states"].size()),
                [self.batch_size, self.n_choices, self.seq_length, self.n_embd])

        def create_openai_double_heads(self, config, input_ids, token_type_ids, position_ids,
                                       multiple_choice_labels, lm_labels, classification_token_mask):
            model = OpenAIGPTDoubleHeadsModel(config)
            loss = model(input_ids, classification_token_mask, position_ids, token_type_ids,
                         lm_labels, multiple_choice_labels)
            lm_logits, multiple_choice_logits = model(input_ids, classification_token_mask,
                                                      position_ids, token_type_ids)
            outputs = {
                "loss": loss,
                "lm_logits": lm_logits,
                "multiple_choice_logits": multiple_choice_logits,
            }
            return outputs

        def check_openai_double_heads_output(self, result):
            total_voc = self.n_ctx + self.n_special + self.vocab_size
            self.parent.assertListEqual(
                list(result["lm_logits"].size()),
                [self.batch_size, self.n_choices, self.seq_length, total_voc])
            self.parent.assertListEqual(
                list(result["multiple_choice_logits"].size()),
                [self.batch_size, self.n_choices])

        def check_openai_double_heads_loss_output(self, result):
            self.parent.assertListEqual(
                [list(l.size()) for l in result["loss"]],
                [[], []])

    def test_default(self):
        self.run_tester(OpenAIGPTModelTest.OpenAIGPTModelTester(self))

    def test_config_to_json_string(self):
        config = OpenAIGPTConfig(vocab_size_or_config_json_file=99, n_embd=37)
        obj = json.loads(config.to_json_string())
        self.assertEqual(obj["vocab_size"], 99)
        self.assertEqual(obj["n_embd"], 37)

    def run_tester(self, tester):
        config_and_inputs = tester.prepare_config_and_inputs()
        output_result = tester.create_openai_model(*config_and_inputs)
        tester.check_openai_model_output(output_result)

        output_result = tester.create_openai_double_heads(*config_and_inputs)
        tester.check_openai_double_heads_output(output_result)
        tester.check_openai_double_heads_loss_output(output_result)

    @classmethod
    def ids_tensor(cls, shape, vocab_size, rng=None, name=None):
        """Creates a random int32 tensor of the shape within the vocab size."""
        if rng is None:
            rng = random.Random()

        total_dims = 1
        for dim in shape:
            total_dims *= dim

        values = []
        for _ in range(total_dims):
            values.append(rng.randint(0, vocab_size - 1))

        return torch.tensor(data=values, dtype=torch.long).view(shape).contiguous()


if __name__ == "__main__":
    unittest.main()