Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
c19b8e4a
Commit
c19b8e4a
authored
Oct 09, 2019
by
thomwolf
Browse files
fixing CTRL tests and OpenAI GPT tests
parent
6dce6dda
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
31 additions
and
25 deletions
+31
-25
transformers/modeling_ctrl.py
transformers/modeling_ctrl.py
+24
-20
transformers/modeling_openai.py
transformers/modeling_openai.py
+1
-1
transformers/modeling_tf_ctrl.py
transformers/modeling_tf_ctrl.py
+2
-1
transformers/tests/modeling_tf_common_test.py
transformers/tests/modeling_tf_common_test.py
+4
-3
No files found.
transformers/modeling_ctrl.py
View file @
c19b8e4a
...
@@ -303,11 +303,6 @@ class CTRLModel(CTRLPreTrainedModel):
...
@@ -303,11 +303,6 @@ class CTRLModel(CTRLPreTrainedModel):
def
forward
(
self
,
input_ids
,
past
=
None
,
attention_mask
=
None
,
token_type_ids
=
None
,
position_ids
=
None
,
head_mask
=
None
):
def
forward
(
self
,
input_ids
,
past
=
None
,
attention_mask
=
None
,
token_type_ids
=
None
,
position_ids
=
None
,
head_mask
=
None
):
input_shape
=
input_ids
.
size
()
input_shape
=
input_ids
.
size
()
input_ids
=
input_ids
.
view
(
-
1
,
input_shape
[
-
1
])
input_ids
=
input_ids
.
view
(
-
1
,
input_shape
[
-
1
])
if
token_type_ids
is
not
None
:
token_type_ids
=
token_type_ids
.
view
(
-
1
,
input_shape
[
-
1
])
if
position_ids
is
not
None
:
position_ids
=
position_ids
.
view
(
-
1
,
input_shape
[
-
1
])
if
past
is
None
:
if
past
is
None
:
past_length
=
0
past_length
=
0
past
=
[
None
]
*
len
(
self
.
h
)
past
=
[
None
]
*
len
(
self
.
h
)
...
@@ -349,42 +344,51 @@ class CTRLModel(CTRLPreTrainedModel):
...
@@ -349,42 +344,51 @@ class CTRLModel(CTRLPreTrainedModel):
else
:
else
:
head_mask
=
[
None
]
*
self
.
config
.
n_layer
head_mask
=
[
None
]
*
self
.
config
.
n_layer
x
=
self
.
w
(
input_ids
)
if
token_type_ids
is
not
None
:
# x = embedded.unsqueeze(0) if len(input_ids.shape)<2 else embedded
token_type_ids
=
token_type_ids
.
view
(
-
1
,
input_shape
[
-
1
])
token_type_embeds
=
self
.
w
(
token_type_ids
)
token_type_embeds
*=
np
.
sqrt
(
self
.
d_model_size
)
else
:
token_type_embeds
=
0
position_ids
=
position_ids
.
view
(
-
1
,
input_shape
[
-
1
])
inputs_embeds
=
self
.
w
(
input_ids
)
# inputs_embeds = embedded.unsqueeze(0) if len(input_ids.shape)<2 else embedded
seq_len
=
input_ids
.
shape
[
-
1
]
seq_len
=
input_ids
.
shape
[
-
1
]
mask
=
torch
.
triu
(
torch
.
ones
(
seq_len
,
seq_len
),
1
).
to
(
x
.
device
)
mask
=
torch
.
triu
(
torch
.
ones
(
seq_len
,
seq_len
),
1
).
to
(
inputs_embeds
.
device
)
inputs_embeds
*=
np
.
sqrt
(
self
.
d_model_size
)
x
*=
np
.
sqrt
(
self
.
d_model_size
)
pos_embeds
=
self
.
pos_encoding
[
position_ids
,
:].
to
(
inputs_embeds
.
device
)
pos_x
=
self
.
pos_encoding
[
position_ids
,
:].
to
(
x
.
device
)
hidden_states
=
inputs_embeds
+
pos_embeds
+
token_type_embeds
x
+=
pos_x
x
=
self
.
dropout
(
x
)
hidden_states
=
self
.
dropout
(
hidden_states
)
output_shape
=
input_shape
+
(
x
.
size
(
-
1
),)
output_shape
=
input_shape
+
(
inputs_embeds
.
size
(
-
1
),)
presents
=
()
presents
=
()
all_hidden_states
=
()
all_hidden_states
=
()
all_attentions
=
[]
all_attentions
=
[]
for
i
,
(
h
,
layer_past
)
in
enumerate
(
zip
(
self
.
h
,
past
)):
for
i
,
(
h
,
layer_past
)
in
enumerate
(
zip
(
self
.
h
,
past
)):
if
self
.
output_hidden_states
:
if
self
.
output_hidden_states
:
all_hidden_states
=
all_hidden_states
+
(
x
.
view
(
*
output_shape
),)
all_hidden_states
=
all_hidden_states
+
(
hidden_states
.
view
(
*
output_shape
),)
outputs
=
h
(
x
,
outputs
=
h
(
hidden_states
,
mask
,
mask
,
layer_past
=
layer_past
,
layer_past
=
layer_past
,
attention_mask
=
attention_mask
,
attention_mask
=
attention_mask
,
head_mask
=
head_mask
[
i
])
head_mask
=
head_mask
[
i
])
x
,
present
=
outputs
[:
2
]
hidden_states
,
present
=
outputs
[:
2
]
presents
=
presents
+
(
present
,)
presents
=
presents
+
(
present
,)
if
self
.
output_attentions
:
if
self
.
output_attentions
:
all_attentions
.
append
(
outputs
[
2
])
all_attentions
.
append
(
outputs
[
2
])
x
=
self
.
layernorm
(
x
)
hidden_states
=
self
.
layernorm
(
hidden_states
)
x
=
x
.
view
(
*
output_shape
)
hidden_states
=
hidden_states
.
view
(
*
output_shape
)
if
self
.
output_hidden_states
:
if
self
.
output_hidden_states
:
all_hidden_states
=
all_hidden_states
+
(
x
,)
all_hidden_states
=
all_hidden_states
+
(
hidden_states
,)
outputs
=
(
x
,
presents
)
outputs
=
(
hidden_states
,
presents
)
if
self
.
output_hidden_states
:
if
self
.
output_hidden_states
:
outputs
=
outputs
+
(
all_hidden_states
,)
outputs
=
outputs
+
(
all_hidden_states
,)
if
self
.
output_attentions
:
if
self
.
output_attentions
:
...
...
transformers/modeling_openai.py
View file @
c19b8e4a
...
@@ -170,7 +170,7 @@ class Attention(nn.Module):
...
@@ -170,7 +170,7 @@ class Attention(nn.Module):
# w = w * self.bias + -1e9 * (1 - self.bias) # TF implem method: mask_attn_weights
# w = w * self.bias + -1e9 * (1 - self.bias) # TF implem method: mask_attn_weights
# XD: self.b may be larger than w, so we need to crop it
# XD: self.b may be larger than w, so we need to crop it
b
=
self
.
bias
[:,
:,
:
w
.
size
(
-
2
),
:
w
.
size
(
-
1
)]
b
=
self
.
bias
[:,
:,
:
w
.
size
(
-
2
),
:
w
.
size
(
-
1
)]
w
=
w
*
b
+
-
1e9
*
(
1
-
b
)
w
=
w
*
b
+
-
1e4
*
(
1
-
b
)
if
attention_mask
is
not
None
:
if
attention_mask
is
not
None
:
# Apply the attention mask
# Apply the attention mask
...
...
transformers/modeling_tf_ctrl.py
View file @
c19b8e4a
...
@@ -238,6 +238,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
...
@@ -238,6 +238,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
past_length
=
shape_list
(
past
[
0
][
0
])[
-
2
]
past_length
=
shape_list
(
past
[
0
][
0
])[
-
2
]
if
position_ids
is
None
:
if
position_ids
is
None
:
position_ids
=
tf
.
range
(
past_length
,
shape_list
(
input_ids
)[
-
1
]
+
past_length
,
dtype
=
tf
.
int32
)[
tf
.
newaxis
,
:]
position_ids
=
tf
.
range
(
past_length
,
shape_list
(
input_ids
)[
-
1
]
+
past_length
,
dtype
=
tf
.
int32
)[
tf
.
newaxis
,
:]
position_ids
=
tf
.
tile
(
position_ids
,
[
shape_list
(
input_ids
)[
0
],
1
])
# Attention mask.
# Attention mask.
if
attention_mask
is
not
None
:
if
attention_mask
is
not
None
:
...
@@ -276,7 +277,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
...
@@ -276,7 +277,7 @@ class TFCTRLMainLayer(tf.keras.layers.Layer):
token_type_embeds
=
0
token_type_embeds
=
0
position_ids
=
tf
.
reshape
(
position_ids
,
[
-
1
,
shape_list
(
position_ids
)[
-
1
]])
position_ids
=
tf
.
reshape
(
position_ids
,
[
-
1
,
shape_list
(
position_ids
)[
-
1
]])
inputs_embeds
=
self
.
w
(
input_ids
)
inputs_embeds
=
self
.
w
(
input_ids
,
mode
=
'embedding'
)
# x = embedded.unsqueeze(0) if len(input_ids.shape)<2 else embedded
# x = embedded.unsqueeze(0) if len(input_ids.shape)<2 else embedded
seq_len
=
input_shape
[
-
1
]
seq_len
=
input_shape
[
-
1
]
mask
=
1
-
tf
.
linalg
.
band_part
(
tf
.
ones
((
seq_len
,
seq_len
)),
-
1
,
0
)
mask
=
1
-
tf
.
linalg
.
band_part
(
tf
.
ones
((
seq_len
,
seq_len
)),
-
1
,
0
)
...
...
transformers/tests/modeling_tf_common_test.py
View file @
c19b8e4a
...
@@ -81,8 +81,9 @@ class TFCommonTestCases:
...
@@ -81,8 +81,9 @@ class TFCommonTestCases:
pt_model_class_name
=
model_class
.
__name__
[
2
:]
# Skip the "TF" at the beginning
pt_model_class_name
=
model_class
.
__name__
[
2
:]
# Skip the "TF" at the beginning
pt_model_class
=
getattr
(
transformers
,
pt_model_class_name
)
pt_model_class
=
getattr
(
transformers
,
pt_model_class_name
)
tf_model
=
model_class
(
config
,
output_hidden_states
=
True
)
config
.
output_hidden_states
=
True
pt_model
=
pt_model_class
(
config
,
output_hidden_states
=
True
)
tf_model
=
model_class
(
config
)
pt_model
=
pt_model_class
(
config
)
# Check we can load pt model in tf and vice-versa (architecture similar)
# Check we can load pt model in tf and vice-versa (architecture similar)
tf_model
=
transformers
.
load_pytorch_model_in_tf2_model
(
tf_model
,
pt_model
,
tf_inputs
=
inputs_dict
)
tf_model
=
transformers
.
load_pytorch_model_in_tf2_model
(
tf_model
,
pt_model
,
tf_inputs
=
inputs_dict
)
...
@@ -96,7 +97,7 @@ class TFCommonTestCases:
...
@@ -96,7 +97,7 @@ class TFCommonTestCases:
pto
=
pt_model
(
**
pt_inputs_dict
)
pto
=
pt_model
(
**
pt_inputs_dict
)
tfo
=
tf_model
(
inputs_dict
)
tfo
=
tf_model
(
inputs_dict
)
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
[
0
].
numpy
()
-
pto
[
0
].
numpy
()))
max_diff
=
np
.
amax
(
np
.
abs
(
tfo
[
0
].
numpy
()
-
pto
[
0
].
numpy
()))
self
.
assertLessEqual
(
max_diff
,
2e-2
)
self
.
assertLessEqual
(
max_diff
,
2e-5
)
def
test_keyword_and_dict_args
(
self
):
def
test_keyword_and_dict_args
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment