transformers: commit 2d2ed2cc (unverified)
Authored Mar 03, 2021 by Patrick von Platen; committed by GitHub on Mar 03, 2021
[T5] Fix speed degradation bug t5 (#10496)
* fix speed degradation bug t5
* fix for all models
* fix code quality
Parent: 5dc303e2
Showing 9 changed files with 30 additions and 11 deletions.
src/transformers/models/bart/modeling_bart.py (+3, -1)
src/transformers/models/blenderbot/modeling_blenderbot.py (+3, -1)
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py (+3, -1)
src/transformers/models/led/modeling_led.py (+3, -1)
src/transformers/models/marian/modeling_marian.py (+3, -1)
src/transformers/models/mbart/modeling_mbart.py (+3, -1)
src/transformers/models/pegasus/modeling_pegasus.py (+3, -1)
src/transformers/models/t5/modeling_t5.py (+8, -3)
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py (+1, -1)
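All nine files get the same guard: the inf/nan clamp that protects fp16 training now runs only when the activations are actually float16. The expensive part was never the clamp but the check itself. torch.isinf(hidden_states).any() launches a full reduction over the tensor and, when used as a Python condition, forces a GPU-to-CPU synchronization in every layer of every forward pass; that per-layer sync is the speed degradation the commit title refers to. The dtype comparison is a cheap host-side check that short-circuits, so fp32 runs skip the reduction entirely. A minimal standalone sketch of the pattern (the helper name is illustrative, not from the repo):

import torch

def clamp_fp16_overflow(hidden_states: torch.Tensor) -> torch.Tensor:
    # Host-side dtype check short-circuits before any GPU reduction is launched.
    if hidden_states.dtype == torch.float16 and (
        torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
    ):
        # Largest finite fp16 value minus a safety margin.
        clamp_value = torch.finfo(hidden_states.dtype).max - 1000
        hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
    return hidden_states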
src/transformers/models/bart/modeling_bart.py

@@ -319,7 +319,9 @@ class BartEncoderLayer(nn.Module):
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
-        if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and (
+            torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
+        ):
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
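A hypothetical micro-benchmark of the check alone (illustrative, not part of the commit; assumes a CUDA device is available) makes the cost visible: the old form pays for two reductions plus a device sync per call even on fp32 tensors, while the new form returns immediately.

import time
import torch

x = torch.randn(8, 1024, 1024, device="cuda")  # fp32: the case the fix speeds up

def old_check(t):
    # Reduction + GPU-to-CPU sync on every call, regardless of dtype.
    return torch.isinf(t).any() or torch.isnan(t).any()

def new_check(t):
    # Short-circuits on the host for fp32; reductions only run for fp16.
    return t.dtype == torch.float16 and (torch.isinf(t).any() or torch.isnan(t).any())

for check in (old_check, new_check):
    torch.cuda.synchronize()
    start = time.perf_counter()
    for _ in range(100):
        bool(check(x))
    torch.cuda.synchronize()
    print(check.__name__, f"{time.perf_counter() - start:.4f}s")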
src/transformers/models/blenderbot/modeling_blenderbot.py

@@ -322,7 +322,9 @@ class BlenderbotEncoderLayer(nn.Module):
         hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
-        if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and (
+            torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
+        ):
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
src/transformers/models/blenderbot_small/modeling_blenderbot_small.py

@@ -320,7 +320,9 @@ class BlenderbotSmallEncoderLayer(nn.Module):
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
-        if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and (
+            torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
+        ):
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
src/transformers/models/led/modeling_led.py

@@ -925,7 +925,9 @@ class LEDEncoderLayer(nn.Module):
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
-        if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and (
+            torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
+        ):
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
         return (hidden_states,) + attn_outputs[1:]
src/transformers/models/marian/modeling_marian.py

@@ -337,7 +337,9 @@ class MarianEncoderLayer(nn.Module):
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
-        if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and (
+            torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
+        ):
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
src/transformers/models/mbart/modeling_mbart.py

@@ -326,7 +326,9 @@ class MBartEncoderLayer(nn.Module):
         hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
-        if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and (
+            torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
+        ):
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
src/transformers/models/pegasus/modeling_pegasus.py

@@ -337,7 +337,9 @@ class PegasusEncoderLayer(nn.Module):
         hidden_states = F.dropout(hidden_states, p=self.dropout, training=self.training)
         hidden_states = residual + hidden_states
-        if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and (
+            torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()
+        ):
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
src/transformers/models/t5/modeling_t5.py

@@ -643,7 +643,7 @@ class T5Block(nn.Module):
         attention_outputs = self_attention_outputs[2:]  # Keep self-attention outputs and relative position weights

         # clamp inf values to enable fp16 training
-        if torch.isinf(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and torch.isinf(hidden_states).any():
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

@@ -668,7 +668,9 @@ class T5Block(nn.Module):
                 output_attentions=output_attentions,
             )
             hidden_states = cross_attention_outputs[0]
-            if torch.isinf(hidden_states).any():
+
+            # clamp inf values to enable fp16 training
+            if hidden_states.dtype == torch.float16 and torch.isinf(hidden_states).any():
                 clamp_value = torch.finfo(hidden_states.dtype).max - 1000
                 hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)

@@ -681,9 +683,12 @@ class T5Block(nn.Module):
         # Apply Feed Forward layer
         hidden_states = self.layer[-1](hidden_states)
-        if torch.isinf(hidden_states).any():
+
+        # clamp inf values to enable fp16 training
+        if hidden_states.dtype == torch.float16 and torch.isinf(hidden_states).any():
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)
+
         outputs = (hidden_states,)

         outputs = outputs + (present_key_value_state,) + attention_outputs
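The T5 hunks differ from the encoder-layer ones in two small ways: T5 only ever checked for inf (no isnan term), so its guard fits on one line, and the existing "# clamp inf values to enable fp16 training" comment is carried to the two clamp sites that lacked it. Behavior under fp16 is unchanged, as a small standalone illustration (assumed values, not from the repo) shows:

import torch

t = torch.tensor([float("inf"), 1.0], dtype=torch.float16)
if t.dtype == torch.float16 and torch.isinf(t).any():
    clamp_value = torch.finfo(t.dtype).max - 1000  # 65504 - 1000
    t = torch.clamp(t, min=-clamp_value, max=clamp_value)
print(t)  # the inf entry is pulled back to a large finite fp16 value; 1.0 is untouched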
templates/adding_a_new_model/cookiecutter-template-{{cookiecutter.modelname}}/modeling_{{cookiecutter.lowercase_modelname}}.py

@@ -1824,7 +1824,7 @@ class {{cookiecutter.camelcase_modelname}}EncoderLayer(nn.Module):
         hidden_states = residual + hidden_states
         hidden_states = self.final_layer_norm(hidden_states)
-        if torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any():
+        if hidden_states.dtype == torch.float16 and (torch.isinf(hidden_states).any() or torch.isnan(hidden_states).any()):
             clamp_value = torch.finfo(hidden_states.dtype).max - 1000
             hidden_states = torch.clamp(hidden_states, min=-clamp_value, max=clamp_value)