chenpangpang/transformers, commit 372ab9cd

Unverified commit 372ab9cd, authored Jun 14, 2021 by Stas Bekman, committed via GitHub on Jun 14, 2021.

[style] consistent nn. and nn.functional: part 3 `tests` (#12155)

* consistent nn. and nn.functional: p3 templates
* restore

Parent: d9c0d08f
Showing 14 changed files with 93 additions and 81 deletions (+93, -81).
Changed files:

  tests/test_generation_logits_process.py   +4  -4
  tests/test_modeling_clip.py                +3  -2
  tests/test_modeling_common.py              +5  -4
  tests/test_modeling_deit.py                +3  -2
  tests/test_modeling_fsmt.py                +4  -3
  tests/test_modeling_ibert.py               +17 -17
  tests/test_modeling_reformer.py            +2  -1
  tests/test_modeling_transfo_xl.py          +3  -2
  tests/test_modeling_vit.py                 +3  -2
  tests/test_optimization.py                 +4  -3
  tests/test_pipelines_conversational.py     +3  -2
  tests/test_pipelines_summarization.py      +2  -1
  tests/test_trainer.py                      +25 -24
  tests/test_trainer_utils.py                +15 -14
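The change is mechanical and identical in every file: the old spellings `torch.nn.X`, `import torch.nn as nn`, and `import torch.nn.functional as F` are replaced by a single `from torch import nn` import, with modules written as `nn.X` and functionals as `nn.functional.X`. A minimal before/after sketch of the convention (illustrative only, not taken from any one file below):

# Before: mixed spellings of the same namespace.
import torch
import torch.nn.functional as F

layer = torch.nn.Linear(4, 4)
probs = F.softmax(layer(torch.randn(2, 4)), dim=-1)

# After: one import, one spelling.
import torch
from torch import nn

layer = nn.Linear(4, 4)
probs = nn.functional.softmax(layer(torch.randn(2, 4)), dim=-1)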
tests/test_generation_logits_process.py

@@ -24,7 +24,7 @@ from .test_modeling_common import ids_tensor

 if is_torch_available():
     import torch
-    import torch.nn.functional as F
+    from torch import nn

     from transformers.generation_logits_process import (
         EncoderNoRepeatNGramLogitsProcessor,

@@ -80,13 +80,13 @@ class LogitsProcessorTest(unittest.TestCase):
         scores[1, 10] = (1 / length) - 0.4  # valley, 1st batch

         # compute softmax
-        probs = F.softmax(scores, dim=-1)
+        probs = nn.functional.softmax(scores, dim=-1)

         temp_dist_warper_sharper = TemperatureLogitsWarper(temperature=0.5)
         temp_dist_warper_smoother = TemperatureLogitsWarper(temperature=1.3)

-        warped_prob_sharp = F.softmax(temp_dist_warper_sharper(input_ids, scores.clone()), dim=-1)
-        warped_prob_smooth = F.softmax(temp_dist_warper_smoother(input_ids, scores.clone()), dim=-1)
+        warped_prob_sharp = nn.functional.softmax(temp_dist_warper_sharper(input_ids, scores.clone()), dim=-1)
+        warped_prob_smooth = nn.functional.softmax(temp_dist_warper_smoother(input_ids, scores.clone()), dim=-1)

         # uniform distribution stays uniform
         self.assertTrue(torch.allclose(probs[0, :], warped_prob_sharp[0, :], atol=1e-3))
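The hunk above touches the temperature-warping test; the property it asserts, that a temperature below 1 sharpens the softmax distribution while a temperature above 1 flattens it, can be reproduced with plain `nn.functional` calls. A standalone sketch (dividing the scores by the temperature directly instead of going through `TemperatureLogitsWarper`):

import torch
from torch import nn

scores = torch.tensor([[2.0, 1.0, 0.5, 0.1]])

probs = nn.functional.softmax(scores, dim=-1)
sharp = nn.functional.softmax(scores / 0.5, dim=-1)   # temperature 0.5: peakier
smooth = nn.functional.softmax(scores / 1.3, dim=-1)  # temperature 1.3: flatter

# The sharpened distribution puts more mass on the top score, the smoothed
# one puts less, which is what the warper test checks.
assert sharp.max().item() > probs.max().item() > smooth.max().item()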
tests/test_modeling_clip.py

@@ -30,6 +30,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init, floats_te

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers import CLIPConfig, CLIPModel, CLIPTextConfig, CLIPTextModel, CLIPVisionConfig, CLIPVisionModel
     from transformers.models.clip.modeling_clip import CLIP_PRETRAINED_MODEL_ARCHIVE_LIST

@@ -140,9 +141,9 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
         for model_class in self.all_model_classes:
             model = model_class(config)
-            self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Module))
+            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
             x = model.get_output_embeddings()
-            self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+            self.assertTrue(x is None or isinstance(x, nn.Linear))

     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
tests/test_modeling_common.py

@@ -44,6 +44,7 @@ from transformers.testing_utils import (

 if is_torch_available():
     import numpy as np
     import torch
+    from torch import nn

     from transformers import (
         BERT_PRETRAINED_MODEL_ARCHIVE_LIST,

@@ -1150,10 +1151,10 @@ class ModelTesterMixin:
         for model_class in self.all_model_classes:
             model = model_class(config)
-            self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Embedding, AdaptiveEmbedding))
-            model.set_input_embeddings(torch.nn.Embedding(10, 10))
+            self.assertIsInstance(model.get_input_embeddings(), (nn.Embedding, AdaptiveEmbedding))
+            model.set_input_embeddings(nn.Embedding(10, 10))
             x = model.get_output_embeddings()
-            self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+            self.assertTrue(x is None or isinstance(x, nn.Linear))

     def test_correct_missing_keys(self):
         if not self.test_missing_keys:

@@ -1337,7 +1338,7 @@ class ModelTesterMixin:
             model.eval()

             # Wrap model in nn.DataParallel
-            model = torch.nn.DataParallel(model)
+            model = nn.DataParallel(model)
             with torch.no_grad():
                 _ = model(**self._prepare_for_class(inputs_dict, model_class))
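The last hunk only renames the `DataParallel` wrapper; the wrap-and-forward pattern itself is unchanged. A minimal sketch with a toy module (on a machine with no visible CUDA device, `nn.DataParallel` simply delegates to the wrapped module, so this also runs on CPU):

import torch
from torch import nn

model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 2))
model.eval()

# Wrap model in nn.DataParallel and run a forward pass without gradients.
model = nn.DataParallel(model)
with torch.no_grad():
    _ = model(torch.randn(4, 8))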
tests/test_modeling_deit.py

@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers import (
         MODEL_MAPPING,

@@ -176,9 +177,9 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
         for model_class in self.all_model_classes:
             model = model_class(config)
-            self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Module))
+            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
             x = model.get_output_embeddings()
-            self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+            self.assertTrue(x is None or isinstance(x, nn.Linear))

     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
tests/test_modeling_fsmt.py

@@ -30,6 +30,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers import FSMTConfig, FSMTForConditionalGeneration, FSMTModel, FSMTTokenizer
     from transformers.models.fsmt.modeling_fsmt import (

@@ -160,10 +161,10 @@ class FSMTModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
         for model_class in self.all_model_classes:
             model = model_class(config)
-            self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Embedding))
-            model.set_input_embeddings(torch.nn.Embedding(10, 10))
+            self.assertIsInstance(model.get_input_embeddings(), (nn.Embedding))
+            model.set_input_embeddings(nn.Embedding(10, 10))
             x = model.get_output_embeddings()
-            self.assertTrue(x is None or isinstance(x, torch.nn.modules.sparse.Embedding))
+            self.assertTrue(x is None or isinstance(x, nn.modules.sparse.Embedding))

     def test_initialization_more(self):
         config, inputs_dict = self.model_tester.prepare_config_and_inputs()
tests/test_modeling_ibert.py

@@ -26,7 +26,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention

 if is_torch_available():
     import torch
-    import torch.nn as nn
+    from torch import nn

     from transformers import (
         IBERT_PRETRAINED_MODEL_ARCHIVE_LIST,

@@ -304,9 +304,9 @@ class IBertModelTest(ModelTesterMixin, unittest.TestCase):
         for model_class in self.all_model_classes:
             model = model_class(config)
             self.assertIsInstance(model.get_input_embeddings(), QuantEmbedding)
-            model.set_input_embeddings(torch.nn.Embedding(10, 10))
+            model.set_input_embeddings(nn.Embedding(10, 10))
             x = model.get_output_embeddings()
-            self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+            self.assertTrue(x is None or isinstance(x, nn.Linear))

     # Override
     def test_feed_forward_chunking(self):

@@ -350,7 +350,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
         weight_bit = 8
         embedding = QuantEmbedding(2, 4, quant_mode=True, weight_bit=weight_bit)
         embedding_weight = torch.tensor([[-1.0, -2.0, -3.0, -4.0], [5.0, 6.0, 7.0, 8.0]])
-        embedding.weight = torch.nn.Parameter(embedding_weight)
+        embedding.weight = nn.Parameter(embedding_weight)

         expected_scaling_factor = embedding_weight.abs().max() / (2 ** (weight_bit - 1) - 1)
         x, x_scaling_factor = embedding(torch.tensor(0))

@@ -447,8 +447,8 @@ class IBertModelIntegrationTest(unittest.TestCase):
         linear_q = QuantLinear(2, 4, quant_mode=True, per_channel=per_channel, weight_bit=weight_bit)
         linear_dq = QuantLinear(2, 4, quant_mode=False, per_channel=per_channel, weight_bit=weight_bit)
         linear_weight = torch.tensor([[-1.0, 2.0, 3.0, -4.0], [5.0, -6.0, -7.0, 8.0]]).T
-        linear_q.weight = torch.nn.Parameter(linear_weight)
-        linear_dq.weight = torch.nn.Parameter(linear_weight)
+        linear_q.weight = nn.Parameter(linear_weight)
+        linear_dq.weight = nn.Parameter(linear_weight)

         q, q_scaling_factor = linear_q(x, x_scaling_factor)
         q_int = q / q_scaling_factor

@@ -477,7 +477,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
     def test_int_gelu(self):
         gelu_q = IntGELU(quant_mode=True)
-        gelu_dq = torch.nn.GELU()
+        gelu_dq = nn.GELU()

         x_int = torch.range(-10000, 10000, 1)
         x_scaling_factor = torch.tensor(0.001)

@@ -523,7 +523,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
     def test_int_softmax(self):
         output_bit = 8
         softmax_q = IntSoftmax(output_bit, quant_mode=True)
-        softmax_dq = torch.nn.Softmax()
+        softmax_dq = nn.Softmax()

         # x_int = torch.range(-10000, 10000, 1)
         def _test(array):

@@ -590,12 +590,12 @@ class IBertModelIntegrationTest(unittest.TestCase):
         x = x_int * x_scaling_factor

         ln_q = IntLayerNorm(x.shape[1:], 1e-5, quant_mode=True, output_bit=output_bit)
-        ln_dq = torch.nn.LayerNorm(x.shape[1:], 1e-5)
+        ln_dq = nn.LayerNorm(x.shape[1:], 1e-5)

-        ln_q.weight = torch.nn.Parameter(torch.ones(x.shape[1:]))
-        ln_q.bias = torch.nn.Parameter(torch.ones(x.shape[1:]))
-        ln_dq.weight = torch.nn.Parameter(torch.ones(x.shape[1:]))
-        ln_dq.bias = torch.nn.Parameter(torch.ones(x.shape[1:]))
+        ln_q.weight = nn.Parameter(torch.ones(x.shape[1:]))
+        ln_q.bias = nn.Parameter(torch.ones(x.shape[1:]))
+        ln_dq.weight = nn.Parameter(torch.ones(x.shape[1:]))
+        ln_dq.bias = nn.Parameter(torch.ones(x.shape[1:]))

         q, q_scaling_factor = ln_q(x, x_scaling_factor)
         q_int = q / q_scaling_factor

@@ -627,13 +627,13 @@ class IBertModelIntegrationTest(unittest.TestCase):
             ],
         }

-        ln_dq.weight = torch.nn.Parameter(torch.ones(x.shape[1:]))
-        ln_dq.bias = torch.nn.Parameter(torch.ones(x.shape[1:]))
+        ln_dq.weight = nn.Parameter(torch.ones(x.shape[1:]))
+        ln_dq.bias = nn.Parameter(torch.ones(x.shape[1:]))
         dq, dq_scaling_factor = ln_dq(x, x_scaling_factor)
         for label, ln_fdqs in ln_fdqs_dict.items():
             for ln_fdq in ln_fdqs:
-                ln_fdq.weight = torch.nn.Parameter(torch.ones(x.shape[1:]))
-                ln_fdq.bias = torch.nn.Parameter(torch.ones(x.shape[1:]))
+                ln_fdq.weight = nn.Parameter(torch.ones(x.shape[1:]))
+                ln_fdq.bias = nn.Parameter(torch.ones(x.shape[1:]))
                 q, q_scaling_factor = ln_fdq(x, x_scaling_factor)
                 if label:
                     self.assertTrue(torch.allclose(q, dq, atol=1e-4))
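Beyond the renames, the I-BERT hunks restate the symmetric-quantization formula the integration test asserts: scale = max(|W|) / (2 ** (weight_bit - 1) - 1). A standalone round trip with plain tensors (just the arithmetic the test checks against, not the `QuantEmbedding` implementation):

import torch

weight_bit = 8
weight = torch.tensor([[-1.0, -2.0, -3.0, -4.0], [5.0, 6.0, 7.0, 8.0]])

# Largest magnitude (8.0) maps to the largest signed 8-bit value (127).
scale = weight.abs().max() / (2 ** (weight_bit - 1) - 1)  # 8 / 127, roughly 0.063

w_int = torch.round(weight / scale).clamp(-(2 ** (weight_bit - 1)), 2 ** (weight_bit - 1) - 1)
w_deq = w_int * scale

# Round-trip error is bounded by half a quantization step.
assert (weight - w_deq).abs().max() <= scale / 2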
tests/test_modeling_reformer.py

@@ -32,6 +32,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, r

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers import (
         REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST,

@@ -241,7 +242,7 @@ class ReformerModelTester:
         # set all position encodings to zero so that postions don't matter
         with torch.no_grad():
             embedding = model.embeddings.position_embeddings.embedding
-            embedding.weight = torch.nn.Parameter(torch.zeros(embedding.weight.shape).to(torch_device))
+            embedding.weight = nn.Parameter(torch.zeros(embedding.weight.shape).to(torch_device))
             embedding.weight.requires_grad = False

         half_seq_len = self.seq_length // 2
tests/test_modeling_transfo_xl.py

@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers import TransfoXLConfig, TransfoXLForSequenceClassification, TransfoXLLMHeadModel, TransfoXLModel
     from transformers.models.transfo_xl.modeling_transfo_xl import TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST

@@ -362,11 +363,11 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
             if hasattr(module, "emb_projs"):
                 for i in range(len(module.emb_projs)):
                     if module.emb_projs[i] is not None:
-                        torch.nn.init.constant_(module.emb_projs[i], 0.0003)
+                        nn.init.constant_(module.emb_projs[i], 0.0003)
             if hasattr(module, "out_projs"):
                 for i in range(len(module.out_projs)):
                     if module.out_projs[i] is not None:
-                        torch.nn.init.constant_(module.out_projs[i], 0.0003)
+                        nn.init.constant_(module.out_projs[i], 0.0003)

         for param in ["r_emb", "r_w_bias", "r_r_bias", "r_bias"]:
             if hasattr(module, param) and getattr(module, param) is not None:
tests/test_modeling_vit.py

@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers import ViTConfig, ViTForImageClassification, ViTModel
     from transformers.models.vit.modeling_vit import VIT_PRETRAINED_MODEL_ARCHIVE_LIST, to_2tuple

@@ -169,9 +170,9 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase):
         for model_class in self.all_model_classes:
             model = model_class(config)
-            self.assertIsInstance(model.get_input_embeddings(), (torch.nn.Module))
+            self.assertIsInstance(model.get_input_embeddings(), (nn.Module))
             x = model.get_output_embeddings()
-            self.assertTrue(x is None or isinstance(x, torch.nn.Linear))
+            self.assertTrue(x is None or isinstance(x, nn.Linear))

     def test_forward_signature(self):
         config, _ = self.model_tester.prepare_config_and_inputs_for_common()
tests/test_optimization.py

@@ -24,6 +24,7 @@ from transformers.testing_utils import require_torch

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers import (
         Adafactor,

@@ -70,7 +71,7 @@ class OptimizationTest(unittest.TestCase):
     def test_adam_w(self):
         w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True)
         target = torch.tensor([0.4, 0.2, -0.5])
-        criterion = torch.nn.MSELoss()
+        criterion = nn.MSELoss()
         # No warmup, constant schedule, no gradient clipping
         optimizer = AdamW(params=[w], lr=2e-1, weight_decay=0.0)
         for _ in range(100):

@@ -84,7 +85,7 @@ class OptimizationTest(unittest.TestCase):
     def test_adafactor(self):
         w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True)
         target = torch.tensor([0.4, 0.2, -0.5])
-        criterion = torch.nn.MSELoss()
+        criterion = nn.MSELoss()
         # No warmup, constant schedule, no gradient clipping
         optimizer = Adafactor(
             params=[w],

@@ -109,7 +110,7 @@ class OptimizationTest(unittest.TestCase):
 @require_torch
 class ScheduleInitTest(unittest.TestCase):
-    m = torch.nn.Linear(50, 50) if is_torch_available() else None
+    m = nn.Linear(50, 50) if is_torch_available() else None
     optimizer = AdamW(m.parameters(), lr=10.0) if is_torch_available() else None
     num_steps = 10
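The optimizer tests above share the same toy recipe: fit a 3-element tensor to a target with MSE and check that it converges. Restated as a self-contained script, using `torch.optim.AdamW` here in place of the `transformers` AdamW implementation the test actually exercises:

import torch
from torch import nn

w = torch.tensor([0.1, -0.2, -0.1], requires_grad=True)
target = torch.tensor([0.4, 0.2, -0.5])
criterion = nn.MSELoss()

# No warmup, constant schedule, no gradient clipping (mirrors the test setup).
optimizer = torch.optim.AdamW(params=[w], lr=2e-1, weight_decay=0.0)
for _ in range(100):
    loss = criterion(w, target)
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

print(w.tolist())  # should end up close to [0.4, 0.2, -0.5]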
tests/test_pipelines_conversational.py

@@ -32,6 +32,7 @@ from .test_pipelines_common import MonoInputPipelineCommonMixin

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers.models.gpt2 import GPT2Config, GPT2LMHeadModel

@@ -59,8 +60,8 @@ class SimpleConversationPipelineTests(unittest.TestCase):
         bias[76] = 1
         weight = torch.zeros((V, D), requires_grad=True)

-        model.lm_head.bias = torch.nn.Parameter(bias)
-        model.lm_head.weight = torch.nn.Parameter(weight)
+        model.lm_head.bias = nn.Parameter(bias)
+        model.lm_head.weight = nn.Parameter(weight)

         # # Created with:
         # import tempfile
tests/test_pipelines_summarization.py

@@ -23,6 +23,7 @@ from .test_pipelines_common import MonoInputPipelineCommonMixin

 if is_torch_available():
     import torch
+    from torch import nn

     from transformers.models.bart import BartConfig, BartForConditionalGeneration

@@ -55,7 +56,7 @@ class SimpleSummarizationPipelineTests(unittest.TestCase):
         bias = torch.zeros(V)
         bias[76] = 10

-        model.lm_head.bias = torch.nn.Parameter(bias)
+        model.lm_head.bias = nn.Parameter(bias)

         # # Generated with:
         # import tempfile
tests/test_trainer.py

@@ -53,6 +53,7 @@ from transformers.utils.hp_naming import TrialShortNamer

 if is_torch_available():
     import torch
+    from torch import nn
     from torch.utils.data import IterableDataset

     from transformers import (

@@ -154,11 +155,11 @@ if is_torch_available():
             for i in range(len(self.dataset)):
                 yield self.dataset[i]

-    class RegressionModel(torch.nn.Module):
+    class RegressionModel(nn.Module):
         def __init__(self, a=0, b=0, double_output=False):
             super().__init__()
-            self.a = torch.nn.Parameter(torch.tensor(a).float())
-            self.b = torch.nn.Parameter(torch.tensor(b).float())
+            self.a = nn.Parameter(torch.tensor(a).float())
+            self.b = nn.Parameter(torch.tensor(b).float())
             self.double_output = double_output
             self.config = None

@@ -166,21 +167,21 @@ if is_torch_available():
             y = input_x * self.a + self.b
             if labels is None:
                 return (y, y) if self.double_output else (y,)
-            loss = torch.nn.functional.mse_loss(y, labels)
+            loss = nn.functional.mse_loss(y, labels)
             return (loss, y, y) if self.double_output else (loss, y)

-    class RegressionDictModel(torch.nn.Module):
+    class RegressionDictModel(nn.Module):
         def __init__(self, a=0, b=0):
             super().__init__()
-            self.a = torch.nn.Parameter(torch.tensor(a).float())
-            self.b = torch.nn.Parameter(torch.tensor(b).float())
+            self.a = nn.Parameter(torch.tensor(a).float())
+            self.b = nn.Parameter(torch.tensor(b).float())
             self.config = None

         def forward(self, input_x, labels=None, **kwargs):
             y = input_x * self.a + self.b
             result = {"output": y}
             if labels is not None:
-                result["loss"] = torch.nn.functional.mse_loss(y, labels)
+                result["loss"] = nn.functional.mse_loss(y, labels)
             return result

     class RegressionPreTrainedModel(PreTrainedModel):

@@ -189,15 +190,15 @@ if is_torch_available():
         def __init__(self, config):
             super().__init__(config)
-            self.a = torch.nn.Parameter(torch.tensor(config.a).float())
-            self.b = torch.nn.Parameter(torch.tensor(config.b).float())
+            self.a = nn.Parameter(torch.tensor(config.a).float())
+            self.b = nn.Parameter(torch.tensor(config.b).float())
             self.double_output = config.double_output

         def forward(self, input_x, labels=None, **kwargs):
             y = input_x * self.a + self.b
             if labels is None:
                 return (y, y) if self.double_output else (y,)
-            loss = torch.nn.functional.mse_loss(y, labels)
+            loss = nn.functional.mse_loss(y, labels)
             return (loss, y, y) if self.double_output else (loss, y)

     class RegressionRandomPreTrainedModel(PreTrainedModel):

@@ -206,8 +207,8 @@ if is_torch_available():
         def __init__(self, config):
             super().__init__(config)
-            self.a = torch.nn.Parameter(torch.tensor(config.a).float())
-            self.b = torch.nn.Parameter(torch.tensor(config.b).float())
+            self.a = nn.Parameter(torch.tensor(config.a).float())
+            self.b = nn.Parameter(torch.tensor(config.b).float())

         def forward(self, input_x, labels=None, **kwargs):
             y = input_x * self.a + self.b

@@ -219,21 +220,21 @@ if is_torch_available():
             if labels is None:
                 return (y,)
-            loss = torch.nn.functional.mse_loss(y, labels)
+            loss = nn.functional.mse_loss(y, labels)
             return (loss, y)

-    class TstLayer(torch.nn.Module):
+    class TstLayer(nn.Module):
         def __init__(self, hidden_size):
             super().__init__()
-            self.linear1 = torch.nn.Linear(hidden_size, hidden_size)
-            self.ln1 = torch.nn.LayerNorm(hidden_size)
-            self.linear2 = torch.nn.Linear(hidden_size, hidden_size)
-            self.ln2 = torch.nn.LayerNorm(hidden_size)
-            self.bias = torch.nn.Parameter(torch.zeros(hidden_size))
+            self.linear1 = nn.Linear(hidden_size, hidden_size)
+            self.ln1 = nn.LayerNorm(hidden_size)
+            self.linear2 = nn.Linear(hidden_size, hidden_size)
+            self.ln2 = nn.LayerNorm(hidden_size)
+            self.bias = nn.Parameter(torch.zeros(hidden_size))

         def forward(self, x):
-            h = self.ln1(torch.nn.functional.relu(self.linear1(x)))
-            h = torch.nn.functional.relu(self.linear2(x))
+            h = self.ln1(nn.functional.relu(self.linear1(x)))
+            h = nn.functional.relu(self.linear2(x))
             return self.ln2(x + h + self.bias)

     def get_regression_trainer(a=0, b=0, double_output=False, train_len=64, eval_len=64, pretrained=True, **kwargs):

@@ -1065,7 +1066,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         assert_flos_extraction(trainer, trainer.model)

         # with enforced DataParallel
-        assert_flos_extraction(trainer, torch.nn.DataParallel(trainer.model))
+        assert_flos_extraction(trainer, nn.DataParallel(trainer.model))

         trainer.train()
         self.assertTrue(isinstance(trainer.state.total_flos, float))

@@ -1186,7 +1187,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         self.assertAlmostEqual(fp16_eval, fp32_init / 2, delta=5_000)

     def test_no_wd_param_group(self):
-        model = torch.nn.Sequential(TstLayer(128), torch.nn.ModuleList([TstLayer(128), TstLayer(128)]))
+        model = nn.Sequential(TstLayer(128), nn.ModuleList([TstLayer(128), TstLayer(128)]))
         trainer = Trainer(model=model)
         trainer.create_optimizer_and_scheduler(10)
         # fmt: off
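The `test_no_wd_param_group` hunk at the end is again only a rename, but the behaviour it guards, excluding biases and LayerNorm parameters from weight decay, is worth spelling out. An illustrative helper under that assumption (not the actual `get_parameter_names`-based logic inside `Trainer`):

import torch
from torch import nn

def no_decay_param_names(model):
    # Names of parameters that conventionally get no weight decay:
    # every bias, plus all parameters of LayerNorm modules.
    # (Illustrative heuristic, not the Trainer's exact implementation.)
    skip = set()
    for module_name, module in model.named_modules():
        for param_name, _ in module.named_parameters(recurse=False):
            full_name = f"{module_name}.{param_name}" if module_name else param_name
            if isinstance(module, nn.LayerNorm) or param_name == "bias":
                skip.add(full_name)
    return skip

model = nn.Sequential(nn.Linear(128, 128), nn.LayerNorm(128), nn.Linear(128, 128))
skip = no_decay_param_names(model)
param_groups = [
    {"params": [p for n, p in model.named_parameters() if n not in skip], "weight_decay": 0.01},
    {"params": [p for n, p in model.named_parameters() if n in skip], "weight_decay": 0.0},
]
optimizer = torch.optim.AdamW(param_groups, lr=1e-3)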
tests/test_trainer_utils.py

@@ -24,6 +24,7 @@ from transformers.testing_utils import require_torch

 if is_torch_available():
     import torch
+    from torch import nn
     from torch.utils.data import IterableDataset

     from transformers.modeling_outputs import SequenceClassifierOutput

@@ -40,18 +41,18 @@ if is_torch_available():
         get_parameter_names,
     )

-    class TstLayer(torch.nn.Module):
+    class TstLayer(nn.Module):
         def __init__(self, hidden_size):
             super().__init__()
-            self.linear1 = torch.nn.Linear(hidden_size, hidden_size)
-            self.ln1 = torch.nn.LayerNorm(hidden_size)
-            self.linear2 = torch.nn.Linear(hidden_size, hidden_size)
-            self.ln2 = torch.nn.LayerNorm(hidden_size)
-            self.bias = torch.nn.Parameter(torch.zeros(hidden_size))
+            self.linear1 = nn.Linear(hidden_size, hidden_size)
+            self.ln1 = nn.LayerNorm(hidden_size)
+            self.linear2 = nn.Linear(hidden_size, hidden_size)
+            self.ln2 = nn.LayerNorm(hidden_size)
+            self.bias = nn.Parameter(torch.zeros(hidden_size))

         def forward(self, x):
-            h = self.ln1(torch.nn.functional.relu(self.linear1(x)))
-            h = torch.nn.functional.relu(self.linear2(x))
+            h = self.ln1(nn.functional.relu(self.linear1(x)))
+            h = nn.functional.relu(self.linear2(x))
             return self.ln2(x + h + self.bias)

     class RandomIterableDataset(IterableDataset):

@@ -151,10 +152,10 @@ class TrainerUtilsTest(unittest.TestCase):
         num_labels = 12
         random_logits = torch.randn(4, 5, num_labels)
         random_labels = torch.randint(0, num_labels, (4, 5))
-        loss = torch.nn.functional.cross_entropy(random_logits.view(-1, num_labels), random_labels.view(-1))
+        loss = nn.functional.cross_entropy(random_logits.view(-1, num_labels), random_labels.view(-1))
         model_output = SequenceClassifierOutput(logits=random_logits)
         label_smoothed_loss = LabelSmoother(0.1)(model_output, random_labels)
-        log_probs = -torch.nn.functional.log_softmax(random_logits, dim=-1)
+        log_probs = -nn.functional.log_softmax(random_logits, dim=-1)
         expected_loss = (1 - epsilon) * loss + epsilon * log_probs.mean()
         self.assertTrue(torch.allclose(label_smoothed_loss, expected_loss))

@@ -163,10 +164,10 @@ class TrainerUtilsTest(unittest.TestCase):
         random_labels[2, 1] = -100
         random_labels[2, 3] = -100
-        loss = torch.nn.functional.cross_entropy(random_logits.view(-1, num_labels), random_labels.view(-1))
+        loss = nn.functional.cross_entropy(random_logits.view(-1, num_labels), random_labels.view(-1))
         model_output = SequenceClassifierOutput(logits=random_logits)
         label_smoothed_loss = LabelSmoother(0.1)(model_output, random_labels)
-        log_probs = -torch.nn.functional.log_softmax(random_logits, dim=-1)
+        log_probs = -nn.functional.log_softmax(random_logits, dim=-1)
         # Mask the log probs with the -100 labels
         log_probs[0, 1] = 0.0
         log_probs[2, 1] = 0.0

@@ -230,10 +231,10 @@ class TrainerUtilsTest(unittest.TestCase):
         self.assertEqual(list(sorted(indices_process_0 + indices_process_1)), list(range(100)))

     def test_get_parameter_names(self):
-        model = torch.nn.Sequential(TstLayer(128), torch.nn.ModuleList([TstLayer(128), TstLayer(128)]))
+        model = nn.Sequential(TstLayer(128), nn.ModuleList([TstLayer(128), TstLayer(128)]))
         # fmt: off
         self.assertEqual(
-            get_parameter_names(model, [torch.nn.LayerNorm]),
+            get_parameter_names(model, [nn.LayerNorm]),
             ['0.linear1.weight', '0.linear1.bias', '0.linear2.weight', '0.linear2.bias', '0.bias', '1.0.linear1.weight', '1.0.linear1.bias', '1.0.linear2.weight', '1.0.linear2.bias', '1.0.bias', '1.1.linear1.weight', '1.1.linear1.bias', '1.1.linear2.weight', '1.1.linear2.bias', '1.1.bias']
         )
         # fmt: on
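The label-smoothing hunks rename the functional calls but also carry the reference formula the test checks: with smoothing factor epsilon, the expected loss is (1 - epsilon) * cross_entropy + epsilon * mean(-log_softmax). A standalone reproduction of that expected-loss side (plain PyTorch, ignoring the -100 masking variant of the second hunk; in the test this value is compared against `LabelSmoother(0.1)`):

import torch
from torch import nn

epsilon = 0.1
num_labels = 12
random_logits = torch.randn(4, 5, num_labels)
random_labels = torch.randint(0, num_labels, (4, 5))

# Standard cross-entropy over the flattened batch...
loss = nn.functional.cross_entropy(random_logits.view(-1, num_labels), random_labels.view(-1))
# ...blended with the mean negative log-probability over all classes.
log_probs = -nn.functional.log_softmax(random_logits, dim=-1)
expected_loss = (1 - epsilon) * loss + epsilon * log_probs.mean()

print(loss.item(), expected_loss.item())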