Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
372ab9cd
Unverified
Commit
372ab9cd
authored
Jun 14, 2021
by
Stas Bekman
Committed by
GitHub
Jun 14, 2021
Browse files
[style] consistent nn. and nn.functional: part 3 `tests` (#12155)
* consistent nn. and nn.functional: p3 templates * restore
parent
d9c0d08f
Changes
14
Show whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
93 additions
and
81 deletions
+93
-81
tests/test_generation_logits_process.py
tests/test_generation_logits_process.py
+4
-4
tests/test_modeling_clip.py
tests/test_modeling_clip.py
+3
-2
tests/test_modeling_common.py
tests/test_modeling_common.py
+5
-4
tests/test_modeling_deit.py
tests/test_modeling_deit.py
+3
-2
tests/test_modeling_fsmt.py
tests/test_modeling_fsmt.py
+4
-3
tests/test_modeling_ibert.py
tests/test_modeling_ibert.py
+17
-17
tests/test_modeling_reformer.py
tests/test_modeling_reformer.py
+2
-1
tests/test_modeling_transfo_xl.py
tests/test_modeling_transfo_xl.py
+3
-2
tests/test_modeling_vit.py
tests/test_modeling_vit.py
+3
-2
tests/test_optimization.py
tests/test_optimization.py
+4
-3
tests/test_pipelines_conversational.py
tests/test_pipelines_conversational.py
+3
-2
tests/test_pipelines_summarization.py
tests/test_pipelines_summarization.py
+2
-1
tests/test_trainer.py
tests/test_trainer.py
+25
-24
tests/test_trainer_utils.py
tests/test_trainer_utils.py
+15
-14
No files found.
tests/test_generation_logits_process.py
View file @
372ab9cd
...
@@ -24,7 +24,7 @@ from .test_modeling_common import ids_tensor
...
@@ -24,7 +24,7 @@ from .test_modeling_common import ids_tensor
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
import
torch.nn.functional
as
F
from
torch
import
nn
from
transformers.generation_logits_process
import
(
from
transformers.generation_logits_process
import
(
EncoderNoRepeatNGramLogitsProcessor
,
EncoderNoRepeatNGramLogitsProcessor
,
...
@@ -80,13 +80,13 @@ class LogitsProcessorTest(unittest.TestCase):
...
@@ -80,13 +80,13 @@ class LogitsProcessorTest(unittest.TestCase):
scores
[
1
,
10
]
=
(
1
/
length
)
-
0.4
# valley, 1st batch
scores
[
1
,
10
]
=
(
1
/
length
)
-
0.4
# valley, 1st batch
# compute softmax
# compute softmax
probs
=
F
.
softmax
(
scores
,
dim
=-
1
)
probs
=
nn
.
functional
.
softmax
(
scores
,
dim
=-
1
)
temp_dist_warper_sharper
=
TemperatureLogitsWarper
(
temperature
=
0.5
)
temp_dist_warper_sharper
=
TemperatureLogitsWarper
(
temperature
=
0.5
)
temp_dist_warper_smoother
=
TemperatureLogitsWarper
(
temperature
=
1.3
)
temp_dist_warper_smoother
=
TemperatureLogitsWarper
(
temperature
=
1.3
)
warped_prob_sharp
=
F
.
softmax
(
temp_dist_warper_sharper
(
input_ids
,
scores
.
clone
()),
dim
=-
1
)
warped_prob_sharp
=
nn
.
functional
.
softmax
(
temp_dist_warper_sharper
(
input_ids
,
scores
.
clone
()),
dim
=-
1
)
warped_prob_smooth
=
F
.
softmax
(
temp_dist_warper_smoother
(
input_ids
,
scores
.
clone
()),
dim
=-
1
)
warped_prob_smooth
=
nn
.
functional
.
softmax
(
temp_dist_warper_smoother
(
input_ids
,
scores
.
clone
()),
dim
=-
1
)
# uniform distribution stays uniform
# uniform distribution stays uniform
self
.
assertTrue
(
torch
.
allclose
(
probs
[
0
,
:],
warped_prob_sharp
[
0
,
:],
atol
=
1e-3
))
self
.
assertTrue
(
torch
.
allclose
(
probs
[
0
,
:],
warped_prob_sharp
[
0
,
:],
atol
=
1e-3
))
...
...
tests/test_modeling_clip.py
View file @
372ab9cd
...
@@ -30,6 +30,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init, floats_te
...
@@ -30,6 +30,7 @@ from .test_modeling_common import ModelTesterMixin, _config_zero_init, floats_te
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers
import
CLIPConfig
,
CLIPModel
,
CLIPTextConfig
,
CLIPTextModel
,
CLIPVisionConfig
,
CLIPVisionModel
from
transformers
import
CLIPConfig
,
CLIPModel
,
CLIPTextConfig
,
CLIPTextModel
,
CLIPVisionConfig
,
CLIPVisionModel
from
transformers.models.clip.modeling_clip
import
CLIP_PRETRAINED_MODEL_ARCHIVE_LIST
from
transformers.models.clip.modeling_clip
import
CLIP_PRETRAINED_MODEL_ARCHIVE_LIST
...
@@ -140,9 +141,9 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -140,9 +141,9 @@ class CLIPVisionModelTest(ModelTesterMixin, unittest.TestCase):
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
torch
.
nn
.
Module
))
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
nn
.
Module
))
x
=
model
.
get_output_embeddings
()
x
=
model
.
get_output_embeddings
()
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
torch
.
nn
.
Linear
))
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
nn
.
Linear
))
def
test_forward_signature
(
self
):
def
test_forward_signature
(
self
):
config
,
_
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
_
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
tests/test_modeling_common.py
View file @
372ab9cd
...
@@ -44,6 +44,7 @@ from transformers.testing_utils import (
...
@@ -44,6 +44,7 @@ from transformers.testing_utils import (
if
is_torch_available
():
if
is_torch_available
():
import
numpy
as
np
import
numpy
as
np
import
torch
import
torch
from
torch
import
nn
from
transformers
import
(
from
transformers
import
(
BERT_PRETRAINED_MODEL_ARCHIVE_LIST
,
BERT_PRETRAINED_MODEL_ARCHIVE_LIST
,
...
@@ -1150,10 +1151,10 @@ class ModelTesterMixin:
...
@@ -1150,10 +1151,10 @@ class ModelTesterMixin:
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
torch
.
nn
.
Embedding
,
AdaptiveEmbedding
))
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
nn
.
Embedding
,
AdaptiveEmbedding
))
model
.
set_input_embeddings
(
torch
.
nn
.
Embedding
(
10
,
10
))
model
.
set_input_embeddings
(
nn
.
Embedding
(
10
,
10
))
x
=
model
.
get_output_embeddings
()
x
=
model
.
get_output_embeddings
()
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
torch
.
nn
.
Linear
))
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
nn
.
Linear
))
def
test_correct_missing_keys
(
self
):
def
test_correct_missing_keys
(
self
):
if
not
self
.
test_missing_keys
:
if
not
self
.
test_missing_keys
:
...
@@ -1337,7 +1338,7 @@ class ModelTesterMixin:
...
@@ -1337,7 +1338,7 @@ class ModelTesterMixin:
model
.
eval
()
model
.
eval
()
# Wrap model in nn.DataParallel
# Wrap model in nn.DataParallel
model
=
torch
.
nn
.
DataParallel
(
model
)
model
=
nn
.
DataParallel
(
model
)
with
torch
.
no_grad
():
with
torch
.
no_grad
():
_
=
model
(
**
self
.
_prepare_for_class
(
inputs_dict
,
model_class
))
_
=
model
(
**
self
.
_prepare_for_class
(
inputs_dict
,
model_class
))
...
...
tests/test_modeling_deit.py
View file @
372ab9cd
...
@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
...
@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers
import
(
from
transformers
import
(
MODEL_MAPPING
,
MODEL_MAPPING
,
...
@@ -176,9 +177,9 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -176,9 +177,9 @@ class DeiTModelTest(ModelTesterMixin, unittest.TestCase):
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
torch
.
nn
.
Module
))
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
nn
.
Module
))
x
=
model
.
get_output_embeddings
()
x
=
model
.
get_output_embeddings
()
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
torch
.
nn
.
Linear
))
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
nn
.
Linear
))
def
test_forward_signature
(
self
):
def
test_forward_signature
(
self
):
config
,
_
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
_
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
tests/test_modeling_fsmt.py
View file @
372ab9cd
...
@@ -30,6 +30,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor
...
@@ -30,6 +30,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers
import
FSMTConfig
,
FSMTForConditionalGeneration
,
FSMTModel
,
FSMTTokenizer
from
transformers
import
FSMTConfig
,
FSMTForConditionalGeneration
,
FSMTModel
,
FSMTTokenizer
from
transformers.models.fsmt.modeling_fsmt
import
(
from
transformers.models.fsmt.modeling_fsmt
import
(
...
@@ -160,10 +161,10 @@ class FSMTModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
...
@@ -160,10 +161,10 @@ class FSMTModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
torch
.
nn
.
Embedding
))
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
nn
.
Embedding
))
model
.
set_input_embeddings
(
torch
.
nn
.
Embedding
(
10
,
10
))
model
.
set_input_embeddings
(
nn
.
Embedding
(
10
,
10
))
x
=
model
.
get_output_embeddings
()
x
=
model
.
get_output_embeddings
()
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
torch
.
nn
.
modules
.
sparse
.
Embedding
))
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
nn
.
modules
.
sparse
.
Embedding
))
def
test_initialization_more
(
self
):
def
test_initialization_more
(
self
):
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs
()
config
,
inputs_dict
=
self
.
model_tester
.
prepare_config_and_inputs
()
...
...
tests/test_modeling_ibert.py
View file @
372ab9cd
...
@@ -26,7 +26,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention
...
@@ -26,7 +26,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor, random_attention
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
import
torch.nn
as
nn
from
torch
import
nn
from
transformers
import
(
from
transformers
import
(
IBERT_PRETRAINED_MODEL_ARCHIVE_LIST
,
IBERT_PRETRAINED_MODEL_ARCHIVE_LIST
,
...
@@ -304,9 +304,9 @@ class IBertModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -304,9 +304,9 @@ class IBertModelTest(ModelTesterMixin, unittest.TestCase):
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
QuantEmbedding
)
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
QuantEmbedding
)
model
.
set_input_embeddings
(
torch
.
nn
.
Embedding
(
10
,
10
))
model
.
set_input_embeddings
(
nn
.
Embedding
(
10
,
10
))
x
=
model
.
get_output_embeddings
()
x
=
model
.
get_output_embeddings
()
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
torch
.
nn
.
Linear
))
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
nn
.
Linear
))
# Override
# Override
def
test_feed_forward_chunking
(
self
):
def
test_feed_forward_chunking
(
self
):
...
@@ -350,7 +350,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
...
@@ -350,7 +350,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
weight_bit
=
8
weight_bit
=
8
embedding
=
QuantEmbedding
(
2
,
4
,
quant_mode
=
True
,
weight_bit
=
weight_bit
)
embedding
=
QuantEmbedding
(
2
,
4
,
quant_mode
=
True
,
weight_bit
=
weight_bit
)
embedding_weight
=
torch
.
tensor
([[
-
1.0
,
-
2.0
,
-
3.0
,
-
4.0
],
[
5.0
,
6.0
,
7.0
,
8.0
]])
embedding_weight
=
torch
.
tensor
([[
-
1.0
,
-
2.0
,
-
3.0
,
-
4.0
],
[
5.0
,
6.0
,
7.0
,
8.0
]])
embedding
.
weight
=
torch
.
nn
.
Parameter
(
embedding_weight
)
embedding
.
weight
=
nn
.
Parameter
(
embedding_weight
)
expected_scaling_factor
=
embedding_weight
.
abs
().
max
()
/
(
2
**
(
weight_bit
-
1
)
-
1
)
expected_scaling_factor
=
embedding_weight
.
abs
().
max
()
/
(
2
**
(
weight_bit
-
1
)
-
1
)
x
,
x_scaling_factor
=
embedding
(
torch
.
tensor
(
0
))
x
,
x_scaling_factor
=
embedding
(
torch
.
tensor
(
0
))
...
@@ -447,8 +447,8 @@ class IBertModelIntegrationTest(unittest.TestCase):
...
@@ -447,8 +447,8 @@ class IBertModelIntegrationTest(unittest.TestCase):
linear_q
=
QuantLinear
(
2
,
4
,
quant_mode
=
True
,
per_channel
=
per_channel
,
weight_bit
=
weight_bit
)
linear_q
=
QuantLinear
(
2
,
4
,
quant_mode
=
True
,
per_channel
=
per_channel
,
weight_bit
=
weight_bit
)
linear_dq
=
QuantLinear
(
2
,
4
,
quant_mode
=
False
,
per_channel
=
per_channel
,
weight_bit
=
weight_bit
)
linear_dq
=
QuantLinear
(
2
,
4
,
quant_mode
=
False
,
per_channel
=
per_channel
,
weight_bit
=
weight_bit
)
linear_weight
=
torch
.
tensor
([[
-
1.0
,
2.0
,
3.0
,
-
4.0
],
[
5.0
,
-
6.0
,
-
7.0
,
8.0
]]).
T
linear_weight
=
torch
.
tensor
([[
-
1.0
,
2.0
,
3.0
,
-
4.0
],
[
5.0
,
-
6.0
,
-
7.0
,
8.0
]]).
T
linear_q
.
weight
=
torch
.
nn
.
Parameter
(
linear_weight
)
linear_q
.
weight
=
nn
.
Parameter
(
linear_weight
)
linear_dq
.
weight
=
torch
.
nn
.
Parameter
(
linear_weight
)
linear_dq
.
weight
=
nn
.
Parameter
(
linear_weight
)
q
,
q_scaling_factor
=
linear_q
(
x
,
x_scaling_factor
)
q
,
q_scaling_factor
=
linear_q
(
x
,
x_scaling_factor
)
q_int
=
q
/
q_scaling_factor
q_int
=
q
/
q_scaling_factor
...
@@ -477,7 +477,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
...
@@ -477,7 +477,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
def
test_int_gelu
(
self
):
def
test_int_gelu
(
self
):
gelu_q
=
IntGELU
(
quant_mode
=
True
)
gelu_q
=
IntGELU
(
quant_mode
=
True
)
gelu_dq
=
torch
.
nn
.
GELU
()
gelu_dq
=
nn
.
GELU
()
x_int
=
torch
.
range
(
-
10000
,
10000
,
1
)
x_int
=
torch
.
range
(
-
10000
,
10000
,
1
)
x_scaling_factor
=
torch
.
tensor
(
0.001
)
x_scaling_factor
=
torch
.
tensor
(
0.001
)
...
@@ -523,7 +523,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
...
@@ -523,7 +523,7 @@ class IBertModelIntegrationTest(unittest.TestCase):
def
test_int_softmax
(
self
):
def
test_int_softmax
(
self
):
output_bit
=
8
output_bit
=
8
softmax_q
=
IntSoftmax
(
output_bit
,
quant_mode
=
True
)
softmax_q
=
IntSoftmax
(
output_bit
,
quant_mode
=
True
)
softmax_dq
=
torch
.
nn
.
Softmax
()
softmax_dq
=
nn
.
Softmax
()
# x_int = torch.range(-10000, 10000, 1)
# x_int = torch.range(-10000, 10000, 1)
def
_test
(
array
):
def
_test
(
array
):
...
@@ -590,12 +590,12 @@ class IBertModelIntegrationTest(unittest.TestCase):
...
@@ -590,12 +590,12 @@ class IBertModelIntegrationTest(unittest.TestCase):
x
=
x_int
*
x_scaling_factor
x
=
x_int
*
x_scaling_factor
ln_q
=
IntLayerNorm
(
x
.
shape
[
1
:],
1e-5
,
quant_mode
=
True
,
output_bit
=
output_bit
)
ln_q
=
IntLayerNorm
(
x
.
shape
[
1
:],
1e-5
,
quant_mode
=
True
,
output_bit
=
output_bit
)
ln_dq
=
torch
.
nn
.
LayerNorm
(
x
.
shape
[
1
:],
1e-5
)
ln_dq
=
nn
.
LayerNorm
(
x
.
shape
[
1
:],
1e-5
)
ln_q
.
weight
=
torch
.
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_q
.
weight
=
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_q
.
bias
=
torch
.
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_q
.
bias
=
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_dq
.
weight
=
torch
.
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_dq
.
weight
=
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_dq
.
bias
=
torch
.
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_dq
.
bias
=
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
q
,
q_scaling_factor
=
ln_q
(
x
,
x_scaling_factor
)
q
,
q_scaling_factor
=
ln_q
(
x
,
x_scaling_factor
)
q_int
=
q
/
q_scaling_factor
q_int
=
q
/
q_scaling_factor
...
@@ -627,13 +627,13 @@ class IBertModelIntegrationTest(unittest.TestCase):
...
@@ -627,13 +627,13 @@ class IBertModelIntegrationTest(unittest.TestCase):
],
],
}
}
ln_dq
.
weight
=
torch
.
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_dq
.
weight
=
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_dq
.
bias
=
torch
.
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_dq
.
bias
=
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
dq
,
dq_scaling_factor
=
ln_dq
(
x
,
x_scaling_factor
)
dq
,
dq_scaling_factor
=
ln_dq
(
x
,
x_scaling_factor
)
for
label
,
ln_fdqs
in
ln_fdqs_dict
.
items
():
for
label
,
ln_fdqs
in
ln_fdqs_dict
.
items
():
for
ln_fdq
in
ln_fdqs
:
for
ln_fdq
in
ln_fdqs
:
ln_fdq
.
weight
=
torch
.
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_fdq
.
weight
=
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_fdq
.
bias
=
torch
.
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
ln_fdq
.
bias
=
nn
.
Parameter
(
torch
.
ones
(
x
.
shape
[
1
:]))
q
,
q_scaling_factor
=
ln_fdq
(
x
,
x_scaling_factor
)
q
,
q_scaling_factor
=
ln_fdq
(
x
,
x_scaling_factor
)
if
label
:
if
label
:
self
.
assertTrue
(
torch
.
allclose
(
q
,
dq
,
atol
=
1e-4
))
self
.
assertTrue
(
torch
.
allclose
(
q
,
dq
,
atol
=
1e-4
))
...
...
tests/test_modeling_reformer.py
View file @
372ab9cd
...
@@ -32,6 +32,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, r
...
@@ -32,6 +32,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor, r
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers
import
(
from
transformers
import
(
REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST
,
REFORMER_PRETRAINED_MODEL_ARCHIVE_LIST
,
...
@@ -241,7 +242,7 @@ class ReformerModelTester:
...
@@ -241,7 +242,7 @@ class ReformerModelTester:
# set all position encodings to zero so that postions don't matter
# set all position encodings to zero so that postions don't matter
with
torch
.
no_grad
():
with
torch
.
no_grad
():
embedding
=
model
.
embeddings
.
position_embeddings
.
embedding
embedding
=
model
.
embeddings
.
position_embeddings
.
embedding
embedding
.
weight
=
torch
.
nn
.
Parameter
(
torch
.
zeros
(
embedding
.
weight
.
shape
).
to
(
torch_device
))
embedding
.
weight
=
nn
.
Parameter
(
torch
.
zeros
(
embedding
.
weight
.
shape
).
to
(
torch_device
))
embedding
.
weight
.
requires_grad
=
False
embedding
.
weight
.
requires_grad
=
False
half_seq_len
=
self
.
seq_length
//
2
half_seq_len
=
self
.
seq_length
//
2
...
...
tests/test_modeling_transfo_xl.py
View file @
372ab9cd
...
@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor
...
@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, ids_tensor
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers
import
TransfoXLConfig
,
TransfoXLForSequenceClassification
,
TransfoXLLMHeadModel
,
TransfoXLModel
from
transformers
import
TransfoXLConfig
,
TransfoXLForSequenceClassification
,
TransfoXLLMHeadModel
,
TransfoXLModel
from
transformers.models.transfo_xl.modeling_transfo_xl
import
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST
from
transformers.models.transfo_xl.modeling_transfo_xl
import
TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_LIST
...
@@ -362,11 +363,11 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
...
@@ -362,11 +363,11 @@ class TransfoXLModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestC
if
hasattr
(
module
,
"emb_projs"
):
if
hasattr
(
module
,
"emb_projs"
):
for
i
in
range
(
len
(
module
.
emb_projs
)):
for
i
in
range
(
len
(
module
.
emb_projs
)):
if
module
.
emb_projs
[
i
]
is
not
None
:
if
module
.
emb_projs
[
i
]
is
not
None
:
torch
.
nn
.
init
.
constant_
(
module
.
emb_projs
[
i
],
0.0003
)
nn
.
init
.
constant_
(
module
.
emb_projs
[
i
],
0.0003
)
if
hasattr
(
module
,
"out_projs"
):
if
hasattr
(
module
,
"out_projs"
):
for
i
in
range
(
len
(
module
.
out_projs
)):
for
i
in
range
(
len
(
module
.
out_projs
)):
if
module
.
out_projs
[
i
]
is
not
None
:
if
module
.
out_projs
[
i
]
is
not
None
:
torch
.
nn
.
init
.
constant_
(
module
.
out_projs
[
i
],
0.0003
)
nn
.
init
.
constant_
(
module
.
out_projs
[
i
],
0.0003
)
for
param
in
[
"r_emb"
,
"r_w_bias"
,
"r_r_bias"
,
"r_bias"
]:
for
param
in
[
"r_emb"
,
"r_w_bias"
,
"r_r_bias"
,
"r_bias"
]:
if
hasattr
(
module
,
param
)
and
getattr
(
module
,
param
)
is
not
None
:
if
hasattr
(
module
,
param
)
and
getattr
(
module
,
param
)
is
not
None
:
...
...
tests/test_modeling_vit.py
View file @
372ab9cd
...
@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
...
@@ -27,6 +27,7 @@ from .test_modeling_common import ModelTesterMixin, floats_tensor, ids_tensor
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers
import
ViTConfig
,
ViTForImageClassification
,
ViTModel
from
transformers
import
ViTConfig
,
ViTForImageClassification
,
ViTModel
from
transformers.models.vit.modeling_vit
import
VIT_PRETRAINED_MODEL_ARCHIVE_LIST
,
to_2tuple
from
transformers.models.vit.modeling_vit
import
VIT_PRETRAINED_MODEL_ARCHIVE_LIST
,
to_2tuple
...
@@ -169,9 +170,9 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase):
...
@@ -169,9 +170,9 @@ class ViTModelTest(ModelTesterMixin, unittest.TestCase):
for
model_class
in
self
.
all_model_classes
:
for
model_class
in
self
.
all_model_classes
:
model
=
model_class
(
config
)
model
=
model_class
(
config
)
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
torch
.
nn
.
Module
))
self
.
assertIsInstance
(
model
.
get_input_embeddings
(),
(
nn
.
Module
))
x
=
model
.
get_output_embeddings
()
x
=
model
.
get_output_embeddings
()
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
torch
.
nn
.
Linear
))
self
.
assertTrue
(
x
is
None
or
isinstance
(
x
,
nn
.
Linear
))
def
test_forward_signature
(
self
):
def
test_forward_signature
(
self
):
config
,
_
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
config
,
_
=
self
.
model_tester
.
prepare_config_and_inputs_for_common
()
...
...
tests/test_optimization.py
View file @
372ab9cd
...
@@ -24,6 +24,7 @@ from transformers.testing_utils import require_torch
...
@@ -24,6 +24,7 @@ from transformers.testing_utils import require_torch
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers
import
(
from
transformers
import
(
Adafactor
,
Adafactor
,
...
@@ -70,7 +71,7 @@ class OptimizationTest(unittest.TestCase):
...
@@ -70,7 +71,7 @@ class OptimizationTest(unittest.TestCase):
def
test_adam_w
(
self
):
def
test_adam_w
(
self
):
w
=
torch
.
tensor
([
0.1
,
-
0.2
,
-
0.1
],
requires_grad
=
True
)
w
=
torch
.
tensor
([
0.1
,
-
0.2
,
-
0.1
],
requires_grad
=
True
)
target
=
torch
.
tensor
([
0.4
,
0.2
,
-
0.5
])
target
=
torch
.
tensor
([
0.4
,
0.2
,
-
0.5
])
criterion
=
torch
.
nn
.
MSELoss
()
criterion
=
nn
.
MSELoss
()
# No warmup, constant schedule, no gradient clipping
# No warmup, constant schedule, no gradient clipping
optimizer
=
AdamW
(
params
=
[
w
],
lr
=
2e-1
,
weight_decay
=
0.0
)
optimizer
=
AdamW
(
params
=
[
w
],
lr
=
2e-1
,
weight_decay
=
0.0
)
for
_
in
range
(
100
):
for
_
in
range
(
100
):
...
@@ -84,7 +85,7 @@ class OptimizationTest(unittest.TestCase):
...
@@ -84,7 +85,7 @@ class OptimizationTest(unittest.TestCase):
def
test_adafactor
(
self
):
def
test_adafactor
(
self
):
w
=
torch
.
tensor
([
0.1
,
-
0.2
,
-
0.1
],
requires_grad
=
True
)
w
=
torch
.
tensor
([
0.1
,
-
0.2
,
-
0.1
],
requires_grad
=
True
)
target
=
torch
.
tensor
([
0.4
,
0.2
,
-
0.5
])
target
=
torch
.
tensor
([
0.4
,
0.2
,
-
0.5
])
criterion
=
torch
.
nn
.
MSELoss
()
criterion
=
nn
.
MSELoss
()
# No warmup, constant schedule, no gradient clipping
# No warmup, constant schedule, no gradient clipping
optimizer
=
Adafactor
(
optimizer
=
Adafactor
(
params
=
[
w
],
params
=
[
w
],
...
@@ -109,7 +110,7 @@ class OptimizationTest(unittest.TestCase):
...
@@ -109,7 +110,7 @@ class OptimizationTest(unittest.TestCase):
@
require_torch
@
require_torch
class
ScheduleInitTest
(
unittest
.
TestCase
):
class
ScheduleInitTest
(
unittest
.
TestCase
):
m
=
torch
.
nn
.
Linear
(
50
,
50
)
if
is_torch_available
()
else
None
m
=
nn
.
Linear
(
50
,
50
)
if
is_torch_available
()
else
None
optimizer
=
AdamW
(
m
.
parameters
(),
lr
=
10.0
)
if
is_torch_available
()
else
None
optimizer
=
AdamW
(
m
.
parameters
(),
lr
=
10.0
)
if
is_torch_available
()
else
None
num_steps
=
10
num_steps
=
10
...
...
tests/test_pipelines_conversational.py
View file @
372ab9cd
...
@@ -32,6 +32,7 @@ from .test_pipelines_common import MonoInputPipelineCommonMixin
...
@@ -32,6 +32,7 @@ from .test_pipelines_common import MonoInputPipelineCommonMixin
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers.models.gpt2
import
GPT2Config
,
GPT2LMHeadModel
from
transformers.models.gpt2
import
GPT2Config
,
GPT2LMHeadModel
...
@@ -59,8 +60,8 @@ class SimpleConversationPipelineTests(unittest.TestCase):
...
@@ -59,8 +60,8 @@ class SimpleConversationPipelineTests(unittest.TestCase):
bias
[
76
]
=
1
bias
[
76
]
=
1
weight
=
torch
.
zeros
((
V
,
D
),
requires_grad
=
True
)
weight
=
torch
.
zeros
((
V
,
D
),
requires_grad
=
True
)
model
.
lm_head
.
bias
=
torch
.
nn
.
Parameter
(
bias
)
model
.
lm_head
.
bias
=
nn
.
Parameter
(
bias
)
model
.
lm_head
.
weight
=
torch
.
nn
.
Parameter
(
weight
)
model
.
lm_head
.
weight
=
nn
.
Parameter
(
weight
)
# # Created with:
# # Created with:
# import tempfile
# import tempfile
...
...
tests/test_pipelines_summarization.py
View file @
372ab9cd
...
@@ -23,6 +23,7 @@ from .test_pipelines_common import MonoInputPipelineCommonMixin
...
@@ -23,6 +23,7 @@ from .test_pipelines_common import MonoInputPipelineCommonMixin
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
transformers.models.bart
import
BartConfig
,
BartForConditionalGeneration
from
transformers.models.bart
import
BartConfig
,
BartForConditionalGeneration
...
@@ -55,7 +56,7 @@ class SimpleSummarizationPipelineTests(unittest.TestCase):
...
@@ -55,7 +56,7 @@ class SimpleSummarizationPipelineTests(unittest.TestCase):
bias
=
torch
.
zeros
(
V
)
bias
=
torch
.
zeros
(
V
)
bias
[
76
]
=
10
bias
[
76
]
=
10
model
.
lm_head
.
bias
=
torch
.
nn
.
Parameter
(
bias
)
model
.
lm_head
.
bias
=
nn
.
Parameter
(
bias
)
# # Generated with:
# # Generated with:
# import tempfile
# import tempfile
...
...
tests/test_trainer.py
View file @
372ab9cd
...
@@ -53,6 +53,7 @@ from transformers.utils.hp_naming import TrialShortNamer
...
@@ -53,6 +53,7 @@ from transformers.utils.hp_naming import TrialShortNamer
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
torch.utils.data
import
IterableDataset
from
torch.utils.data
import
IterableDataset
from
transformers
import
(
from
transformers
import
(
...
@@ -154,11 +155,11 @@ if is_torch_available():
...
@@ -154,11 +155,11 @@ if is_torch_available():
for
i
in
range
(
len
(
self
.
dataset
)):
for
i
in
range
(
len
(
self
.
dataset
)):
yield
self
.
dataset
[
i
]
yield
self
.
dataset
[
i
]
class
RegressionModel
(
torch
.
nn
.
Module
):
class
RegressionModel
(
nn
.
Module
):
def
__init__
(
self
,
a
=
0
,
b
=
0
,
double_output
=
False
):
def
__init__
(
self
,
a
=
0
,
b
=
0
,
double_output
=
False
):
super
().
__init__
()
super
().
__init__
()
self
.
a
=
torch
.
nn
.
Parameter
(
torch
.
tensor
(
a
).
float
())
self
.
a
=
nn
.
Parameter
(
torch
.
tensor
(
a
).
float
())
self
.
b
=
torch
.
nn
.
Parameter
(
torch
.
tensor
(
b
).
float
())
self
.
b
=
nn
.
Parameter
(
torch
.
tensor
(
b
).
float
())
self
.
double_output
=
double_output
self
.
double_output
=
double_output
self
.
config
=
None
self
.
config
=
None
...
@@ -166,21 +167,21 @@ if is_torch_available():
...
@@ -166,21 +167,21 @@ if is_torch_available():
y
=
input_x
*
self
.
a
+
self
.
b
y
=
input_x
*
self
.
a
+
self
.
b
if
labels
is
None
:
if
labels
is
None
:
return
(
y
,
y
)
if
self
.
double_output
else
(
y
,)
return
(
y
,
y
)
if
self
.
double_output
else
(
y
,)
loss
=
torch
.
nn
.
functional
.
mse_loss
(
y
,
labels
)
loss
=
nn
.
functional
.
mse_loss
(
y
,
labels
)
return
(
loss
,
y
,
y
)
if
self
.
double_output
else
(
loss
,
y
)
return
(
loss
,
y
,
y
)
if
self
.
double_output
else
(
loss
,
y
)
class
RegressionDictModel
(
torch
.
nn
.
Module
):
class
RegressionDictModel
(
nn
.
Module
):
def
__init__
(
self
,
a
=
0
,
b
=
0
):
def
__init__
(
self
,
a
=
0
,
b
=
0
):
super
().
__init__
()
super
().
__init__
()
self
.
a
=
torch
.
nn
.
Parameter
(
torch
.
tensor
(
a
).
float
())
self
.
a
=
nn
.
Parameter
(
torch
.
tensor
(
a
).
float
())
self
.
b
=
torch
.
nn
.
Parameter
(
torch
.
tensor
(
b
).
float
())
self
.
b
=
nn
.
Parameter
(
torch
.
tensor
(
b
).
float
())
self
.
config
=
None
self
.
config
=
None
def
forward
(
self
,
input_x
,
labels
=
None
,
**
kwargs
):
def
forward
(
self
,
input_x
,
labels
=
None
,
**
kwargs
):
y
=
input_x
*
self
.
a
+
self
.
b
y
=
input_x
*
self
.
a
+
self
.
b
result
=
{
"output"
:
y
}
result
=
{
"output"
:
y
}
if
labels
is
not
None
:
if
labels
is
not
None
:
result
[
"loss"
]
=
torch
.
nn
.
functional
.
mse_loss
(
y
,
labels
)
result
[
"loss"
]
=
nn
.
functional
.
mse_loss
(
y
,
labels
)
return
result
return
result
class
RegressionPreTrainedModel
(
PreTrainedModel
):
class
RegressionPreTrainedModel
(
PreTrainedModel
):
...
@@ -189,15 +190,15 @@ if is_torch_available():
...
@@ -189,15 +190,15 @@ if is_torch_available():
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
super
().
__init__
(
config
)
super
().
__init__
(
config
)
self
.
a
=
torch
.
nn
.
Parameter
(
torch
.
tensor
(
config
.
a
).
float
())
self
.
a
=
nn
.
Parameter
(
torch
.
tensor
(
config
.
a
).
float
())
self
.
b
=
torch
.
nn
.
Parameter
(
torch
.
tensor
(
config
.
b
).
float
())
self
.
b
=
nn
.
Parameter
(
torch
.
tensor
(
config
.
b
).
float
())
self
.
double_output
=
config
.
double_output
self
.
double_output
=
config
.
double_output
def
forward
(
self
,
input_x
,
labels
=
None
,
**
kwargs
):
def
forward
(
self
,
input_x
,
labels
=
None
,
**
kwargs
):
y
=
input_x
*
self
.
a
+
self
.
b
y
=
input_x
*
self
.
a
+
self
.
b
if
labels
is
None
:
if
labels
is
None
:
return
(
y
,
y
)
if
self
.
double_output
else
(
y
,)
return
(
y
,
y
)
if
self
.
double_output
else
(
y
,)
loss
=
torch
.
nn
.
functional
.
mse_loss
(
y
,
labels
)
loss
=
nn
.
functional
.
mse_loss
(
y
,
labels
)
return
(
loss
,
y
,
y
)
if
self
.
double_output
else
(
loss
,
y
)
return
(
loss
,
y
,
y
)
if
self
.
double_output
else
(
loss
,
y
)
class
RegressionRandomPreTrainedModel
(
PreTrainedModel
):
class
RegressionRandomPreTrainedModel
(
PreTrainedModel
):
...
@@ -206,8 +207,8 @@ if is_torch_available():
...
@@ -206,8 +207,8 @@ if is_torch_available():
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
super
().
__init__
(
config
)
super
().
__init__
(
config
)
self
.
a
=
torch
.
nn
.
Parameter
(
torch
.
tensor
(
config
.
a
).
float
())
self
.
a
=
nn
.
Parameter
(
torch
.
tensor
(
config
.
a
).
float
())
self
.
b
=
torch
.
nn
.
Parameter
(
torch
.
tensor
(
config
.
b
).
float
())
self
.
b
=
nn
.
Parameter
(
torch
.
tensor
(
config
.
b
).
float
())
def
forward
(
self
,
input_x
,
labels
=
None
,
**
kwargs
):
def
forward
(
self
,
input_x
,
labels
=
None
,
**
kwargs
):
y
=
input_x
*
self
.
a
+
self
.
b
y
=
input_x
*
self
.
a
+
self
.
b
...
@@ -219,21 +220,21 @@ if is_torch_available():
...
@@ -219,21 +220,21 @@ if is_torch_available():
if
labels
is
None
:
if
labels
is
None
:
return
(
y
,)
return
(
y
,)
loss
=
torch
.
nn
.
functional
.
mse_loss
(
y
,
labels
)
loss
=
nn
.
functional
.
mse_loss
(
y
,
labels
)
return
(
loss
,
y
)
return
(
loss
,
y
)
class
TstLayer
(
torch
.
nn
.
Module
):
class
TstLayer
(
nn
.
Module
):
def
__init__
(
self
,
hidden_size
):
def
__init__
(
self
,
hidden_size
):
super
().
__init__
()
super
().
__init__
()
self
.
linear1
=
torch
.
nn
.
Linear
(
hidden_size
,
hidden_size
)
self
.
linear1
=
nn
.
Linear
(
hidden_size
,
hidden_size
)
self
.
ln1
=
torch
.
nn
.
LayerNorm
(
hidden_size
)
self
.
ln1
=
nn
.
LayerNorm
(
hidden_size
)
self
.
linear2
=
torch
.
nn
.
Linear
(
hidden_size
,
hidden_size
)
self
.
linear2
=
nn
.
Linear
(
hidden_size
,
hidden_size
)
self
.
ln2
=
torch
.
nn
.
LayerNorm
(
hidden_size
)
self
.
ln2
=
nn
.
LayerNorm
(
hidden_size
)
self
.
bias
=
torch
.
nn
.
Parameter
(
torch
.
zeros
(
hidden_size
))
self
.
bias
=
nn
.
Parameter
(
torch
.
zeros
(
hidden_size
))
def
forward
(
self
,
x
):
def
forward
(
self
,
x
):
h
=
self
.
ln1
(
torch
.
nn
.
functional
.
relu
(
self
.
linear1
(
x
)))
h
=
self
.
ln1
(
nn
.
functional
.
relu
(
self
.
linear1
(
x
)))
h
=
torch
.
nn
.
functional
.
relu
(
self
.
linear2
(
x
))
h
=
nn
.
functional
.
relu
(
self
.
linear2
(
x
))
return
self
.
ln2
(
x
+
h
+
self
.
bias
)
return
self
.
ln2
(
x
+
h
+
self
.
bias
)
def
get_regression_trainer
(
a
=
0
,
b
=
0
,
double_output
=
False
,
train_len
=
64
,
eval_len
=
64
,
pretrained
=
True
,
**
kwargs
):
def
get_regression_trainer
(
a
=
0
,
b
=
0
,
double_output
=
False
,
train_len
=
64
,
eval_len
=
64
,
pretrained
=
True
,
**
kwargs
):
...
@@ -1065,7 +1066,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
...
@@ -1065,7 +1066,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
assert_flos_extraction
(
trainer
,
trainer
.
model
)
assert_flos_extraction
(
trainer
,
trainer
.
model
)
# with enforced DataParallel
# with enforced DataParallel
assert_flos_extraction
(
trainer
,
torch
.
nn
.
DataParallel
(
trainer
.
model
))
assert_flos_extraction
(
trainer
,
nn
.
DataParallel
(
trainer
.
model
))
trainer
.
train
()
trainer
.
train
()
self
.
assertTrue
(
isinstance
(
trainer
.
state
.
total_flos
,
float
))
self
.
assertTrue
(
isinstance
(
trainer
.
state
.
total_flos
,
float
))
...
@@ -1186,7 +1187,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
...
@@ -1186,7 +1187,7 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
self
.
assertAlmostEqual
(
fp16_eval
,
fp32_init
/
2
,
delta
=
5_000
)
self
.
assertAlmostEqual
(
fp16_eval
,
fp32_init
/
2
,
delta
=
5_000
)
def
test_no_wd_param_group
(
self
):
def
test_no_wd_param_group
(
self
):
model
=
torch
.
nn
.
Sequential
(
TstLayer
(
128
),
torch
.
nn
.
ModuleList
([
TstLayer
(
128
),
TstLayer
(
128
)]))
model
=
nn
.
Sequential
(
TstLayer
(
128
),
nn
.
ModuleList
([
TstLayer
(
128
),
TstLayer
(
128
)]))
trainer
=
Trainer
(
model
=
model
)
trainer
=
Trainer
(
model
=
model
)
trainer
.
create_optimizer_and_scheduler
(
10
)
trainer
.
create_optimizer_and_scheduler
(
10
)
# fmt: off
# fmt: off
...
...
tests/test_trainer_utils.py
View file @
372ab9cd
...
@@ -24,6 +24,7 @@ from transformers.testing_utils import require_torch
...
@@ -24,6 +24,7 @@ from transformers.testing_utils import require_torch
if
is_torch_available
():
if
is_torch_available
():
import
torch
import
torch
from
torch
import
nn
from
torch.utils.data
import
IterableDataset
from
torch.utils.data
import
IterableDataset
from
transformers.modeling_outputs
import
SequenceClassifierOutput
from
transformers.modeling_outputs
import
SequenceClassifierOutput
...
@@ -40,18 +41,18 @@ if is_torch_available():
...
@@ -40,18 +41,18 @@ if is_torch_available():
get_parameter_names
,
get_parameter_names
,
)
)
class
TstLayer
(
torch
.
nn
.
Module
):
class
TstLayer
(
nn
.
Module
):
def
__init__
(
self
,
hidden_size
):
def
__init__
(
self
,
hidden_size
):
super
().
__init__
()
super
().
__init__
()
self
.
linear1
=
torch
.
nn
.
Linear
(
hidden_size
,
hidden_size
)
self
.
linear1
=
nn
.
Linear
(
hidden_size
,
hidden_size
)
self
.
ln1
=
torch
.
nn
.
LayerNorm
(
hidden_size
)
self
.
ln1
=
nn
.
LayerNorm
(
hidden_size
)
self
.
linear2
=
torch
.
nn
.
Linear
(
hidden_size
,
hidden_size
)
self
.
linear2
=
nn
.
Linear
(
hidden_size
,
hidden_size
)
self
.
ln2
=
torch
.
nn
.
LayerNorm
(
hidden_size
)
self
.
ln2
=
nn
.
LayerNorm
(
hidden_size
)
self
.
bias
=
torch
.
nn
.
Parameter
(
torch
.
zeros
(
hidden_size
))
self
.
bias
=
nn
.
Parameter
(
torch
.
zeros
(
hidden_size
))
def
forward
(
self
,
x
):
def
forward
(
self
,
x
):
h
=
self
.
ln1
(
torch
.
nn
.
functional
.
relu
(
self
.
linear1
(
x
)))
h
=
self
.
ln1
(
nn
.
functional
.
relu
(
self
.
linear1
(
x
)))
h
=
torch
.
nn
.
functional
.
relu
(
self
.
linear2
(
x
))
h
=
nn
.
functional
.
relu
(
self
.
linear2
(
x
))
return
self
.
ln2
(
x
+
h
+
self
.
bias
)
return
self
.
ln2
(
x
+
h
+
self
.
bias
)
class
RandomIterableDataset
(
IterableDataset
):
class
RandomIterableDataset
(
IterableDataset
):
...
@@ -151,10 +152,10 @@ class TrainerUtilsTest(unittest.TestCase):
...
@@ -151,10 +152,10 @@ class TrainerUtilsTest(unittest.TestCase):
num_labels
=
12
num_labels
=
12
random_logits
=
torch
.
randn
(
4
,
5
,
num_labels
)
random_logits
=
torch
.
randn
(
4
,
5
,
num_labels
)
random_labels
=
torch
.
randint
(
0
,
num_labels
,
(
4
,
5
))
random_labels
=
torch
.
randint
(
0
,
num_labels
,
(
4
,
5
))
loss
=
torch
.
nn
.
functional
.
cross_entropy
(
random_logits
.
view
(
-
1
,
num_labels
),
random_labels
.
view
(
-
1
))
loss
=
nn
.
functional
.
cross_entropy
(
random_logits
.
view
(
-
1
,
num_labels
),
random_labels
.
view
(
-
1
))
model_output
=
SequenceClassifierOutput
(
logits
=
random_logits
)
model_output
=
SequenceClassifierOutput
(
logits
=
random_logits
)
label_smoothed_loss
=
LabelSmoother
(
0.1
)(
model_output
,
random_labels
)
label_smoothed_loss
=
LabelSmoother
(
0.1
)(
model_output
,
random_labels
)
log_probs
=
-
torch
.
nn
.
functional
.
log_softmax
(
random_logits
,
dim
=-
1
)
log_probs
=
-
nn
.
functional
.
log_softmax
(
random_logits
,
dim
=-
1
)
expected_loss
=
(
1
-
epsilon
)
*
loss
+
epsilon
*
log_probs
.
mean
()
expected_loss
=
(
1
-
epsilon
)
*
loss
+
epsilon
*
log_probs
.
mean
()
self
.
assertTrue
(
torch
.
allclose
(
label_smoothed_loss
,
expected_loss
))
self
.
assertTrue
(
torch
.
allclose
(
label_smoothed_loss
,
expected_loss
))
...
@@ -163,10 +164,10 @@ class TrainerUtilsTest(unittest.TestCase):
...
@@ -163,10 +164,10 @@ class TrainerUtilsTest(unittest.TestCase):
random_labels
[
2
,
1
]
=
-
100
random_labels
[
2
,
1
]
=
-
100
random_labels
[
2
,
3
]
=
-
100
random_labels
[
2
,
3
]
=
-
100
loss
=
torch
.
nn
.
functional
.
cross_entropy
(
random_logits
.
view
(
-
1
,
num_labels
),
random_labels
.
view
(
-
1
))
loss
=
nn
.
functional
.
cross_entropy
(
random_logits
.
view
(
-
1
,
num_labels
),
random_labels
.
view
(
-
1
))
model_output
=
SequenceClassifierOutput
(
logits
=
random_logits
)
model_output
=
SequenceClassifierOutput
(
logits
=
random_logits
)
label_smoothed_loss
=
LabelSmoother
(
0.1
)(
model_output
,
random_labels
)
label_smoothed_loss
=
LabelSmoother
(
0.1
)(
model_output
,
random_labels
)
log_probs
=
-
torch
.
nn
.
functional
.
log_softmax
(
random_logits
,
dim
=-
1
)
log_probs
=
-
nn
.
functional
.
log_softmax
(
random_logits
,
dim
=-
1
)
# Mask the log probs with the -100 labels
# Mask the log probs with the -100 labels
log_probs
[
0
,
1
]
=
0.0
log_probs
[
0
,
1
]
=
0.0
log_probs
[
2
,
1
]
=
0.0
log_probs
[
2
,
1
]
=
0.0
...
@@ -230,10 +231,10 @@ class TrainerUtilsTest(unittest.TestCase):
...
@@ -230,10 +231,10 @@ class TrainerUtilsTest(unittest.TestCase):
self
.
assertEqual
(
list
(
sorted
(
indices_process_0
+
indices_process_1
)),
list
(
range
(
100
)))
self
.
assertEqual
(
list
(
sorted
(
indices_process_0
+
indices_process_1
)),
list
(
range
(
100
)))
def
test_get_parameter_names
(
self
):
def
test_get_parameter_names
(
self
):
model
=
torch
.
nn
.
Sequential
(
TstLayer
(
128
),
torch
.
nn
.
ModuleList
([
TstLayer
(
128
),
TstLayer
(
128
)]))
model
=
nn
.
Sequential
(
TstLayer
(
128
),
nn
.
ModuleList
([
TstLayer
(
128
),
TstLayer
(
128
)]))
# fmt: off
# fmt: off
self
.
assertEqual
(
self
.
assertEqual
(
get_parameter_names
(
model
,
[
torch
.
nn
.
LayerNorm
]),
get_parameter_names
(
model
,
[
nn
.
LayerNorm
]),
[
'0.linear1.weight'
,
'0.linear1.bias'
,
'0.linear2.weight'
,
'0.linear2.bias'
,
'0.bias'
,
'1.0.linear1.weight'
,
'1.0.linear1.bias'
,
'1.0.linear2.weight'
,
'1.0.linear2.bias'
,
'1.0.bias'
,
'1.1.linear1.weight'
,
'1.1.linear1.bias'
,
'1.1.linear2.weight'
,
'1.1.linear2.bias'
,
'1.1.bias'
]
[
'0.linear1.weight'
,
'0.linear1.bias'
,
'0.linear2.weight'
,
'0.linear2.bias'
,
'0.bias'
,
'1.0.linear1.weight'
,
'1.0.linear1.bias'
,
'1.0.linear2.weight'
,
'1.0.linear2.bias'
,
'1.0.bias'
,
'1.1.linear1.weight'
,
'1.1.linear1.bias'
,
'1.1.linear2.weight'
,
'1.1.linear2.bias'
,
'1.1.bias'
]
)
)
# fmt: on
# fmt: on
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment