Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
chenpangpang
transformers
Commits
ccb6947d
Commit
ccb6947d
authored
Jul 11, 2019
by
thomwolf
Browse files
optimization tests
parent
e4f9dca0
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
93 additions
and
51 deletions
+93
-51
examples/run_glue.py
examples/run_glue.py
+18
-11
examples/test_examples.py
examples/test_examples.py
+12
-3
pytorch_transformers/optimization.py
pytorch_transformers/optimization.py
+12
-11
pytorch_transformers/tests/optimization_test.py
pytorch_transformers/tests/optimization_test.py
+51
-26
No files found.
examples/run_glue.py
View file @
ccb6947d
...
@@ -96,8 +96,10 @@ def train(args, train_dataset, model, tokenizer):
...
@@ -96,8 +96,10 @@ def train(args, train_dataset, model, tokenizer):
global_step
=
0
global_step
=
0
tr_loss
,
logging_loss
=
0.0
,
0.0
tr_loss
,
logging_loss
=
0.0
,
0.0
model
.
zero_grad
()
model
.
zero_grad
()
for
_
in
trange
(
int
(
args
.
num_train_epochs
),
desc
=
"Epoch"
,
disable
=
args
.
local_rank
not
in
[
-
1
,
0
]):
train_iterator
=
trange
(
int
(
args
.
num_train_epochs
),
desc
=
"Epoch"
,
disable
=
args
.
local_rank
not
in
[
-
1
,
0
])
for
step
,
batch
in
enumerate
(
tqdm
(
train_dataloader
,
desc
=
"Iteration"
,
disable
=
args
.
local_rank
not
in
[
-
1
,
0
])):
for
_
in
train_iterator
:
epoch_iterator
=
tqdm
(
train_dataloader
,
desc
=
"Iteration"
,
disable
=
args
.
local_rank
not
in
[
-
1
,
0
])
for
step
,
batch
in
enumerate
(
epoch_iterator
):
model
.
train
()
model
.
train
()
batch
=
tuple
(
t
.
to
(
args
.
device
)
for
t
in
batch
)
batch
=
tuple
(
t
.
to
(
args
.
device
)
for
t
in
batch
)
inputs
=
{
'input_ids'
:
batch
[
0
],
inputs
=
{
'input_ids'
:
batch
[
0
],
...
@@ -129,7 +131,7 @@ def train(args, train_dataset, model, tokenizer):
...
@@ -129,7 +131,7 @@ def train(args, train_dataset, model, tokenizer):
if
args
.
local_rank
in
[
-
1
,
0
]
and
args
.
logging_steps
>
0
and
global_step
%
args
.
logging_steps
==
0
:
if
args
.
local_rank
in
[
-
1
,
0
]
and
args
.
logging_steps
>
0
and
global_step
%
args
.
logging_steps
==
0
:
# Log metrics
# Log metrics
if
args
.
local_rank
==
-
1
:
# Only evaluate when single GPU otherwise metrics may not average well
if
args
.
local_rank
==
-
1
and
args
.
evaluate_during_training
:
# Only evaluate when single GPU otherwise metrics may not average well
results
=
evaluate
(
args
,
model
,
tokenizer
)
results
=
evaluate
(
args
,
model
,
tokenizer
)
for
key
,
value
in
results
.
items
():
for
key
,
value
in
results
.
items
():
tb_writer
.
add_scalar
(
'eval_{}'
.
format
(
key
),
value
,
global_step
)
tb_writer
.
add_scalar
(
'eval_{}'
.
format
(
key
),
value
,
global_step
)
...
@@ -148,8 +150,10 @@ def train(args, train_dataset, model, tokenizer):
...
@@ -148,8 +150,10 @@ def train(args, train_dataset, model, tokenizer):
logger
.
info
(
"Saving model checkpoint to %s"
,
output_dir
)
logger
.
info
(
"Saving model checkpoint to %s"
,
output_dir
)
if
args
.
max_steps
>
0
and
global_step
>
args
.
max_steps
:
if
args
.
max_steps
>
0
and
global_step
>
args
.
max_steps
:
epoch_iterator
.
close
()
break
break
if
args
.
max_steps
>
0
and
global_step
>
args
.
max_steps
:
if
args
.
max_steps
>
0
and
global_step
>
args
.
max_steps
:
train_iterator
.
close
()
break
break
return
global_step
,
tr_loss
/
global_step
return
global_step
,
tr_loss
/
global_step
...
@@ -164,11 +168,10 @@ def evaluate(args, model, tokenizer, prefix=""):
...
@@ -164,11 +168,10 @@ def evaluate(args, model, tokenizer, prefix=""):
for
eval_task
,
eval_output_dir
in
zip
(
eval_task_names
,
eval_outputs_dirs
):
for
eval_task
,
eval_output_dir
in
zip
(
eval_task_names
,
eval_outputs_dirs
):
eval_dataset
=
load_and_cache_examples
(
args
,
eval_task
,
tokenizer
,
evaluate
=
True
)
eval_dataset
=
load_and_cache_examples
(
args
,
eval_task
,
tokenizer
,
evaluate
=
True
)
""" Evaluate the model """
if
not
os
.
path
.
exists
(
eval_output_dir
)
and
args
.
local_rank
in
[
-
1
,
0
]:
if
not
os
.
path
.
exists
(
eval_output_dir
)
and
args
.
local_rank
in
[
-
1
,
0
]:
os
.
makedirs
(
eval_output_dir
)
os
.
makedirs
(
eval_output_dir
)
args
.
eval_batch_size
=
args
.
per_gpu_eval_batch_size
*
args
.
n_gpu
args
.
eval_batch_size
=
args
.
per_gpu_eval_batch_size
*
max
(
1
,
args
.
n_gpu
)
# Note that DistributedSampler samples randomly
# Note that DistributedSampler samples randomly
eval_sampler
=
SequentialSampler
(
eval_dataset
)
if
args
.
local_rank
==
-
1
else
DistributedSampler
(
eval_dataset
)
eval_sampler
=
SequentialSampler
(
eval_dataset
)
if
args
.
local_rank
==
-
1
else
DistributedSampler
(
eval_dataset
)
eval_dataloader
=
DataLoader
(
eval_dataset
,
sampler
=
eval_sampler
,
batch_size
=
args
.
eval_batch_size
)
eval_dataloader
=
DataLoader
(
eval_dataset
,
sampler
=
eval_sampler
,
batch_size
=
args
.
eval_batch_size
)
...
@@ -177,7 +180,7 @@ def evaluate(args, model, tokenizer, prefix=""):
...
@@ -177,7 +180,7 @@ def evaluate(args, model, tokenizer, prefix=""):
logger
.
info
(
"***** Running evaluation {} *****"
.
format
(
prefix
))
logger
.
info
(
"***** Running evaluation {} *****"
.
format
(
prefix
))
logger
.
info
(
" Num examples = %d"
,
len
(
eval_dataset
))
logger
.
info
(
" Num examples = %d"
,
len
(
eval_dataset
))
logger
.
info
(
" Batch size = %d"
,
args
.
eval_batch_size
)
logger
.
info
(
" Batch size = %d"
,
args
.
eval_batch_size
)
eval_loss
=
0
eval_loss
=
0
.0
nb_eval_steps
=
0
nb_eval_steps
=
0
preds
=
None
preds
=
None
out_label_ids
=
None
out_label_ids
=
None
...
@@ -287,6 +290,8 @@ def main():
...
@@ -287,6 +290,8 @@ def main():
help
=
"Whether to run training."
)
help
=
"Whether to run training."
)
parser
.
add_argument
(
"--do_eval"
,
action
=
'store_true'
,
parser
.
add_argument
(
"--do_eval"
,
action
=
'store_true'
,
help
=
"Whether to run eval on the dev set."
)
help
=
"Whether to run eval on the dev set."
)
parser
.
add_argument
(
"--evaluate_during_training"
,
action
=
'store_true'
,
help
=
"Rul evaluation during training at each logging step."
)
parser
.
add_argument
(
"--do_lower_case"
,
action
=
'store_true'
,
parser
.
add_argument
(
"--do_lower_case"
,
action
=
'store_true'
,
help
=
"Set this flag if you are using an uncased model."
)
help
=
"Set this flag if you are using an uncased model."
)
...
@@ -364,7 +369,7 @@ def main():
...
@@ -364,7 +369,7 @@ def main():
datefmt
=
'%m/%d/%Y %H:%M:%S'
,
datefmt
=
'%m/%d/%Y %H:%M:%S'
,
level
=
logging
.
INFO
if
args
.
local_rank
in
[
-
1
,
0
]
else
logging
.
WARN
)
level
=
logging
.
INFO
if
args
.
local_rank
in
[
-
1
,
0
]
else
logging
.
WARN
)
logger
.
warning
(
"Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s"
,
logger
.
warning
(
"Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s"
,
args
.
local_rank
,
device
,
args
.
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
)
args
.
local_rank
,
device
,
args
.
n_gpu
,
bool
(
args
.
local_rank
!=
-
1
),
args
.
fp16
)
# Setup seeds
# Setup seeds
random
.
seed
(
args
.
seed
)
random
.
seed
(
args
.
seed
)
...
@@ -409,6 +414,8 @@ def main():
...
@@ -409,6 +414,8 @@ def main():
elif
args
.
n_gpu
>
1
:
elif
args
.
n_gpu
>
1
:
model
=
torch
.
nn
.
DataParallel
(
model
)
model
=
torch
.
nn
.
DataParallel
(
model
)
logger
.
info
(
"Training/evaluation parameters %s"
,
args
)
# Training
# Training
if
args
.
do_train
:
if
args
.
do_train
:
train_dataset
=
load_and_cache_examples
(
args
,
args
.
task_name
,
tokenizer
,
evaluate
=
False
)
train_dataset
=
load_and_cache_examples
(
args
,
args
.
task_name
,
tokenizer
,
evaluate
=
False
)
...
@@ -438,22 +445,22 @@ def main():
...
@@ -438,22 +445,22 @@ def main():
model
.
to
(
args
.
device
)
model
.
to
(
args
.
device
)
# Evaluation
# Evaluation
results
=
{}
if
args
.
do_eval
and
args
.
local_rank
in
[
-
1
,
0
]:
if
args
.
do_eval
and
args
.
local_rank
in
[
-
1
,
0
]:
checkpoints
=
[
args
.
output_dir
+
'./'
+
WEIGHTS_NAME
]
checkpoints
=
[
args
.
output_dir
]
if
args
.
eval_all_checkpoints
:
if
args
.
eval_all_checkpoints
:
checkpoints
=
list
(
os
.
path
.
dirname
(
c
)
for
c
in
sorted
(
glob
.
glob
(
args
.
output_dir
+
'/**/'
+
WEIGHTS_NAME
,
recursive
=
True
)))
checkpoints
=
list
(
os
.
path
.
dirname
(
c
)
for
c
in
sorted
(
glob
.
glob
(
args
.
output_dir
+
'/**/'
+
WEIGHTS_NAME
,
recursive
=
True
)))
logging
.
getLogger
(
"pytorch_transformers.modeling_utils"
).
setLevel
(
logging
.
WARN
)
# Reduce logging
logging
.
getLogger
(
"pytorch_transformers.modeling_utils"
).
setLevel
(
logging
.
WARN
)
# Reduce logging
logger
.
info
(
"Evaluate the following checkpoints: %s"
,
checkpoints
)
logger
.
info
(
"Evaluate the following checkpoints: %s"
,
checkpoints
)
results
=
{}
for
checkpoint
in
checkpoints
:
for
checkpoint
in
checkpoints
:
global_step
=
int
(
checkpoint
.
split
(
'-'
)[
-
1
]
)
global_step
=
checkpoint
.
split
(
'-'
)[
-
1
]
model
=
model_class
.
from_pretrained
(
checkpoint
)
model
=
model_class
.
from_pretrained
(
checkpoint
)
model
.
to
(
args
.
device
)
model
.
to
(
args
.
device
)
result
=
evaluate
(
args
,
model
,
tokenizer
,
prefix
=
global_step
)
result
=
evaluate
(
args
,
model
,
tokenizer
,
prefix
=
global_step
)
result
=
dict
((
k
+
'_{}'
.
format
(
global_step
),
v
)
for
k
,
v
in
result
.
items
())
result
=
dict
((
k
+
'_{}'
.
format
(
global_step
),
v
)
for
k
,
v
in
result
.
items
())
results
.
update
(
result
)
results
.
update
(
result
)
return
results
return
results
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
examples/test_examples.py
View file @
ccb6947d
...
@@ -45,9 +45,18 @@ class ExamplesTests(unittest.TestCase):
...
@@ -45,9 +45,18 @@ class ExamplesTests(unittest.TestCase):
stream_handler
=
logging
.
StreamHandler
(
sys
.
stdout
)
stream_handler
=
logging
.
StreamHandler
(
sys
.
stdout
)
logger
.
addHandler
(
stream_handler
)
logger
.
addHandler
(
stream_handler
)
testargs
=
[
"run_glue.py"
,
"--data_dir=./examples/tests_samples/MRPC/"
,
testargs
=
[
"run_glue.py"
,
"--task_name=mrpc"
,
"--do_train"
,
"--do_eval"
,
"--output_dir=./examples/tests_samples/temp_dir"
,
"--data_dir=./examples/tests_samples/MRPC/"
,
"--train_batch_size=4"
,
"--eval_batch_size=2"
,
"--num_train_epochs=2.0"
,
"--overwrite_output_dir"
]
"--task_name=mrpc"
,
"--do_train"
,
"--do_eval"
,
"--output_dir=./examples/tests_samples/temp_dir"
,
"--per_gpu_train_batch_size=2"
,
"--per_gpu_eval_batch_size=1"
,
"--learning_rate=1e-4"
,
"--max_steps=10"
,
"--warmup_steps=2"
,
"--overwrite_output_dir"
]
model_name
=
"--model_name=bert-base-uncased"
model_name
=
"--model_name=bert-base-uncased"
with
patch
.
object
(
sys
,
'argv'
,
testargs
+
[
model_name
]):
with
patch
.
object
(
sys
,
'argv'
,
testargs
+
[
model_name
]):
result
=
run_glue
.
main
()
result
=
run_glue
.
main
()
...
...
pytorch_transformers/optimization.py
View file @
ccb6947d
...
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
...
@@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
class
ConstantLRSchedule
(
LambdaLR
):
class
ConstantLRSchedule
(
LambdaLR
):
def
__init__
(
self
,
optimizer
,
last_epoch
=-
1
):
def
__init__
(
self
,
optimizer
,
last_epoch
=-
1
):
super
(
ConstantLRSchedule
,
self
).
__init__
(
optimizer
,
lambda
x
:
x
,
last_epoch
=
last_epoch
)
super
(
ConstantLRSchedule
,
self
).
__init__
(
optimizer
,
lambda
_
:
1.0
,
last_epoch
=
last_epoch
)
class
WarmupCosineSchedule
(
LambdaLR
):
class
WarmupCosineSchedule
(
LambdaLR
):
"""
"""
...
@@ -42,10 +42,10 @@ class WarmupCosineSchedule(LambdaLR):
...
@@ -42,10 +42,10 @@ class WarmupCosineSchedule(LambdaLR):
def
lr_lambda
(
step
):
def
lr_lambda
(
step
):
if
step
<
warmup_steps
:
if
step
<
warmup_steps
:
return
step
/
max
(
1
,
warmup_steps
)
return
float
(
step
)
/
float
(
max
(
1
.0
,
warmup_steps
)
)
else
:
else
:
progress
=
(
step
-
warmup_steps
)
/
max
(
1
,
t_total
-
warmup_steps
)
# progress after warmup
progress
=
float
(
step
-
warmup_steps
)
/
float
(
max
(
1
,
t_total
-
warmup_steps
)
)
# progress after warmup
return
0.5
*
(
1.
+
math
.
cos
(
math
.
pi
*
cycles
*
2
*
progress
))
return
0.5
*
(
1.
+
math
.
cos
(
math
.
pi
*
float
(
cycles
)
*
2
.0
*
progress
))
super
(
WarmupCosineSchedule
,
self
).
__init__
(
optimizer
,
lr_lambda
,
last_epoch
=
last_epoch
)
super
(
WarmupCosineSchedule
,
self
).
__init__
(
optimizer
,
lr_lambda
,
last_epoch
=
last_epoch
)
...
@@ -59,11 +59,12 @@ class WarmupCosineWithHardRestartsSchedule(LambdaLR):
...
@@ -59,11 +59,12 @@ class WarmupCosineWithHardRestartsSchedule(LambdaLR):
def
lr_lambda
(
step
):
def
lr_lambda
(
step
):
if
step
<
warmup_steps
:
if
step
<
warmup_steps
:
return
step
/
max
(
1
,
warmup_steps
)
return
float
(
step
)
/
float
(
max
(
1
,
warmup_steps
)
)
else
:
else
:
progress
=
(
step
-
warmup_steps
)
/
max
(
1
,
t_total
-
warmup_steps
)
# progress after warmup
progress
=
float
(
step
-
warmup_steps
)
/
float
(
max
(
1
,
t_total
-
warmup_steps
))
# progress after warmup
ret
=
0.5
*
(
1.
+
math
.
cos
(
math
.
pi
*
((
cycles
*
progress
)
%
1
)))
if
progress
>=
1.0
:
return
ret
return
0.0
return
0.5
*
(
1.
+
math
.
cos
(
math
.
pi
*
((
float
(
cycles
)
*
progress
)
%
1.0
)))
super
(
WarmupCosineWithHardRestartsSchedule
,
self
).
__init__
(
optimizer
,
lr_lambda
,
last_epoch
=
last_epoch
)
super
(
WarmupCosineWithHardRestartsSchedule
,
self
).
__init__
(
optimizer
,
lr_lambda
,
last_epoch
=
last_epoch
)
...
@@ -77,7 +78,7 @@ class WarmupConstantSchedule(LambdaLR):
...
@@ -77,7 +78,7 @@ class WarmupConstantSchedule(LambdaLR):
def
lr_lambda
(
step
):
def
lr_lambda
(
step
):
if
step
<
warmup_steps
:
if
step
<
warmup_steps
:
return
step
/
warmup_steps
return
float
(
step
)
/
float
(
max
(
1.0
,
warmup_steps
))
return
1.
return
1.
super
(
WarmupConstantSchedule
,
self
).
__init__
(
optimizer
,
lr_lambda
,
last_epoch
=
last_epoch
)
super
(
WarmupConstantSchedule
,
self
).
__init__
(
optimizer
,
lr_lambda
,
last_epoch
=
last_epoch
)
...
@@ -92,8 +93,8 @@ class WarmupLinearSchedule(LambdaLR):
...
@@ -92,8 +93,8 @@ class WarmupLinearSchedule(LambdaLR):
def
lr_lambda
(
step
):
def
lr_lambda
(
step
):
if
step
<
warmup_steps
:
if
step
<
warmup_steps
:
return
step
/
max
(
1
,
warmup_steps
)
return
float
(
step
)
/
float
(
max
(
1
,
warmup_steps
)
)
return
(
t_total
-
step
)
/
max
(
1
,
t_total
-
warmup_steps
)
return
float
(
t_total
-
step
)
/
float
(
max
(
1
.0
,
t_total
-
warmup_steps
)
)
super
(
WarmupLinearSchedule
,
self
).
__init__
(
optimizer
,
lr_lambda
,
last_epoch
=
last_epoch
)
super
(
WarmupLinearSchedule
,
self
).
__init__
(
optimizer
,
lr_lambda
,
last_epoch
=
last_epoch
)
...
...
pytorch_transformers/tests/optimization_test.py
View file @
ccb6947d
...
@@ -26,6 +26,13 @@ from pytorch_transformers import (AdamW, ConstantLRSchedule, WarmupConstantSched
...
@@ -26,6 +26,13 @@ from pytorch_transformers import (AdamW, ConstantLRSchedule, WarmupConstantSched
import
numpy
as
np
import
numpy
as
np
def
unwrap_schedule
(
scheduler
,
num_steps
=
10
):
lrs
=
[]
for
_
in
range
(
num_steps
):
scheduler
.
step
()
lrs
.
append
(
scheduler
.
get_lr
())
return
lrs
class
OptimizationTest
(
unittest
.
TestCase
):
class
OptimizationTest
(
unittest
.
TestCase
):
def
assertListAlmostEqual
(
self
,
list1
,
list2
,
tol
):
def
assertListAlmostEqual
(
self
,
list1
,
list2
,
tol
):
...
@@ -38,9 +45,7 @@ class OptimizationTest(unittest.TestCase):
...
@@ -38,9 +45,7 @@ class OptimizationTest(unittest.TestCase):
target
=
torch
.
tensor
([
0.4
,
0.2
,
-
0.5
])
target
=
torch
.
tensor
([
0.4
,
0.2
,
-
0.5
])
criterion
=
torch
.
nn
.
MSELoss
()
criterion
=
torch
.
nn
.
MSELoss
()
# No warmup, constant schedule, no gradient clipping
# No warmup, constant schedule, no gradient clipping
optimizer
=
AdamW
(
params
=
[
w
],
lr
=
2e-1
,
optimizer
=
AdamW
(
params
=
[
w
],
lr
=
2e-1
,
weight_decay
=
0.0
)
weight_decay
=
0.0
,
max_grad_norm
=-
1
)
for
_
in
range
(
100
):
for
_
in
range
(
100
):
loss
=
criterion
(
w
,
target
)
loss
=
criterion
(
w
,
target
)
loss
.
backward
()
loss
.
backward
()
...
@@ -51,29 +56,49 @@ class OptimizationTest(unittest.TestCase):
...
@@ -51,29 +56,49 @@ class OptimizationTest(unittest.TestCase):
class
ScheduleInitTest
(
unittest
.
TestCase
):
class
ScheduleInitTest
(
unittest
.
TestCase
):
def
test_sched_init
(
self
):
m
=
torch
.
nn
.
Linear
(
50
,
50
)
m
=
torch
.
nn
.
Linear
(
50
,
50
)
optimizer
=
AdamW
(
m
.
parameters
(),
lr
=
10.
)
optim
=
AdamW
(
m
.
parameters
(),
lr
=
0.001
,
warmup
=
.
1
,
t_total
=
1000
,
schedule
=
None
)
num_steps
=
10
self
.
assertTrue
(
isinstance
(
optim
.
param_groups
[
0
][
"schedule"
],
ConstantLR
))
optim
=
AdamW
(
m
.
parameters
(),
lr
=
0.001
,
warmup
=
.
1
,
t_total
=
1000
,
schedule
=
"none"
)
def
assertListAlmostEqual
(
self
,
list1
,
list2
,
tol
):
self
.
assertTrue
(
isinstance
(
optim
.
param_groups
[
0
][
"schedule"
],
ConstantLR
))
self
.
assertEqual
(
len
(
list1
),
len
(
list2
))
optim
=
AdamW
(
m
.
parameters
(),
lr
=
0.001
,
warmup
=
.
01
,
t_total
=
1000
)
for
a
,
b
in
zip
(
list1
,
list2
):
self
.
assertTrue
(
isinstance
(
optim
.
param_groups
[
0
][
"schedule"
],
WarmupLinearSchedule
))
self
.
assertAlmostEqual
(
a
,
b
,
delta
=
tol
)
# shouldn't fail
def
test_constant_scheduler
(
self
):
scheduler
=
ConstantLRSchedule
(
self
.
optimizer
)
class
WarmupCosineWithRestartsTest
(
unittest
.
TestCase
):
lrs
=
unwrap_schedule
(
scheduler
,
self
.
num_steps
)
def
test_it
(
self
):
expected_learning_rates
=
[
10.
]
*
self
.
num_steps
m
=
WarmupCosineWithWarmupRestartsSchedule
(
warmup
=
0.05
,
t_total
=
1000.
,
cycles
=
5
)
self
.
assertEqual
(
len
(
lrs
[
0
]),
1
)
x
=
np
.
arange
(
0
,
1000
)
self
.
assertListEqual
([
l
[
0
]
for
l
in
lrs
],
expected_learning_rates
)
y
=
[
m
.
get_lr
(
xe
)
for
xe
in
x
]
y
=
np
.
asarray
(
y
)
def
test_warmup_constant_scheduler
(
self
):
expected_zeros
=
y
[[
0
,
200
,
400
,
600
,
800
]]
scheduler
=
WarmupConstantSchedule
(
self
.
optimizer
,
warmup_steps
=
4
)
print
(
expected_zeros
)
lrs
=
unwrap_schedule
(
scheduler
,
self
.
num_steps
)
expected_ones
=
y
[[
50
,
250
,
450
,
650
,
850
]]
expected_learning_rates
=
[
2.5
,
5.0
,
7.5
,
10.0
,
10.0
,
10.0
,
10.0
,
10.0
,
10.0
,
10.0
]
print
(
expected_ones
)
self
.
assertEqual
(
len
(
lrs
[
0
]),
1
)
self
.
assertTrue
(
np
.
allclose
(
expected_ones
,
1
))
self
.
assertListEqual
([
l
[
0
]
for
l
in
lrs
],
expected_learning_rates
)
self
.
assertTrue
(
np
.
allclose
(
expected_zeros
,
0
))
def
test_warmup_linear_scheduler
(
self
):
scheduler
=
WarmupLinearSchedule
(
self
.
optimizer
,
warmup_steps
=
2
,
t_total
=
10
)
lrs
=
unwrap_schedule
(
scheduler
,
self
.
num_steps
)
expected_learning_rates
=
[
5.0
,
10.0
,
8.75
,
7.5
,
6.25
,
5.0
,
3.75
,
2.5
,
1.25
,
0.0
]
self
.
assertEqual
(
len
(
lrs
[
0
]),
1
)
self
.
assertListEqual
([
l
[
0
]
for
l
in
lrs
],
expected_learning_rates
)
def
test_warmup_cosine_scheduler
(
self
):
scheduler
=
WarmupCosineSchedule
(
self
.
optimizer
,
warmup_steps
=
2
,
t_total
=
10
)
lrs
=
unwrap_schedule
(
scheduler
,
self
.
num_steps
)
expected_learning_rates
=
[
5.0
,
10.0
,
9.61
,
8.53
,
6.91
,
5.0
,
3.08
,
1.46
,
0.38
,
0.0
]
self
.
assertEqual
(
len
(
lrs
[
0
]),
1
)
self
.
assertListAlmostEqual
([
l
[
0
]
for
l
in
lrs
],
expected_learning_rates
,
tol
=
1e-2
)
def
test_warmup_cosine_hard_restart_scheduler
(
self
):
scheduler
=
WarmupCosineWithHardRestartsSchedule
(
self
.
optimizer
,
warmup_steps
=
2
,
cycles
=
2
,
t_total
=
10
)
lrs
=
unwrap_schedule
(
scheduler
,
self
.
num_steps
)
expected_learning_rates
=
[
5.0
,
10.0
,
8.53
,
5.0
,
1.46
,
10.0
,
8.53
,
5.0
,
1.46
,
0.0
]
self
.
assertEqual
(
len
(
lrs
[
0
]),
1
)
self
.
assertListAlmostEqual
([
l
[
0
]
for
l
in
lrs
],
expected_learning_rates
,
tol
=
1e-2
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment