chenpangpang / transformers / Commits / 92a782b1
"examples/nas/oneshot/vscode:/vscode.git/clone" did not exist on "f27b8741900b434c484cdfd1a656443f7a824a7c"
Commit 92a782b1 authored Jul 11, 2019 by thomwolf

    fix run_glue test

Parent: ccb6947d
Showing 2 changed files with 17 additions and 11 deletions (+17 -11):

    examples/run_glue.py                    +12 -6
    pytorch_transformers/optimization.py     +5 -5
examples/run_glue.py
@@ -53,6 +53,15 @@ MODEL_CLASSES = {
     'xlm': (XLMConfig, XLMForSequenceClassification, XLMTokenizer),
 }
 
+
+def set_seed(args):
+    random.seed(args.seed)
+    np.random.seed(args.seed)
+    torch.manual_seed(args.seed)
+    if args.n_gpu > 0:
+        torch.cuda.manual_seed_all(args.seed)
+
+
 def train(args, train_dataset, model, tokenizer):
     """ Train the model """
     if args.local_rank in [-1, 0]:
@@ -97,6 +106,7 @@ def train(args, train_dataset, model, tokenizer):
     tr_loss, logging_loss = 0.0, 0.0
     model.zero_grad()
     train_iterator = trange(int(args.num_train_epochs), desc="Epoch", disable=args.local_rank not in [-1, 0])
+    set_seed(args)  # Added here for reproductibility (even between python 2 and 3)
     for _ in train_iterator:
         epoch_iterator = tqdm(train_dataloader, desc="Iteration", disable=args.local_rank not in [-1, 0])
         for step, batch in enumerate(epoch_iterator):
@@ -371,12 +381,8 @@ def main():
     logger.warning("Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
                    args.local_rank, device, args.n_gpu, bool(args.local_rank != -1), args.fp16)
 
-    # Setup seeds
-    random.seed(args.seed)
-    np.random.seed(args.seed)
-    torch.manual_seed(args.seed)
-    if args.n_gpu > 0:
-        torch.cuda.manual_seed_all(args.seed)
+    # Set seed
+    set_seed(args)
 
     # Prepare GLUE task
     args.task_name = args.task_name.lower()
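The run_glue.py change above replaces the inline seeding block in main() with a single set_seed(args) helper that is also called at the top of train(). Below is a minimal standalone sketch of that pattern; the argparse.Namespace is only a stand-in for the script's real argument parser, and the seed value is hypothetical:

import argparse
import random

import numpy as np
import torch


def set_seed(args):
    # Seed every RNG the training loop touches so runs are reproducible.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)


if __name__ == "__main__":
    # Stand-in for the parsed command-line arguments used by run_glue.py.
    args = argparse.Namespace(seed=42, n_gpu=torch.cuda.device_count())
    set_seed(args)        # the commit calls this once in main() ...
    # ... and again at the start of train(), so any RNG use between the two
    # calls cannot shift the state seen by the training loop.
    set_seed(args)
    print(torch.rand(1))  # identical output across runs with the same seed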
pytorch_transformers/optimization.py
@@ -167,14 +167,14 @@ class AdamW(Optimizer):
 
                 # Decay the first and second moment running average coefficient
                 # In-place operations to update the averages at the same time
-                exp_avg.mul_(beta1).add_(1 - beta1, grad)
-                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
+                exp_avg.mul_(beta1).add_(1.0 - beta1, grad)
+                exp_avg_sq.mul_(beta2).addcmul_(1.0 - beta2, grad, grad)
                 denom = exp_avg_sq.sqrt().add_(group['eps'])
 
                 step_size = group['lr']
                 if group['correct_bias']:  # No bias correction for Bert
-                    bias_correction1 = 1 - beta1 ** state['step']
-                    bias_correction2 = 1 - beta2 ** state['step']
+                    bias_correction1 = 1.0 - beta1 ** state['step']
+                    bias_correction2 = 1.0 - beta2 ** state['step']
                     step_size = step_size * math.sqrt(bias_correction2) / bias_correction1
 
                 p.data.addcdiv_(-step_size, exp_avg, denom)
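For reference, the bias-corrected step size computed in the 'correct_bias' branch above can be checked in isolation. A small sketch with hypothetical beta1, beta2, lr, and step values, not taken from the commit:

import math

# Hypothetical hyperparameters, chosen only to illustrate the arithmetic.
beta1, beta2 = 0.9, 0.999
lr, step = 5e-5, 100

# Same computation as the patched lines above.
bias_correction1 = 1.0 - beta1 ** step
bias_correction2 = 1.0 - beta2 ** step
step_size = lr * math.sqrt(bias_correction2) / bias_correction1

# step_size is smaller than lr early in training and approaches lr
# as both correction terms converge to 1.0 for large step counts.
print(bias_correction1, bias_correction2, step_size)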
@@ -187,7 +187,7 @@ class AdamW(Optimizer):
                 # with the m/v parameters. This is equivalent to adding the square
                 # of the weights to the loss with plain (non-momentum) SGD.
                 # Add weight decay at the end (fixed version)
-                if group['weight_decay'] > 0:
+                if group['weight_decay'] > 0.0:
                     p.data.add_(-group['lr'] * group['weight_decay'], p.data)
 
         return loss
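The second hunk only tightens the comparison to a float literal; the decoupled weight-decay step it guards shrinks each parameter toward zero after the Adam update instead of folding the penalty into the gradient. A tiny sketch of that update with hypothetical lr and weight_decay values, written with the keyword alpha form of add_, which is equivalent to the positional form used in the diff:

import torch

# Hypothetical values, not taken from the commit.
lr, weight_decay = 1e-3, 0.01
p = torch.ones(3)

if weight_decay > 0.0:
    # p <- p - lr * weight_decay * p, i.e. each entry scaled by (1 - lr * weight_decay)
    p.add_(p, alpha=-lr * weight_decay)

print(p)  # each entry is now 1.0 - lr * weight_decay = 0.99999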