chenpangpang/transformers, commit 34bdc8b5
Authored Nov 09, 2018 by thomwolf
Parent: 0c24db9d

remove duplicate accumulate gradient step arguments

Both run_classifier.py and run_squad.py registered two command-line flags for the same setting: the older --accumulate_gradients and the newer --gradient_accumulation_steps. This commit deletes the duplicate --accumulate_gradients argument in each script and switches the validation and batch-size logic to use --gradient_accumulation_steps only.
Changes: 2 files, 8 additions and 16 deletions (+8 -16)

run_classifier.py  +4 -8
run_squad.py       +4 -8
run_classifier.py

@@ -392,10 +392,6 @@ def main():
                         default=False,
                         action='store_true',
                         help="Whether not to use CUDA when available")
-    parser.add_argument("--accumulate_gradients",
-                        type=int,
-                        default=1,
-                        help="Number of steps to accumulate gradient on (divide the batch_size and accumulate)")
     parser.add_argument("--local_rank",
                         type=int,
                         default=-1,
@@ -426,11 +422,11 @@ def main():
         torch.distributed.init_process_group(backend='nccl')
     logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
 
-    if args.accumulate_gradients < 1:
-        raise ValueError("Invalid accumulate_gradients parameter: {}, should be >= 1".format(
-                            args.accumulate_gradients))
+    if args.gradient_accumulation_steps < 1:
+        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
+                            args.gradient_accumulation_steps))
 
-    args.train_batch_size = int(args.train_batch_size / args.accumulate_gradients)
+    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)
 
     random.seed(args.seed)
     np.random.seed(args.seed)
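For readers unfamiliar with the pattern: dividing train_batch_size by gradient_accumulation_steps shrinks the micro-batch actually processed per forward/backward pass, and the gradients of several micro-batches are summed before a single optimizer update, so the effective batch size per update stays at train_batch_size. Below is a minimal, hypothetical sketch of how such a flag is typically wired into a PyTorch training loop; model, optimizer, and dataloader are placeholders, and the actual training loops in run_classifier.py and run_squad.py differ in their details.

def train_one_epoch(model, optimizer, dataloader, gradient_accumulation_steps=1):
    # Hypothetical illustration of gradient accumulation, not the
    # repository's actual loop. Assumes the model returns a loss when
    # called on a batch, as the BERT models in this repo do.
    model.train()
    optimizer.zero_grad()
    for step, batch in enumerate(dataloader):
        loss = model(*batch)
        # Scale the loss so the summed micro-batch gradients match the
        # gradient of one full batch of the original train_batch_size.
        (loss / gradient_accumulation_steps).backward()
        if (step + 1) % gradient_accumulation_steps == 0:
            optimizer.step()       # apply the accumulated gradients
            optimizer.zero_grad()  # clear before the next accumulation window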
run_squad.py

@@ -731,10 +731,6 @@ def main():
                         type=int,
                         default=-1,
                         help="local_rank for distributed training on gpus")
-    parser.add_argument("--accumulate_gradients",
-                        type=int,
-                        default=1,
-                        help="Number of steps to accumulate gradient on (divide the batch_size and accumulate)")
     parser.add_argument('--seed',
                         type=int,
                         default=42,
@@ -756,11 +752,11 @@ def main():
         torch.distributed.init_process_group(backend='nccl')
     logger.info("device %s n_gpu %d distributed training %r", device, n_gpu, bool(args.local_rank != -1))
 
-    if args.accumulate_gradients < 1:
-        raise ValueError("Invalid accumulate_gradients parameter: {}, should be >= 1".format(
-                            args.accumulate_gradients))
+    if args.gradient_accumulation_steps < 1:
+        raise ValueError("Invalid gradient_accumulation_steps parameter: {}, should be >= 1".format(
+                            args.gradient_accumulation_steps))
 
-    args.train_batch_size = int(args.train_batch_size / args.accumulate_gradients)
+    args.train_batch_size = int(args.train_batch_size / args.gradient_accumulation_steps)
 
     random.seed(args.seed)
     np.random.seed(args.seed)
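One consequence of the int(args.train_batch_size / args.gradient_accumulation_steps) line in both scripts is that --train_batch_size keeps describing the effective batch per optimizer step, while the number of examples pushed through the GPU per forward/backward pass shrinks. A quick sanity check with made-up numbers (32 is purely illustrative; the scripts' default batch sizes are not shown in this diff):

train_batch_size = 32            # value passed on the command line (illustrative)
gradient_accumulation_steps = 4

micro_batch_size = int(train_batch_size / gradient_accumulation_steps)   # 8 per forward/backward
effective_batch_size = micro_batch_size * gradient_accumulation_steps    # 32 per optimizer step
assert effective_batch_size == train_batch_size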