Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
91ff146d
Commit
91ff146d
authored
Jan 25, 2016
by
Lukasz Kaiser
Browse files
Move to batch-size 32 to fit into 4GB GPUs, start adapting hyperparameters.
parent
47ab157a
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
6 deletions
+6
-6
neural_gpu/neural_gpu.py
neural_gpu/neural_gpu.py
+1
-1
neural_gpu/neural_gpu_trainer.py
neural_gpu/neural_gpu_trainer.py
+5
-5
No files found.
neural_gpu/neural_gpu.py
View file @
91ff146d
...
...
@@ -151,7 +151,7 @@ class NeuralGPU(object):
        tf.constant(0, dtype=tf.int32, shape=[1]),
        tf.zeros([1, vec_size]))
-   adam = tf.train.AdamOptimizer(0.01 * self.lr, epsilon=1e-5)
+   adam = tf.train.AdamOptimizer(0.01 * self.lr, epsilon=1e-4)
    # Main graph creation loop, for every bin in data_utils.
    self.steps = []
...
...
neural_gpu/neural_gpu_trainer.py
View file @
91ff146d
...
...
@@ -31,16 +31,16 @@ from tensorflow.python.platform import gfile
import data_utils as data
import neural_gpu

-tf.app.flags.DEFINE_float("lr", 0.1, "Learning rate.")
+tf.app.flags.DEFINE_float("lr", 0.3, "Learning rate.")
 tf.app.flags.DEFINE_float("init_weight", 1.0, "Initial weights deviation.")
 tf.app.flags.DEFINE_float("max_grad_norm", 0.05, "Clip gradients to this norm.")
 tf.app.flags.DEFINE_float("cutoff", 1.2, "Cutoff at the gates.")
 tf.app.flags.DEFINE_float("pull", 0.0005, "Starting pull of the relaxations.")
 tf.app.flags.DEFINE_float("pull_incr", 1.2, "Increase pull by that much.")
-tf.app.flags.DEFINE_float("curriculum_bound", 0.06, "Move curriculum < this.")
+tf.app.flags.DEFINE_float("curriculum_bound", 0.08, "Move curriculum < this.")
 tf.app.flags.DEFINE_float("dropout", 0.15, "Dropout that much.")
 tf.app.flags.DEFINE_float("grad_noise_scale", 1.0, "Gradient noise scale.")
-tf.app.flags.DEFINE_integer("batch_size", 64, "Batch size.")
+tf.app.flags.DEFINE_integer("batch_size", 32, "Batch size.")
 tf.app.flags.DEFINE_integer("low_batch_size", 16, "Low batch size.")
 tf.app.flags.DEFINE_integer("steps_per_checkpoint", 200, "Steps per epoch.")
 tf.app.flags.DEFINE_integer("nmaps", 24, "Number of floats in each cell.")
...
...
@@ -256,7 +256,7 @@ def train():
       if max_cur_length < max_length:
         prev_acc_perp.append(1000000)
       # Either increase pull or, if it's large, average parameters.
-      if pull < 1:
+      if pull < 0.1:
         sess.run(model.pull_incr_op)
       else:
         data.print_out("  Averaging parameters.")
...
...
@@ -283,7 +283,7 @@ def train():
         l += 1
       while l < bound + 1 and not data.test_set[t][l]:
         l += 1
-      if seq_err < 0.5:  # Run larger test if we're good enough.
+      if seq_err < 0.05:  # Run larger test if we're good enough.
         _, seq_err = multi_test(data.forward_max, model, sess, t,
                                 FLAGS.nprint, batch_size * 4)
     if seq_err < 0.01:  # Super-large test on 1-task large-forward models.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment