ModelZoo / ResNet50_tensorflow · Commits

Commit 48b412c3, authored Nov 01, 2020 by Vishnu Banna
currently training darknet
parent 6d3cfef4

Showing 3 changed files with 29 additions and 24 deletions:

official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml  (+13, -9)
official/vision/beta/projects/yolo/tasks/image_classification.py  (+3, -2)
training_dir/params.yaml  (+13, -13)
official/vision/beta/projects/yolo/configs/experiments/darknet53_tfds.yaml

 runtime:
   distribution_strategy: 'mirrored'
   mixed_precision_dtype: 'float16'
   loss_scale: 'dynamic'
   num_gpus: 2
 task:
   model:
     num_classes: 1001
...

@@ -18,21 +20,23 @@ task:
     tfds_data_dir: '~/tensorflow_datasets/'
     tfds_download: true
     is_training: true
-    global_batch_size: 128
+    global_batch_size: 16  # default = 128
     dtype: 'float16'
     shuffle_buffer_size: 100
   validation_data:
     tfds_name: 'imagenet2012'
     tfds_split: 'validation'
     tfds_data_dir: '~/tensorflow_datasets/'
     tfds_download: true
     is_training: true
-    global_batch_size: 128
+    global_batch_size: 16  # default = 128
     dtype: 'float16'
     drop_remainder: false
     shuffle_buffer_size: 100
 trainer:
-  train_steps: 800000  # epochs: 80
-  validation_steps: 400  # size of validation data
-  validation_interval: 500  # 10000
+  train_steps: 6400000  # epochs: 80, 800000 * 128/batchsize
+  validation_steps: 3200  # size of validation data, 400 * 128/batchsize
+  validation_interval: 10000  # 10000
   steps_per_loop: 10000
   summary_interval: 10000
   checkpoint_interval: 10000
...

@@ -44,11 +48,11 @@ trainer:
     learning_rate:
       type: 'polynomial'
       polynomial:
-        initial_learning_rate: 0.1
-        end_learning_rate: 0.0001
+        initial_learning_rate: 0.0125  # 0.1 * batchsize/128, default = 0.1
+        end_learning_rate: 0.0000125  # 0.0001 * batchsize/128, default = 0.0001
         power: 4.0
-        decay_steps: 799000
+        decay_steps: 6392000  # 799000 * 128/batchsize, default = 800000 - 1000 = 799000
     warmup:
       type: 'linear'
       linear:
-        warmup_steps: 1000  # learning rate rises from 0 to 0.1 over 1000 steps
+        warmup_steps: 8000  # 0 to 0.1 over 1000 * 128/batchsize, default = 1000
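In short, the commit drops the global batch size from 128 to 16 and rescales the schedule linearly so the run still covers the same number of images: step counts are multiplied by 128/16 = 8 and learning rates are divided by 8 (validation_interval is simply set to the 10000 noted in the old comment rather than scaled). A minimal sketch of that arithmetic; the helper name and function are illustrative and not part of this repo:

# Illustrative only: reproduces the "x * 128/batchsize" comments in the YAML above.
def scale_schedule(batch_size, reference_batch_size=128):
  ratio = reference_batch_size / batch_size  # 128 / 16 = 8 for this commit
  return {
      'train_steps': int(800000 * ratio),       # -> 6400000
      'validation_steps': int(400 * ratio),     # -> 3200
      'decay_steps': int(799000 * ratio),       # -> 6392000
      'warmup_steps': int(1000 * ratio),        # -> 8000
      'initial_learning_rate': 0.1 / ratio,     # -> 0.0125
      'end_learning_rate': 0.0001 / ratio,      # -> 0.0000125
  }

print(scale_schedule(16))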
official/vision/beta/projects/yolo/tasks/image_classification.py

...
@@ -141,7 +141,8 @@ class ImageClassificationTask(base_task.Task):
       # Computes per-replica loss.
       loss = self.build_losses(
           model_outputs=outputs, labels=labels, aux_losses=model.losses)
-      # Scales loss as the default gradients allreduce performs sum inside the
+      #Scales loss as the default gradients allreduce performs sum inside the
       # optimizer.
       scaled_loss = loss / num_replicas
...
@@ -150,7 +151,7 @@ class ImageClassificationTask(base_task.Task):
       if isinstance(optimizer,
                     tf.keras.mixed_precision.experimental.LossScaleOptimizer):
         scaled_loss = optimizer.get_scaled_loss(scaled_loss)
     tf.print("batch loss: ", loss, end="\r")
     tvars = model.trainable_variables
     grads = tape.gradient(scaled_loss, tvars)
     # Scales back gradient before apply_gradients when LossScaleOptimizer is
...
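For context, the touched train step follows the standard mixed-precision pattern: divide the per-replica loss by the replica count, scale it through the LossScaleOptimizer before computing gradients, and unscale the gradients before applying them. A self-contained sketch of that pattern, using the same TF 2.3-era tf.keras.mixed_precision.experimental API as the code above; the function and argument names other than the TF APIs are stand-ins, not the task's actual method:

import tensorflow as tf

def train_step_sketch(model, optimizer, features, labels, build_losses):
  # Stand-in for the task's train_step; build_losses mirrors the call signature in the diff.
  num_replicas = tf.distribute.get_strategy().num_replicas_in_sync
  with tf.GradientTape() as tape:
    outputs = model(features, training=True)
    loss = build_losses(model_outputs=outputs, labels=labels,
                        aux_losses=model.losses)
    # Divide by the replica count because the gradient allreduce sums across replicas.
    scaled_loss = loss / num_replicas
    if isinstance(optimizer,
                  tf.keras.mixed_precision.experimental.LossScaleOptimizer):
      # Multiply by the current loss scale so float16 gradients do not underflow.
      scaled_loss = optimizer.get_scaled_loss(scaled_loss)
  tf.print("batch loss: ", loss, end="\r")  # same style of debug print as in the diff above
  tvars = model.trainable_variables
  grads = tape.gradient(scaled_loss, tvars)
  if isinstance(optimizer,
                tf.keras.mixed_precision.experimental.LossScaleOptimizer):
    # Scale the gradients back before apply_gradients.
    grads = optimizer.get_unscaled_gradients(grads)
  optimizer.apply_gradients(zip(grads, tvars))
  return loss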
training_dir/params.yaml

...
@@ -6,10 +6,10 @@ runtime:
   distribution_strategy: mirrored
   enable_xla: false
   gpu_thread_mode: null
-  loss_scale: null
+  loss_scale: dynamic
   mixed_precision_dtype: float16
   num_cores_per_replica: 1
-  num_gpus: 0
+  num_gpus: 2
   num_packs: 1
   per_gpu_thread_count: 0
   run_eagerly: false
...
@@ -46,11 +46,11 @@ task:
     drop_remainder: true
     dtype: float16
     enable_tf_data_service: false
-    global_batch_size: 128
+    global_batch_size: 16
     input_path: ''
     is_training: true
     sharding: true
-    shuffle_buffer_size: 10000
+    shuffle_buffer_size: 100
     tf_data_service_address: null
     tf_data_service_job_name: null
     tfds_as_supervised: false
...
@@ -67,11 +67,11 @@ task:
     drop_remainder: false
     dtype: float16
     enable_tf_data_service: false
-    global_batch_size: 128
+    global_batch_size: 16
     input_path: ''
     is_training: true
     sharding: true
-    shuffle_buffer_size: 10000
+    shuffle_buffer_size: 100
     tf_data_service_address: null
     tf_data_service_job_name: null
     tfds_as_supervised: false
...
@@ -94,9 +94,9 @@ trainer:
     learning_rate:
       polynomial:
         cycle: false
-        decay_steps: 799000
-        end_learning_rate: 0.0001
-        initial_learning_rate: 0.1
+        decay_steps: 6392000
+        end_learning_rate: 1.25e-05
+        initial_learning_rate: 0.0125
         name: PolynomialDecay
         power: 4.0
       type: polynomial
...
@@ -113,12 +113,12 @@ trainer:
     linear:
       name: linear
       warmup_learning_rate: 0
-      warmup_steps: 1000
+      warmup_steps: 8000
     type: linear
   steps_per_loop: 10000
   summary_interval: 10000
-  train_steps: 800000
+  train_steps: 6400000
   train_tf_function: true
   train_tf_while_loop: true
-  validation_interval: 500
-  validation_steps: 400
+  validation_interval: 10000
+  validation_steps: 3200
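The resolved runtime block above (mirrored distribution over 2 GPUs, float16 compute, dynamic loss scaling) can be reproduced outside the config system with a few lines of TF 2.3-era code. This is a hedged sketch rather than how the model garden actually wires it up; build_model() is a hypothetical placeholder and the SGD momentum value is an assumption:

import tensorflow as tf

# Sketch of the runtime settings in params.yaml: MirroredStrategy across the
# visible GPUs, a mixed_float16 policy, and a dynamically loss-scaled optimizer.
strategy = tf.distribute.MirroredStrategy()  # num_gpus: 2 -> two replicas

policy = tf.keras.mixed_precision.experimental.Policy(
    'mixed_float16', loss_scale='dynamic')
tf.keras.mixed_precision.experimental.set_policy(policy)

with strategy.scope():
  model = build_model()  # hypothetical builder; the real model comes from the task config
  optimizer = tf.keras.optimizers.SGD(learning_rate=0.0125, momentum=0.9)  # momentum assumed
  # Wrap the optimizer so gradients are scaled/unscaled around float16 math,
  # matching the LossScaleOptimizer check in the train step.
  optimizer = tf.keras.mixed_precision.experimental.LossScaleOptimizer(
      optimizer, loss_scale='dynamic')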