ModelZoo / ResNet50_tensorflow

Commit c88fcb2b, authored Oct 11, 2018 by Shawn Wang
Parent: d4ac494f

Use flagfile to pass flags to data async generation process.
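For reference, the handoff introduced here is absl's standard flagfile mechanism: the trainer serializes one --name=value pair per line, and the async generation process consumes the file through the built-in --flagfile argument. A hypothetical flagfile for this pipeline (the flag names appear in this commit; the values are invented for illustration):

--data_dir=/tmp/ncf_data
--cache_id=1539212345
--num_neg=4
--num_train_positives=1000000
--num_items=26744
--num_readers=4
--epochs_per_cycle=1
--train_batch_size=16384
--eval_batch_size=160000
--num_workers=12
--spillover=True
--redirect_logs=False
--use_tf_logging=True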
Showing 2 changed files with 49 additions and 91 deletions:

  official/recommendation/data_async_generation.py   +13  -32
  official/recommendation/data_preprocessing.py      +36  -59
official/recommendation/data_async_generation.py

@@ -51,7 +51,7 @@ _log_file = None
 def log_msg(msg):
   """Include timestamp info when logging messages to a file."""
-  if flags.FLAGS.use_command_file:
+  if flags.FLAGS.use_tf_logging:
     tf.logging.info(msg)
     return
@@ -440,44 +440,26 @@ def _generation_loop(num_workers, # type: int
     gc.collect()


-def _set_flags_with_command_file():
-  """Use arguments from COMMAND_FILE when use_command_file is True."""
-  command_file = os.path.join(flags.FLAGS.data_dir, rconst.COMMAND_FILE)
-  tf.logging.info("Waiting for command file to appear at {}..."
-                  .format(command_file))
-  while not tf.gfile.Exists(command_file):
+def _parse_flagfile():
+  """Fill flags with flagfile."""
+  flagfile = os.path.join(flags.FLAGS.data_dir, rconst.FLAGFILE)
+  tf.logging.info("Waiting for flag file to appear at {}..."
+                  .format(flagfile))
+  while not tf.gfile.Exists(flagfile):
     time.sleep(1)
-  tf.logging.info("Command file found.")
-  with tf.gfile.Open(command_file, "r") as f:
-    command = json.load(f)
-    flags.FLAGS.num_workers = command["num_workers"]
-    assert flags.FLAGS.data_dir == command["data_dir"]
-    flags.FLAGS.cache_id = command["cache_id"]
-    flags.FLAGS.num_readers = command["num_readers"]
-    flags.FLAGS.num_neg = command["num_neg"]
-    flags.FLAGS.num_train_positives = command["num_train_positives"]
-    flags.FLAGS.num_items = command["num_items"]
-    flags.FLAGS.epochs_per_cycle = command["epochs_per_cycle"]
-    flags.FLAGS.train_batch_size = command["train_batch_size"]
-    flags.FLAGS.eval_batch_size = command["eval_batch_size"]
-    flags.FLAGS.spillover = command["spillover"]
-    flags.FLAGS.redirect_logs = command["redirect_logs"]
-    assert flags.FLAGS.redirect_logs is False
-    if "seed" in command:
-      flags.FLAGS.seed = command["seed"]
+  tf.logging.info("flagfile found.")
+  flags.FLAGS([__file__, "--flagfile", flagfile])


 def main(_):
   global _log_file
-  if flags.FLAGS.use_command_file is not None:
-    _set_flags_with_command_file()
+  _parse_flagfile()

   redirect_logs = flags.FLAGS.redirect_logs
   cache_paths = rconst.Paths(
       data_dir=flags.FLAGS.data_dir, cache_id=flags.FLAGS.cache_id)
   log_file_name = "data_gen_proc_{}.log".format(cache_paths.cache_id)
   log_path = os.path.join(cache_paths.data_dir, log_file_name)
   if log_path.startswith("gs://") and redirect_logs:
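The new _parse_flagfile() defers all value handling to absl: once the file exists, flags.FLAGS([__file__, "--flagfile", flagfile]) runs every line through the normal flag parser instead of assigning FLAGS attributes one by one. A minimal self-contained sketch of that mechanism, assuming absl-py; the two flags and the file location are illustrative, and only --flagfile itself is a built-in absl feature:

import os
import tempfile

from absl import flags

flags.DEFINE_integer(name="num_workers", default=1, help="Illustrative flag.")
flags.DEFINE_boolean(name="spillover", default=False, help="Illustrative flag.")

if __name__ == "__main__":
  # Producer side: one --name=value pair per line.
  flagfile = os.path.join(tempfile.mkdtemp(), "flagfile")
  with open(flagfile, "w") as f:
    f.write("--num_workers=12\n")
    f.write("--spillover=True\n")

  # Consumer side: --flagfile tells absl to read further arguments from the
  # file, parsing and type-checking each value against its flag definition.
  flags.FLAGS(["demo", "--flagfile", flagfile])
  assert flags.FLAGS.num_workers == 12
  assert flags.FLAGS.spillover is True

One practical consequence, visible in the removed _set_flags_with_command_file(): the per-flag assignments and the manual "seed" check disappear, because unknown or malformed entries now fail inside the parser itself.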
@@ -559,12 +541,11 @@ def define_flags():
   flags.DEFINE_boolean(
       name="redirect_logs", default=False,
       help="Catch logs and write them to a file. "
            "(Useful if this is run as a subprocess)")
   flags.DEFINE_boolean(
       name="use_tf_logging", default=False,
       help="Use tf.logging instead of log file.")
   flags.DEFINE_integer(
       name="seed", default=None,
       help="NumPy random seed to set at startup. If not "
            "specified, a seed will not be set.")
-  flags.DEFINE_boolean(
-      name="use_command_file", default=False,
-      help="Use command arguments from json at command_path. "
-           "All arguments other than data_dir will be ignored.")


 if __name__ == "__main__":
official/recommendation/data_preprocessing.py
@@ -430,77 +430,54 @@ def instantiate_pipeline(dataset, data_dir, batch_size, eval_batch_size,
   # pool underlying the training generation doesn't starve other processes.
   num_workers = int(multiprocessing.cpu_count() * 0.75) or 1

+  flags_ = {
+      "data_dir": data_dir,
+      "cache_id": ncf_dataset.cache_paths.cache_id,
+      "num_neg": num_neg,
+      "num_train_positives": ncf_dataset.num_train_positives,
+      "num_items": ncf_dataset.num_items,
+      "num_readers": ncf_dataset.num_data_readers,
+      "epochs_per_cycle": epochs_per_cycle,
+      "train_batch_size": batch_size,
+      "eval_batch_size": eval_batch_size,
+      "num_workers": num_workers,
+      # This allows the training input function to guarantee batch size and
+      # significantly improves performance. (~5% increase in examples/sec on
+      # GPU, and needed for TPU XLA.)
+      "spillover": True,
+      "redirect_logs": use_subprocess,
+      "use_tf_logging": not use_subprocess,
+  }
+  if ncf_dataset.deterministic:
+    flags_["seed"] = stat_utils.random_int32()
+
+  # We write to a temp file then atomically rename it to the final file,
+  # because writing directly to the final file can cause the data generation
+  # async process to read a partially written JSON file.
+  flagfile_temp = os.path.join(flags.FLAGS.data_dir, rconst.FLAGFILE_TEMP)
+  tf.logging.info("Preparing flagfile for async data generation in {} ..."
+                  .format(flagfile_temp))
+  with tf.gfile.Open(flagfile_temp, "w") as f:
+    for k, v in six.iteritems(flags_):
+      f.write("--{}={}\n".format(k, v))
+  flagfile = os.path.join(data_dir, rconst.FLAGFILE)
+  tf.gfile.Rename(flagfile_temp, flagfile)
+  tf.logging.info("Wrote flagfile for async data generation in {}."
+                  .format(flagfile))
+
   if use_subprocess:
     tf.logging.info("Creating training file subprocess.")

     subproc_env = os.environ.copy()
     # The subprocess uses TensorFlow for tf.gfile, but it does not need GPU
     # resources and by default will try to allocate GPU memory. This would cause
     # contention with the main training process.
     subproc_env["CUDA_VISIBLE_DEVICES"] = ""

     subproc_args = popen_helper.INVOCATION + [
-        "--data_dir", data_dir,
-        "--cache_id", str(ncf_dataset.cache_paths.cache_id),
-        "--num_neg", str(num_neg),
-        "--num_train_positives", str(ncf_dataset.num_train_positives),
-        "--num_items", str(ncf_dataset.num_items),
-        "--num_readers", str(ncf_dataset.num_data_readers),
-        "--epochs_per_cycle", str(epochs_per_cycle),
-        "--train_batch_size", str(batch_size),
-        "--eval_batch_size", str(eval_batch_size),
-        "--num_workers", str(num_workers),
-        # This allows the training input function to guarantee batch size and
-        # significantly improves performance. (~5% increase in examples/sec on
-        # GPU, and needed for TPU XLA.)
-        "--spillover", "True",
-        "--redirect_logs", "True"
-    ]
-    if ncf_dataset.deterministic:
-      subproc_args.extend(["--seed", str(int(stat_utils.random_int32()))])
+        "--data_dir", data_dir]

     tf.logging.info(
         "Generation subprocess command: {}".format(" ".join(subproc_args)))

     proc = subprocess.Popen(args=subproc_args, shell=False, env=subproc_env)
-  else:
-    # We write to a temp file then atomically rename it to the final file,
-    # because writing directly to the final file can cause the data generation
-    # async process to read a partially written JSON file.
-    command_file_temp = os.path.join(data_dir, rconst.COMMAND_FILE_TEMP)
-    tf.logging.info("Generation subprocess command at {} ..."
-                    .format(command_file_temp))
-    with tf.gfile.Open(command_file_temp, "w") as f:
-      command = {
-          "data_dir": data_dir,
-          "cache_id": ncf_dataset.cache_paths.cache_id,
-          "num_neg": num_neg,
-          "num_train_positives": ncf_dataset.num_train_positives,
-          "num_items": ncf_dataset.num_items,
-          "num_readers": ncf_dataset.num_data_readers,
-          "epochs_per_cycle": epochs_per_cycle,
-          "train_batch_size": batch_size,
-          "eval_batch_size": eval_batch_size,
-          "num_workers": num_workers,
-          # This allows the training input function to guarantee batch size and
-          # significantly improves performance. (~5% increase in examples/sec on
-          # GPU, and needed for TPU XLA.)
-          "spillover": True,
-          "redirect_logs": False
-      }
-      if ncf_dataset.deterministic:
-        command["seed"] = stat_utils.random_int32()
-      json.dump(command, f)
-    command_file = os.path.join(data_dir, rconst.COMMAND_FILE)
-    tf.gfile.Rename(command_file_temp, command_file)
-    tf.logging.info("Generation subprocess command saved to: {}"
-                    .format(command_file))

   cleanup_called = {"finished": False}

   @atexit.register
   def cleanup():
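The write-to-temp-then-rename dance above is an atomicity trick carried over from the old command-file path: the async process polls for the final name, so it must never observe a partially written file. A minimal sketch of the pattern on the local filesystem, assuming POSIX rename semantics (the diff uses tf.gfile.Rename, which plays the same role for paths such as gs://); the function name and paths here are illustrative:

import os

def publish_flagfile(flags_, final_path):
  """Write flags_ as a flagfile without exposing a half-written file."""
  temp_path = final_path + ".temp"
  with open(temp_path, "w") as f:
    for name, value in sorted(flags_.items()):
      f.write("--{}={}\n".format(name, value))
  # The rename is a single atomic step: a reader polling for final_path sees
  # either nothing or the complete file, never a partial one.
  os.rename(temp_path, final_path)

if __name__ == "__main__":
  publish_flagfile({"num_workers": 12, "spillover": True}, "/tmp/flagfile")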