ModelZoo / ResNet50_tensorflow · Commits · acba9b0b
"tests/vscode:/vscode.git/clone" did not exist on "b4f7b53ecb88c8d42b9e136dd9bd40dbcfaf7257"
Commit acba9b0b (Unverified)
Authored Oct 11, 2018 by shizhiw; committed by GitHub on Oct 11, 2018

Merge branch 'master' into shizhiw

Parents: 1980a0da, 86c0ad3a
Showing 6 changed files with 149 additions and 28 deletions (+149, −28).
official/recommendation/data_preprocessing.py (+84, −2)
official/recommendation/data_test.py (+2, −0)
official/recommendation/ncf_main.py (+28, −17)
official/recommendation/neumf_model.py (+26, −0)
research/marco/Automated_Marco.py (+7, −7)
research/skip_thoughts/skip_thoughts/data/preprocess_dataset.py (+2, −2)
official/recommendation/data_preprocessing.py (view file @ acba9b0b)

```diff
@@ -48,6 +48,18 @@ from official.recommendation import stat_utils
 from official.recommendation import popen_helper
 
+
+DATASET_TO_NUM_USERS_AND_ITEMS = {"ml-1m": (6040, 3706),
+                                  "ml-20m": (138493, 26744)}
+
+# Number of batches to run per epoch when using synthetic data. At high batch
+# sizes, we run for more batches than with real data, which is good since
+# running more batches reduces noise when measuring the average batches/second.
+_SYNTHETIC_BATCHES_PER_EPOCH = 2000
+
+
 class NCFDataset(object):
   """Container for training and testing data."""

@@ -376,6 +388,14 @@ def construct_cache(dataset, data_dir, num_data_readers, match_mlperf,
   raw_rating_path = os.path.join(data_dir, dataset, movielens.RATINGS_FILE)
   df, user_map, item_map = _filter_index_sort(raw_rating_path, match_mlperf)
 
+  num_users, num_items = DATASET_TO_NUM_USERS_AND_ITEMS[dataset]
+
+  if num_users != len(user_map):
+    raise ValueError("Expected to find {} users, but found {}".format(
+        num_users, len(user_map)))
+  if num_items != len(item_map):
+    raise ValueError("Expected to find {} items, but found {}".format(
+        num_items, len(item_map)))
+
   generate_train_eval_data(df=df, approx_num_shards=approx_num_shards,
                            num_items=len(item_map), cache_paths=cache_paths,

@@ -587,9 +607,12 @@ def hash_pipeline(dataset, deterministic):
 
 def make_train_input_fn(ncf_dataset):
-  # type: (NCFDataset) -> (typing.Callable, str, int)
+  # type: (typing.Optional[NCFDataset]) -> (typing.Callable, str, int)
   """Construct training input_fn for the current epoch."""
+
+  if ncf_dataset is None:
+    return make_train_synthetic_input_fn()
+
   if not tf.gfile.Exists(ncf_dataset.cache_paths.subproc_alive):
     # The generation subprocess must have been alive at some point, because we
     # earlier checked that the subproc_alive file existed.

@@ -661,10 +684,40 @@ def make_train_input_fn(ncf_dataset):
 
   return input_fn, record_dir, batch_count
 
 
+def make_train_synthetic_input_fn():
+  """Construct training input_fn that uses synthetic data."""
+  def input_fn(params):
+    """Generated input_fn for the given epoch."""
+    batch_size = params["batch_size"]
+    num_users = params["num_users"]
+    num_items = params["num_items"]
+
+    users = tf.random_uniform([batch_size], dtype=tf.int32,
+                              minval=0, maxval=num_users)
+    items = tf.random_uniform([batch_size], dtype=tf.int32,
+                              minval=0, maxval=num_items)
+    labels = tf.random_uniform([batch_size], dtype=tf.int32,
+                               minval=0, maxval=2)
+
+    data = {
+        movielens.USER_COLUMN: users,
+        movielens.ITEM_COLUMN: items,
+    }, labels
+
+    dataset = tf.data.Dataset.from_tensors(data).repeat(
+        _SYNTHETIC_BATCHES_PER_EPOCH)
+    dataset = dataset.prefetch(32)
+    return dataset
+
+  return input_fn, None, _SYNTHETIC_BATCHES_PER_EPOCH
+
+
 def make_pred_input_fn(ncf_dataset):
-  # type: (NCFDataset) -> typing.Callable
+  # type: (typing.Optional[NCFDataset]) -> typing.Callable
   """Construct input_fn for metric evaluation."""
+
+  if ncf_dataset is None:
+    return make_synthetic_pred_input_fn()
+
   def input_fn(params):
     """Input function based on eval batch size."""

@@ -689,3 +742,32 @@ def make_pred_input_fn(ncf_dataset):
     return dataset
 
   return input_fn
+
+
+def make_synthetic_pred_input_fn():
+  """Construct input_fn for metric evaluation that uses synthetic data."""
+  def input_fn(params):
+    """Generated input_fn for the given epoch."""
+    batch_size = params["eval_batch_size"]
+    num_users = params["num_users"]
+    num_items = params["num_items"]
+
+    users = tf.random_uniform([batch_size], dtype=tf.int32,
+                              minval=0, maxval=num_users)
+    items = tf.random_uniform([batch_size], dtype=tf.int32,
+                              minval=0, maxval=num_items)
+    dupe_mask = tf.cast(tf.random_uniform([batch_size], dtype=tf.int32,
+                                          minval=0, maxval=2), tf.bool)
+
+    data = {
+        movielens.USER_COLUMN: users,
+        movielens.ITEM_COLUMN: items,
+        rconst.DUPLICATE_MASK: dupe_mask,
+    }
+
+    dataset = tf.data.Dataset.from_tensors(data).repeat(
+        _SYNTHETIC_BATCHES_PER_EPOCH)
+    dataset = dataset.prefetch(16)
+    return dataset
+
+  return input_fn
```
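
The comment introduced above explains the intent: the synthetic path feeds randomly generated batches so that batches/second can be measured without any real input pipeline. Below is a minimal, self-contained sketch of the same `from_tensors(...).repeat(N)` pattern, assuming TensorFlow 1.x APIs (`tf.random_uniform`, `make_one_shot_iterator`); the feature names and sizes here are illustrative, not the repo's `movielens` constants.

```python
import tensorflow as tf

_SYNTHETIC_BATCHES_PER_EPOCH = 2000  # same constant the diff introduces


def synthetic_input_fn(params):
  """Build a dataset of random user/item/label batches, like the diff does."""
  batch_size = params["batch_size"]
  users = tf.random_uniform([batch_size], dtype=tf.int32, minval=0,
                            maxval=params["num_users"])
  items = tf.random_uniform([batch_size], dtype=tf.int32, minval=0,
                            maxval=params["num_items"])
  labels = tf.random_uniform([batch_size], dtype=tf.int32, minval=0, maxval=2)

  # from_tensors() wraps a single (features, labels) element; repeat() yields
  # it _SYNTHETIC_BATCHES_PER_EPOCH times, so the input pipeline does
  # essentially no per-step work (no disk I/O, no preprocessing).
  data = ({"user_id": users, "item_id": items}, labels)
  dataset = tf.data.Dataset.from_tensors(data).repeat(
      _SYNTHETIC_BATCHES_PER_EPOCH)
  return dataset.prefetch(32)


if __name__ == "__main__":
  ds = synthetic_input_fn({"batch_size": 4, "num_users": 10, "num_items": 20})
  features, labels = ds.make_one_shot_iterator().get_next()
  with tf.Session() as sess:
    print(sess.run([features, labels]))
```
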
official/recommendation/data_test.py (view file @ acba9b0b)

```diff
@@ -80,6 +80,8 @@ class BaseTest(tf.test.TestCase):
     movielens.download = mock_download
     movielens.NUM_RATINGS[DATASET] = NUM_PTS
+    data_preprocessing.DATASET_TO_NUM_USERS_AND_ITEMS[DATASET] = (NUM_USERS,
+                                                                  NUM_ITEMS)
 
   def test_preprocessing(self):
     # For the most part the necessary checks are performed within
```
official/recommendation/ncf_main.py (view file @ acba9b0b)

```diff
@@ -118,7 +118,7 @@ def main(_):
 def run_ncf(_):
   """Run NCF training and eval loop."""
-  if FLAGS.download_if_missing:
+  if FLAGS.download_if_missing and not FLAGS.use_synthetic_data:
     movielens.download(FLAGS.dataset, FLAGS.data_dir)
 
   if FLAGS.seed is not None:

@@ -137,6 +137,13 @@ def run_ncf(_):
         "eval examples per user does not evenly divide eval_batch_size. "
         "Overriding to {}".format(eval_batch_size))
 
-  ncf_dataset, cleanup_fn = data_preprocessing.instantiate_pipeline(
-      dataset=FLAGS.dataset, data_dir=FLAGS.data_dir,
-      batch_size=batch_size,
+  if FLAGS.use_synthetic_data:
+    ncf_dataset = None
+    cleanup_fn = lambda: None
+    num_users, num_items = data_preprocessing.DATASET_TO_NUM_USERS_AND_ITEMS[
+        FLAGS.dataset]
+    approx_train_steps = None
+  else:
+    ncf_dataset, cleanup_fn = data_preprocessing.instantiate_pipeline(
+        dataset=FLAGS.dataset, data_dir=FLAGS.data_dir,
+        batch_size=batch_size,

@@ -146,6 +153,10 @@ def run_ncf(_):
-      match_mlperf=FLAGS.ml_perf,
-      deterministic=FLAGS.seed is not None,
-      use_subprocess=FLAGS.use_subprocess)
+        match_mlperf=FLAGS.ml_perf,
+        deterministic=FLAGS.seed is not None,
+        use_subprocess=FLAGS.use_subprocess)
+
+    num_users = ncf_dataset.num_users
+    num_items = ncf_dataset.num_items
+    approx_train_steps = int(ncf_dataset.num_train_positives
+                             * (1 + FLAGS.num_neg) // FLAGS.batch_size)
 
   model_helpers.apply_clean(flags.FLAGS)

@@ -154,9 +165,10 @@ def run_ncf(_):
       "use_seed": FLAGS.seed is not None,
       "hash_pipeline": FLAGS.hash_pipeline,
       "batch_size": batch_size,
+      "eval_batch_size": eval_batch_size,
       "learning_rate": FLAGS.learning_rate,
-      "num_users": ncf_dataset.num_users,
-      "num_items": ncf_dataset.num_items,
+      "num_users": num_users,
+      "num_items": num_items,
       "mf_dim": FLAGS.num_factors,
       "model_layers": [int(layer) for layer in FLAGS.layers],
       "mf_regularization": FLAGS.mf_regularization,

@@ -193,8 +205,6 @@ def run_ncf(_):
       run_params=run_params,
       test_id=FLAGS.benchmark_test_id)
 
-  approx_train_steps = int(ncf_dataset.num_train_positives
-                           * (1 + FLAGS.num_neg) // FLAGS.batch_size)
   pred_input_fn = data_preprocessing.make_pred_input_fn(ncf_dataset=ncf_dataset)
 
   total_training_cycle = FLAGS.train_epochs // FLAGS.epochs_between_evals

@@ -206,13 +216,14 @@ def run_ncf(_):
     train_input_fn, train_record_dir, batch_count = \
       data_preprocessing.make_train_input_fn(ncf_dataset=ncf_dataset)
 
-    if np.abs(approx_train_steps - batch_count) > 1:
+    if approx_train_steps and np.abs(approx_train_steps - batch_count) > 1:
       tf.logging.warning(
           "Estimated ({}) and reported ({}) number of batches differ by more "
           "than one".format(approx_train_steps, batch_count))
 
     train_estimator.train(input_fn=train_input_fn, hooks=train_hooks,
                           steps=batch_count)
-    tf.gfile.DeleteRecursively(train_record_dir)
+
+    if train_record_dir:
+      tf.gfile.DeleteRecursively(train_record_dir)
 
     tf.logging.info("Beginning evaluation.")

@@ -246,7 +257,7 @@ def define_ncf_flags():
       num_parallel_calls=False,
       inter_op=False,
       intra_op=False,
-      synthetic_data=False,
+      synthetic_data=True,
       max_train_steps=False,
       dtype=False,
       all_reduce_alg=False
```
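
The estimate that moves inside the `else:` branch above is simple arithmetic: one training epoch holds roughly `num_train_positives * (1 + num_neg)` examples, so dividing by the batch size gives the expected batch count; with synthetic data the estimate is `None` and the sanity check is skipped. A short worked example with illustrative numbers (not the real MovieLens counts):

```python
# Illustrative values only; the real counts come from the preprocessed dataset.
num_train_positives = 1000000   # positive (user, item) training pairs
num_neg = 4                     # negatives sampled per positive (--num_neg)
batch_size = 16384

# Each positive is paired with num_neg sampled negatives, so an epoch holds
# about num_train_positives * (1 + num_neg) examples.
approx_train_steps = int(num_train_positives * (1 + num_neg) // batch_size)
print(approx_train_steps)       # 305

# With --use_synthetic_data the estimate is None, so the warning comparing it
# against the reported batch_count is skipped, exactly as in the diff:
approx_train_steps = None
batch_count = 2000
if approx_train_steps and abs(approx_train_steps - batch_count) > 1:
  print("estimate and reported batch count differ")  # not reached
```
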
official/recommendation/neumf_model.py (view file @ acba9b0b)

```diff
@@ -44,6 +44,31 @@ from official.recommendation import constants as rconst
 from official.recommendation import stat_utils
 
 
+def _sparse_to_dense_grads(grads_and_vars):
+  """Convert sparse gradients to dense gradients.
+
+  All sparse gradients, which are represented as instances of tf.IndexedSlices,
+  are converted to dense Tensors. Dense gradients, which are represented as
+  Tensors, are unchanged.
+
+  The purpose of this conversion is that for small embeddings, which are used
+  by this model, applying dense gradients with the AdamOptimizer is faster than
+  applying sparse gradients.
+
+  Args:
+    grads_and_vars: A list of (gradient, variable) tuples. Each gradient can
+      be a Tensor or an IndexedSlices. Tensors are unchanged, and IndexedSlices
+      are converted to dense Tensors.
+
+  Returns:
+    The same list of (gradient, variable) as `grads_and_vars`, except each
+    IndexedSlices gradient is converted to a Tensor.
+  """
+  # Calling convert_to_tensor changes IndexedSlices into Tensors, and leaves
+  # Tensors unchanged.
+  return [(tf.convert_to_tensor(g), v) for g, v in grads_and_vars]
+
+
 def neumf_model_fn(features, labels, mode, params):
   """Model Function for NeuMF estimator."""
   if params.get("use_seed"):

@@ -94,6 +119,7 @@ def neumf_model_fn(features, labels, mode, params):
     tvars = tf.trainable_variables()
     gradients = optimizer.compute_gradients(
         loss, tvars, colocate_gradients_with_ops=True)
+    gradients = _sparse_to_dense_grads(gradients)
     minimize_op = optimizer.apply_gradients(
         gradients, global_step=global_step, name="train")
     update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
```
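
As the new docstring explains, gradients flowing into an embedding table arrive as `tf.IndexedSlices`, and for a small table it is faster to densify them before `apply_gradients`. A minimal sketch of that conversion, assuming TensorFlow 1.x graph mode; the variable names and shapes are illustrative:

```python
import tensorflow as tf

embedding = tf.get_variable("emb", shape=[100, 8])   # small embedding table
ids = tf.constant([3, 7, 7])
looked_up = tf.nn.embedding_lookup(embedding, ids)
loss = tf.reduce_sum(looked_up)

optimizer = tf.train.AdamOptimizer()
grads_and_vars = optimizer.compute_gradients(loss, [embedding])
# The gradient of an embedding lookup is an IndexedSlices object.
print(type(grads_and_vars[0][0]))

# Same trick as _sparse_to_dense_grads: convert_to_tensor densifies
# IndexedSlices and leaves ordinary Tensors unchanged.
dense_grads_and_vars = [(tf.convert_to_tensor(g), v)
                        for g, v in grads_and_vars]
print(type(dense_grads_and_vars[0][0]))

train_op = optimizer.apply_gradients(dense_grads_and_vars)
```
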
research/marco/Automated_Marco.py (view file @ acba9b0b)

```diff
@@ -22,9 +22,7 @@ import argparse
 """
 usage:
 Processes all .jpg, .png, .bmp and .gif files found in the specified directory and its subdirectories.
 --PATH ( Path to directory of images or path to directory with subdirectory of images). e.g Path/To/Directory/
 --Model_PATH path to the tensorflow model
 """

@@ -45,8 +43,8 @@ size = len(crystal_images)
 def load_images(file_list):
   for i in file_list:
-    file = open(i)
-    yield {"image_bytes": [file.read()]}, i
+    files = open(i, 'rb')
+    yield {"image_bytes": [files.read()]}, i

@@ -66,7 +64,9 @@ with open(PATH +'results.csv', 'w') as csvfile:
         results = predicter(data)
         vals = results['scores'][0]
+        vals = vals * 100
+        classes = results['classes'][0]
+        dictionary = dict(zip(classes, vals))
-        print('Image path: ' + name, 'Crystal: ' + str(vals[0]), 'Other: ' + str(vals[1]), 'Precipitate: ' + str(vals[2]), 'Clear ' + str(vals[3]))
-        Writer.writerow(['Image path: ' + name, 'Crystal: ' + str(vals[0]), 'Other: ' + str(vals[1]), 'Precipitate: ' + str(vals[2]), 'Clear: ' + str(vals[3])])
+        print('Image path: ' + name + ' Crystal: ' + str(dictionary[b'Crystals']) + ' Other: ' + str(dictionary[b'Other']) + ' Precipitate: ' + str(dictionary[b'Precipitate']) + ' Clear: ' + str(dictionary[b'Clear']))
+        Writer.writerow(['Image path: ' + name, 'Crystal: ' + str(dictionary[b'Crystals']), 'Other: ' + str(dictionary[b'Other']), 'Precipitate: ' + str(dictionary[b'Precipitate']), 'Clear: ' + str(dictionary[b'Clear'])])
```
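
The rewritten output looks scores up by class label instead of assuming a fixed ordering, and it opens images in binary mode so the raw bytes reach the model's `image_bytes` input. A small sketch of the lookup with illustrative values; note that a TensorFlow predictor typically returns class labels as byte strings under Python 3, which is why the diff indexes with `b'...'` keys:

```python
# Illustrative stand-ins for results['classes'][0] and results['scores'][0].
classes = [b'Crystals', b'Other', b'Precipitate', b'Clear']
vals = [0.91, 0.05, 0.03, 0.01]
vals = [v * 100 for v in vals]          # express scores as percentages

dictionary = dict(zip(classes, vals))   # class label -> score
print('Crystal: ' + str(dictionary[b'Crystals']))   # Crystal: 91.0

# Reading in binary mode keeps the exact file contents for the model input:
# files = open(path, 'rb'); payload = {"image_bytes": [files.read()]}
```
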
research/skip_thoughts/skip_thoughts/data/preprocess_dataset.py (view file @ acba9b0b)

```diff
@@ -121,8 +121,8 @@ def _build_vocabulary(input_files):
   tf.logging.info("Processed %d sentences total", num)
 
-  words = wordcount.keys()
-  freqs = wordcount.values()
+  words = list(wordcount)
+  freqs = list(wordcount.values())
   sorted_indices = np.argsort(freqs)[::-1]
 
   vocab = collections.OrderedDict()
```
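
Under Python 3, `dict.keys()` and `dict.values()` return view objects rather than lists, and the positional indexing that follows `np.argsort` breaks on views; wrapping them in `list()` restores the Python 2 behavior. A short sketch with a toy word count (the real counts come from the corpus being processed):

```python
import numpy as np

wordcount = {"the": 50, "cat": 3, "sat": 7}

words = list(wordcount)            # list of keys; dict views are not indexable
freqs = list(wordcount.values())   # list of counts, in the same key order

sorted_indices = np.argsort(freqs)[::-1]        # most frequent first
vocab_order = [words[i] for i in sorted_indices]
print(vocab_order)                 # ['the', 'sat', 'cat']
```
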