Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
ModelZoo
ResNet50_tensorflow
Commits
bba6134c
Commit
bba6134c
authored
Jul 21, 2021
by
A. Unique TensorFlower
Browse files
[NCF] Updating data preprocessing script.
PiperOrigin-RevId: 386140865
parent
31dc0f62
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
7 additions
and
9 deletions
+7
-9
official/recommendation/data_pipeline.py
official/recommendation/data_pipeline.py
+3
-3
official/recommendation/data_preprocessing.py
official/recommendation/data_preprocessing.py
+4
-6
No files found.
official/recommendation/data_pipeline.py
View file @
bba6134c
...
...
@@ -29,17 +29,16 @@ import timeit
import
traceback
import
typing
from
absl
import
logging
import
numpy
as
np
import
six
from
six.moves
import
queue
import
tensorflow
as
tf
from
absl
import
logging
from
tensorflow.python.tpu.datasets
import
StreamingFilesDataset
from
official.recommendation
import
constants
as
rconst
from
official.recommendation
import
movielens
from
official.recommendation
import
popen_helper
from
official.recommendation
import
stat_utils
from
tensorflow.python.tpu.datasets
import
StreamingFilesDataset
SUMMARY_TEMPLATE
=
"""General:
{spacer}Num users: {num_users}
...
...
@@ -119,6 +118,7 @@ class DatasetManager(object):
"""Convert NumPy arrays into a TFRecords entry."""
def
create_int_feature
(
values
):
values
=
np
.
squeeze
(
values
)
return
tf
.
train
.
Feature
(
int64_list
=
tf
.
train
.
Int64List
(
value
=
list
(
values
)))
feature_dict
=
{
...
...
official/recommendation/data_preprocessing.py
View file @
bba6134c
...
...
@@ -23,21 +23,19 @@ import os
import
pickle
import
time
import
timeit
# pylint: disable=wrong-import-order
import
typing
from
typing
import
Dict
,
Text
,
Tuple
from
absl
import
logging
import
numpy
as
np
import
pandas
as
pd
import
tensorflow
as
tf
import
typing
from
typing
import
Dict
,
Text
,
Tuple
# pylint: enable=wrong-import-order
from
official.recommendation
import
constants
as
rconst
from
official.recommendation
import
data_pipeline
from
official.recommendation
import
movielens
_EXPECTED_CACHE_KEYS
=
(
rconst
.
TRAIN_USER_KEY
,
rconst
.
TRAIN_ITEM_KEY
,
rconst
.
EVAL_USER_KEY
,
rconst
.
EVAL_ITEM_KEY
,
rconst
.
USER_MAP
,
rconst
.
ITEM_MAP
)
...
...
@@ -196,7 +194,7 @@ def _filter_index_sort(raw_rating_path: Text,
logging
.
info
(
"Writing raw data cache."
)
with
tf
.
io
.
gfile
.
GFile
(
cache_path
,
"wb"
)
as
f
:
pickle
.
dump
(
data
,
f
,
protocol
=
pickle
.
HIGHEST_PROTOCOL
)
pickle
.
dump
(
data
,
f
,
protocol
=
4
)
# TODO(robieta): MLPerf cache clear.
return
data
,
valid_cache
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment