Commit c708c1b4, authored by Reed and committed by Taylor Robie
Browse files

Fix comments. (#4959)

parent d53c3e1d
...@@ -71,7 +71,7 @@ class NCFDataset(object): ...@@ -71,7 +71,7 @@ class NCFDataset(object):
def _filter_index_sort(raw_rating_path): def _filter_index_sort(raw_rating_path):
- # type: (str) -> (pd.DataFrame, int, int)
+ # type: (str) -> (pd.DataFrame, dict, dict)
"""Read in data CSV, and output structured data. """Read in data CSV, and output structured data.
This function reads in the raw CSV of positive items, and performs three This function reads in the raw CSV of positive items, and performs three
...@@ -98,8 +98,9 @@ def _filter_index_sort(raw_rating_path): ...@@ -98,8 +98,9 @@ def _filter_index_sort(raw_rating_path):
raw_rating_path: The path to the CSV which contains the raw dataset. raw_rating_path: The path to the CSV which contains the raw dataset.
Returns: Returns:
- A filtered, zero-index remapped, sorted dataframe, as well as the number
- of users and items in the processed dataset.
+ A filtered, zero-index remapped, sorted dataframe, a dict mapping raw user
+ IDs to regularized user IDs, and a dict mapping raw item IDs to regularized
+ item IDs.
""" """
with tf.gfile.Open(raw_rating_path) as f: with tf.gfile.Open(raw_rating_path) as f:
df = pd.read_csv(f) df = pd.read_csv(f)
...@@ -164,6 +165,8 @@ def _train_eval_map_fn(args): ...@@ -164,6 +165,8 @@ def _train_eval_map_fn(args):
shard pickle files. shard pickle files.
num_items: The cardinality of the item set, which determines the set from num_items: The cardinality of the item set, which determines the set from
which validation negatives should be drawn. which validation negatives should be drawn.
cache_paths: rconst.Paths object containing locations for various cache
files.
Returns: Returns:
A dict containing the evaluation data for a given shard. A dict containing the evaluation data for a given shard.
...@@ -250,6 +253,8 @@ def generate_train_eval_data(df, approx_num_shards, num_items, cache_paths): ...@@ -250,6 +253,8 @@ def generate_train_eval_data(df, approx_num_shards, num_items, cache_paths):
imbalance does not impact performance; however it does mean that one imbalance does not impact performance; however it does mean that one
should not expect approx_num_shards to be the ACTUAL number of shards. should not expect approx_num_shards to be the ACTUAL number of shards.
num_items: The cardinality of the item set. num_items: The cardinality of the item set.
cache_paths: rconst.Paths object containing locations for various cache
files.
Returns: Returns:
A tuple containing the validation data. A tuple containing the validation data.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment