"git@developer.sourcefind.cn:OpenDAS/vision.git" did not exist on "126e3e7527243fd838bf2929e6483326815283de"
Unverified commit 541f2ba4, authored by Rhett Ying and committed by GitHub
Browse files

[GraphBolt] make in_memory optional in OnDiskDataset (#6789)

parent 6db323b3
...@@ -118,18 +118,23 @@ def preprocess_ondisk_dataset( ...@@ -118,18 +118,23 @@ def preprocess_ondisk_dataset(
# the sampling-graph. # the sampling-graph.
if input_config["graph"].get("feature_data", None): if input_config["graph"].get("feature_data", None):
for graph_feature in input_config["graph"]["feature_data"]: for graph_feature in input_config["graph"]["feature_data"]:
in_memory = (
True
if "in_memory" not in graph_feature
else graph_feature["in_memory"]
)
if graph_feature["domain"] == "node": if graph_feature["domain"] == "node":
node_data = read_data( node_data = read_data(
os.path.join(dataset_dir, graph_feature["path"]), os.path.join(dataset_dir, graph_feature["path"]),
graph_feature["format"], graph_feature["format"],
in_memory=graph_feature["in_memory"], in_memory=in_memory,
) )
g.ndata[graph_feature["name"]] = node_data g.ndata[graph_feature["name"]] = node_data
if graph_feature["domain"] == "edge": if graph_feature["domain"] == "edge":
edge_data = read_data( edge_data = read_data(
os.path.join(dataset_dir, graph_feature["path"]), os.path.join(dataset_dir, graph_feature["path"]),
graph_feature["format"], graph_feature["format"],
in_memory=graph_feature["in_memory"], in_memory=in_memory,
) )
g.edata[graph_feature["name"]] = edge_data g.edata[graph_feature["name"]] = edge_data
...@@ -164,12 +169,15 @@ def preprocess_ondisk_dataset( ...@@ -164,12 +169,15 @@ def preprocess_ondisk_dataset(
out_feature["path"] = os.path.join( out_feature["path"] = os.path.join(
processed_dir_prefix, feature["path"].replace("pt", "npy") processed_dir_prefix, feature["path"].replace("pt", "npy")
) )
in_memory = (
True if "in_memory" not in feature else feature["in_memory"]
)
copy_or_convert_data( copy_or_convert_data(
os.path.join(dataset_dir, feature["path"]), os.path.join(dataset_dir, feature["path"]),
os.path.join(dataset_dir, out_feature["path"]), os.path.join(dataset_dir, out_feature["path"]),
feature["format"], feature["format"],
out_feature["format"], output_format=out_feature["format"],
feature["in_memory"], in_memory=in_memory,
is_feature=True, is_feature=True,
) )
......
...@@ -173,7 +173,6 @@ def random_homo_graphbolt_graph( ...@@ -173,7 +173,6 @@ def random_homo_graphbolt_graph(
type: null type: null
name: feat name: feat
format: numpy format: numpy
in_memory: true
path: {edge_feat_path} path: {edge_feat_path}
tasks: tasks:
- name: link_prediction - name: link_prediction
......
...@@ -1285,7 +1285,6 @@ def test_OnDiskDataset_preprocess_yaml_content_unix(): ...@@ -1285,7 +1285,6 @@ def test_OnDiskDataset_preprocess_yaml_content_unix():
type: null type: null
name: feat name: feat
format: numpy format: numpy
in_memory: true
path: data/edge-feat.npy path: data/edge-feat.npy
feature_data: feature_data:
- domain: node - domain: node
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment