Unverified Commit 8b839a23 authored by Andrei Ivanov's avatar Andrei Ivanov Committed by GitHub
Browse files

Improving data tests. (#6144)

parent 13204383
...@@ -7,6 +7,7 @@ from __future__ import absolute_import ...@@ -7,6 +7,7 @@ from __future__ import absolute_import
import os, sys import os, sys
import pickle as pkl import pickle as pkl
import warnings
import networkx as nx import networkx as nx
...@@ -34,10 +35,12 @@ backend = os.environ.get("DGLBACKEND", "pytorch") ...@@ -34,10 +35,12 @@ backend = os.environ.get("DGLBACKEND", "pytorch")
def _pickle_load(pkl_file): def _pickle_load(pkl_file):
if sys.version_info > (3, 0): with warnings.catch_warnings():
return pkl.load(pkl_file, encoding="latin1") warnings.simplefilter("ignore", category=DeprecationWarning)
else: if sys.version_info > (3, 0):
return pkl.load(pkl_file) return pkl.load(pkl_file, encoding="latin1")
else:
return pkl.load(pkl_file)
class CitationGraphDataset(DGLBuiltinDataset): class CitationGraphDataset(DGLBuiltinDataset):
......
...@@ -4,6 +4,7 @@ import os ...@@ -4,6 +4,7 @@ import os
import tarfile import tarfile
import tempfile import tempfile
import unittest import unittest
import warnings
import backend as F import backend as F
...@@ -736,54 +737,56 @@ def _test_construct_graphs_multiple(): ...@@ -736,54 +737,56 @@ def _test_construct_graphs_multiple():
assert expect_except assert expect_except
def _get_data_table(data_frame):
    """Round-trip *data_frame* through a CSV file and parse it back.

    Writes the frame to a temporary ``nodes.csv`` (without the index),
    re-reads it with pandas, and returns the data table produced by
    dgl's ``DefaultDataParser``.
    """
    from dgl.data.csv_dataset_base import DefaultDataParser

    with tempfile.TemporaryDirectory() as test_dir:
        csv_path = os.path.join(test_dir, "nodes.csv")
        data_frame.to_csv(csv_path, index=False)

        parser = DefaultDataParser()
        loaded = pd.read_csv(csv_path)
        # Intercepting the warning: "Unamed column is found. Ignored...".
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", category=UserWarning)
            return parser(loaded)
def _test_DefaultDataParser():
    """Exercise DefaultDataParser (via _get_data_table) on three CSV shapes.

    Cases: a well-formed numeric/list-feature table, a non-numeric string
    column expected to fail parsing, and a minimal single-column table.
    """
    # common csv: integer ids, integer labels, and a list-valued feature column
    num_nodes = 5
    num_labels = 3
    num_dims = 2
    node_id = np.arange(num_nodes)
    label = np.random.randint(num_labels, size=num_nodes)
    feat = np.random.rand(num_nodes, num_dims)
    df = pd.DataFrame(
        {
            "node_id": node_id,
            "label": label,
            "feat": [line.tolist() for line in feat],
        }
    )
    dt = _get_data_table(df)
    assert np.array_equal(node_id, dt["node_id"])
    assert np.array_equal(label, dt["label"])
    assert np.array_equal(feat, dt["feat"])

    # string consists of non-numeric values: parsing is expected to raise
    df = pd.DataFrame({"label": ["a", "b", "c"]})
    expect_except = False
    try:
        _get_data_table(df)
    except Exception:
        # Was a bare `except:`; narrowed so SystemExit/KeyboardInterrupt
        # are not swallowed by the test.
        expect_except = True
    assert expect_except

    # csv has index column which is ignored as it's unnamed
    # NOTE(review): _get_data_table writes with index=False, so no unnamed
    # index column is actually emitted here — confirm this sub-case still
    # exercises the intended "ignored unnamed column" path.
    df = pd.DataFrame({"label": [1, 2, 3]})
    dt = _get_data_table(df)
    assert len(dt) == 1
def _test_load_yaml_with_sanity_check(): def _test_load_yaml_with_sanity_check():
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment