Testing a data frame with unnamed columns. (#6186)

c7e252cd · Andrei Ivanov · GitHub · 9ef80a6e · c7e252cd · c7e252cd
Unverified Commit c7e252cd authored Aug 21, 2023 by Andrei Ivanov Committed by GitHub Aug 22, 2023
3 changed files
--- a/docs/source/guide/data-loadcsv.rst
+++ b/docs/source/guide/data-loadcsv.rst
@@ -461,7 +461,7 @@ To parse the string type labels, one can define a ``DataParser`` class as follow
            parsed = {}
            for header in df:
                if 'Unnamed' in header:  # Handle Unnamed column
-                    print("Unamed column is found. Ignored...")
+                    print("Unnamed column is found. Ignored...")
                    continue
                dt = df[header].to_numpy().squeeze()
                if header == 'label':

--- a/python/dgl/data/csv_dataset_base.py
+++ b/python/dgl/data/csv_dataset_base.py
@@ -376,7 +376,7 @@ class DefaultDataParser:
        data = {}
        for header in df:
            if "Unnamed" in header:
-                dgl_warning("Unamed column is found. Ignored...")
+                dgl_warning("Unnamed column is found. Ignored...")
                continue
            dt = df[header].to_numpy().squeeze()
            if len(dt) > 0 and isinstance(dt[0], str):

--- a/tests/python/common/data/test_data.py
+++ b/tests/python/common/data/test_data.py
@@ -737,17 +737,19 @@ def _test_construct_graphs_multiple():
    assert expect_except


-def _get_data_table(data_frame):
+def _get_data_table(data_frame, save_index=False):
    from dgl.data.csv_dataset_base import DefaultDataParser

    with tempfile.TemporaryDirectory() as test_dir:
        csv_path = os.path.join(test_dir, "nodes.csv")

-        data_frame.to_csv(csv_path, index=False)
+        data_frame.to_csv(csv_path, index=save_index)
        dp = DefaultDataParser()
        df = pd.read_csv(csv_path)

-    # Intercepting the warning: "Unamed column is found. Ignored...".
+    # Suppressing the warning: "Unnamed column is found. Ignored...",
+    # which appears when a CSV file is saved with an index:
+    #    data_frame.to_csv(csv_path, index=True).
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=UserWarning)
        return dp(df)
@@ -785,7 +787,7 @@ def _test_DefaultDataParser():

    # csv has index column which is ignored as it's unnamed
    df = pd.DataFrame({"label": [1, 2, 3]})
-    dt = _get_data_table(df)
+    dt = _get_data_table(df, True)
    assert len(dt) == 1