Unverified commit b26da252, authored by Rhett Ying and committed by GitHub
Browse files

[GraphBolt] remove pydantic_yaml from dependency (#6011)


Co-authored-by: Hongzhi (Steve), Chen <chenhongzhi.nkcs@gmail.com>
parent d3bd4c61
...@@ -132,7 +132,7 @@ def preprocess_ondisk_dataset(input_config_path: str) -> str: ...@@ -132,7 +132,7 @@ def preprocess_ondisk_dataset(input_config_path: str) -> str:
# 6. Load the node/edge features and do necessary conversion. # 6. Load the node/edge features and do necessary conversion.
if input_config.get("feature_data", None): if input_config.get("feature_data", None):
for (feature, out_feature) in zip( for feature, out_feature in zip(
input_config["feature_data"], output_config["feature_data"] input_config["feature_data"], output_config["feature_data"]
): ):
# Always save the feature in numpy format. # Always save the feature in numpy format.
...@@ -270,8 +270,9 @@ class OnDiskDataset(Dataset): ...@@ -270,8 +270,9 @@ class OnDiskDataset(Dataset):
# Always call the preprocess function first. If already preprocessed, # Always call the preprocess function first. If already preprocessed,
# the function will return the original path directly. # the function will return the original path directly.
path = preprocess_ondisk_dataset(path) path = preprocess_ondisk_dataset(path)
with open(path, "r") as f: with open(path) as f:
self._meta = OnDiskMetaData.parse_raw(f.read(), proto="yaml") yaml_data = yaml.load(f, Loader=yaml.loader.SafeLoader)
self._meta = OnDiskMetaData(**yaml_data)
self._dataset_name = self._meta.dataset_name self._dataset_name = self._meta.dataset_name
self._num_classes = self._meta.num_classes self._num_classes = self._meta.num_classes
self._num_labels = self._meta.num_labels self._num_labels = self._meta.num_labels
......
"""Ondisk metadata of GraphBolt.""" """Ondisk metadata of GraphBolt."""
from enum import Enum
from typing import List, Optional from typing import List, Optional
import pydantic import pydantic
import pydantic_yaml
__all__ = [ __all__ = [
...@@ -17,7 +17,7 @@ __all__ = [ ...@@ -17,7 +17,7 @@ __all__ = [
] ]
class OnDiskFeatureDataFormat(pydantic_yaml.YamlStrEnum): class OnDiskFeatureDataFormat(str, Enum):
"""Enum of data format.""" """Enum of data format."""
TORCH = "torch" TORCH = "torch"
...@@ -27,13 +27,13 @@ class OnDiskFeatureDataFormat(pydantic_yaml.YamlStrEnum): ...@@ -27,13 +27,13 @@ class OnDiskFeatureDataFormat(pydantic_yaml.YamlStrEnum):
class OnDiskTVTSet(pydantic.BaseModel): class OnDiskTVTSet(pydantic.BaseModel):
"""Train-Validation-Test set.""" """Train-Validation-Test set."""
type: Optional[str] type: Optional[str] = None
format: OnDiskFeatureDataFormat format: OnDiskFeatureDataFormat
in_memory: Optional[bool] = True in_memory: Optional[bool] = True
path: str path: str
class OnDiskFeatureDataDomain(pydantic_yaml.YamlStrEnum): class OnDiskFeatureDataDomain(str, Enum):
"""Enum of feature data domain.""" """Enum of feature data domain."""
NODE = "node" NODE = "node"
...@@ -44,14 +44,14 @@ class OnDiskFeatureDataDomain(pydantic_yaml.YamlStrEnum): ...@@ -44,14 +44,14 @@ class OnDiskFeatureDataDomain(pydantic_yaml.YamlStrEnum):
class OnDiskFeatureData(pydantic.BaseModel): class OnDiskFeatureData(pydantic.BaseModel):
r"""The description of an on-disk feature.""" r"""The description of an on-disk feature."""
domain: OnDiskFeatureDataDomain domain: OnDiskFeatureDataDomain
type: Optional[str] type: Optional[str] = None
name: str name: str
format: OnDiskFeatureDataFormat format: OnDiskFeatureDataFormat
path: str path: str
in_memory: Optional[bool] = True in_memory: Optional[bool] = True
class OnDiskGraphTopologyType(pydantic_yaml.YamlStrEnum): class OnDiskGraphTopologyType(str, Enum):
"""Enum of graph topology type.""" """Enum of graph topology type."""
CSC_SAMPLING = "CSCSamplingGraph" CSC_SAMPLING = "CSCSamplingGraph"
...@@ -64,7 +64,7 @@ class OnDiskGraphTopology(pydantic.BaseModel): ...@@ -64,7 +64,7 @@ class OnDiskGraphTopology(pydantic.BaseModel):
path: str path: str
class OnDiskMetaData(pydantic_yaml.YamlModel): class OnDiskMetaData(pydantic.BaseModel):
"""Metadata specification in YAML. """Metadata specification in YAML.
As multiple node/edge types and multiple splits are supported, each TVT set As multiple node/edge types and multiple splits are supported, each TVT set
......
...@@ -19,7 +19,6 @@ dependencies: ...@@ -19,7 +19,6 @@ dependencies:
- psutil - psutil
- pyarrow - pyarrow
- pydantic - pydantic
- pydantic-yaml
- pytest - pytest
- pyyaml - pyyaml
- rdflib - rdflib
......
...@@ -1668,6 +1668,9 @@ def _test_NodeEdgeGraphData(): ...@@ -1668,6 +1668,9 @@ def _test_NodeEdgeGraphData():
reason="Datasets don't need to be tested on GPU.", reason="Datasets don't need to be tested on GPU.",
) )
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow", reason="Skip Tensorflow"
)
def test_csvdataset(): def test_csvdataset():
_test_NodeEdgeGraphData() _test_NodeEdgeGraphData()
_test_construct_graphs_node_ids() _test_construct_graphs_node_ids()
...@@ -1866,6 +1869,9 @@ def test_as_linkpred_ogb(): ...@@ -1866,6 +1869,9 @@ def test_as_linkpred_ogb():
reason="Datasets don't need to be tested on GPU.", reason="Datasets don't need to be tested on GPU.",
) )
@unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet") @unittest.skipIf(dgl.backend.backend_name == "mxnet", reason="Skip MXNet")
@unittest.skipIf(
dgl.backend.backend_name == "tensorflow", reason="Skip Tensorflow"
)
def test_as_nodepred_csvdataset(): def test_as_nodepred_csvdataset():
with tempfile.TemporaryDirectory() as test_dir: with tempfile.TemporaryDirectory() as test_dir:
# generate YAML/CSVs # generate YAML/CSVs
......
...@@ -637,7 +637,7 @@ def test_OnDiskDataset_Graph_Exceptions(): ...@@ -637,7 +637,7 @@ def test_OnDiskDataset_Graph_Exceptions():
with pytest.raises( with pytest.raises(
pydantic.ValidationError, pydantic.ValidationError,
match="value is not a valid enumeration member", match="1 validation error for OnDiskMetaData",
): ):
_ = gb.OnDiskDataset(yaml_file) _ = gb.OnDiskDataset(yaml_file)
...@@ -847,7 +847,7 @@ def test_OnDiskDataset_preprocess_homogeneous(): ...@@ -847,7 +847,7 @@ def test_OnDiskDataset_preprocess_homogeneous():
output_file = gb.ondisk_dataset.preprocess_ondisk_dataset(yaml_file) output_file = gb.ondisk_dataset.preprocess_ondisk_dataset(yaml_file)
with open(output_file, "rb") as f: with open(output_file, "rb") as f:
processed_dataset = yaml.safe_load(f) processed_dataset = yaml.load(f, Loader=yaml.Loader)
assert processed_dataset["dataset_name"] == dataset_name assert processed_dataset["dataset_name"] == dataset_name
assert processed_dataset["num_classes"] == num_classes assert processed_dataset["num_classes"] == num_classes
......
...@@ -34,9 +34,6 @@ export PYTHONUNBUFFERED=1 ...@@ -34,9 +34,6 @@ export PYTHONUNBUFFERED=1
export OMP_NUM_THREADS=1 export OMP_NUM_THREADS=1
export DMLC_LOG_DEBUG=1 export DMLC_LOG_DEBUG=1
# Install required dependencies
python3 -m pip install pydantic-yaml==0.11.2
python3 -m pytest -v --capture=tee-sys --junitxml=pytest_distributed.xml --durations=100 tests/distributed/*.py || fail "distributed" python3 -m pytest -v --capture=tee-sys --junitxml=pytest_distributed.xml --durations=100 tests/distributed/*.py || fail "distributed"
PYTHONPATH=tools:tools/distpartitioning:$PYTHONPATH python3 -m pytest -v --capture=tee-sys --junitxml=pytest_tools.xml --durations=100 tests/tools/*.py || fail "tools" PYTHONPATH=tools:tools/distpartitioning:$PYTHONPATH python3 -m pytest -v --capture=tee-sys --junitxml=pytest_tools.xml --durations=100 tests/tools/*.py || fail "tools"
...@@ -14,7 +14,7 @@ SET DGLBACKEND=!BACKEND! ...@@ -14,7 +14,7 @@ SET DGLBACKEND=!BACKEND!
SET DGL_LIBRARY_PATH=!CD!\build SET DGL_LIBRARY_PATH=!CD!\build
SET DGL_DOWNLOAD_DIR=!CD! SET DGL_DOWNLOAD_DIR=!CD!
python -m pip install pytest psutil pandas pyyaml pydantic pydantic-yaml==0.11.2 rdflib torchmetrics || EXIT /B 1 python -m pip install pytest psutil pandas pyyaml pydantic rdflib torchmetrics || EXIT /B 1
python -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests\python\!DGLBACKEND! || EXIT /B 1 python -m pytest -v --junitxml=pytest_backend.xml --durations=100 tests\python\!DGLBACKEND! || EXIT /B 1
python -m pytest -v --junitxml=pytest_common.xml --durations=100 tests\python\common || EXIT /B 1 python -m pytest -v --junitxml=pytest_common.xml --durations=100 tests\python\common || EXIT /B 1
ENDLOCAL ENDLOCAL
......
...@@ -33,9 +33,6 @@ fi ...@@ -33,9 +33,6 @@ fi
conda activate ${DGLBACKEND}-ci conda activate ${DGLBACKEND}-ci
# Install required dependencies
python3 -m pip install pydantic-yaml==0.11.2
if [ $DGLBACKEND == "mxnet" ] if [ $DGLBACKEND == "mxnet" ]
then then
python3 -m pytest -v --junitxml=pytest_compute.xml --durations=100 --ignore=tests/python/common/test_ffi.py tests/python/common || fail "common" python3 -m pytest -v --junitxml=pytest_compute.xml --durations=100 --ignore=tests/python/common/test_ffi.py tests/python/common || fail "common"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment