Unverified commit 5f327ff4, authored by LastWhisper and committed by GitHub
Browse files

[GraphBolt] Ensure the contiguity of the `TorchBasedFeature._tensor`. (#6449)

parent e4bb6abc
......@@ -69,7 +69,8 @@ class TorchBasedFeature(Feature):
f"dimension of torch_feature in TorchBasedFeature must be greater "
f"than 1, but got {torch_feature.dim()} dimension."
)
self._tensor = torch_feature
# Make sure the tensor is contiguous.
self._tensor = torch_feature.contiguous()
def read(self, ids: torch.Tensor = None):
"""Read the feature by index.
......
......@@ -35,13 +35,25 @@ def save_data(data, path, fmt):
raise RuntimeError(f"Unsupported format: {fmt}")
# Perform necessary conversion.
if fmt == "numpy" and isinstance(element, torch.Tensor):
element = element.cpu().numpy()
elif fmt == "torch" and isinstance(element, np.ndarray):
element = torch.from_numpy(element).cpu()
if fmt == "numpy" and isinstance(data, torch.Tensor):
data = data.cpu().numpy()
elif fmt == "torch" and isinstance(data, np.ndarray):
data = torch.from_numpy(data).cpu()
# Save the data.
if fmt == "numpy":
if not data.flags["C_CONTIGUOUS"]:
Warning(
"The ndarray saved to disk is not contiguous, "
"so it will be copied to contiguous memory."
)
data = np.ascontiguousarray(data)
np.save(path, data)
elif fmt == "torch":
if not data.is_contiguous():
Warning(
"The tensor saved to disk is not contiguous, "
"so it will be copied to contiguous memory."
)
data = data.contiguous()
torch.save(data, path)
......@@ -86,6 +86,44 @@ def test_torch_based_feature(in_memory):
a = b = None
feature_a = feature_b = None
# Test loaded tensors' contiguity from C/Fortran contiguous ndarray.
contiguous_numpy = np.array([[1, 2, 3], [4, 5, 6]], order="C")
non_contiguous_numpy = np.array([[1, 2, 3], [4, 5, 6]], order="F")
assert contiguous_numpy.flags["C_CONTIGUOUS"]
assert non_contiguous_numpy.flags["F_CONTIGUOUS"]
np.save(
os.path.join(test_dir, "contiguous_numpy.npy"), contiguous_numpy
)
np.save(
os.path.join(test_dir, "non_contiguous_numpy.npy"),
non_contiguous_numpy,
)
cur_mmap_mode = None
if not in_memory:
cur_mmap_mode = "r+"
feature_a = gb.TorchBasedFeature(
torch.from_numpy(
np.load(
os.path.join(test_dir, "contiguous_numpy.npy"),
mmap_mode=cur_mmap_mode,
)
)
)
feature_b = gb.TorchBasedFeature(
torch.from_numpy(
np.load(
os.path.join(test_dir, "non_contiguous_numpy.npy"),
mmap_mode=cur_mmap_mode,
)
)
)
assert feature_a._tensor.is_contiguous()
assert feature_b._tensor.is_contiguous()
contiguous_numpy = non_contiguous_numpy = None
feature_a = feature_b = None
def write_tensor_to_disk(dir, name, t, fmt="torch"):
if fmt == "torch":
......
import os
import tempfile
import dgl.graphbolt.utils as utils
import numpy as np
import pytest
import torch
def test_read_torch_data():
    """Check that ``_read_torch_data`` returns the tensor written by ``torch.save``."""
    with tempfile.TemporaryDirectory() as tmp_dir:
        tensor_path = os.path.join(tmp_dir, "save_tensor.pt")
        expected = torch.tensor([[1, 2, 4], [2, 5, 3]])
        torch.save(expected, tensor_path)

        loaded = utils.internal._read_torch_data(tensor_path)
        assert torch.equal(expected, loaded)

        # Drop both references before the temporary directory is cleaned up.
        expected = loaded = None
@pytest.mark.parametrize("in_memory", [True, False])
def test_read_numpy_data(in_memory):
    """Check that ``_read_numpy_data`` returns the array written by ``np.save``.

    Runs once with ``in_memory=True`` and once with ``in_memory=False``; the
    flag is forwarded unchanged to ``_read_numpy_data``.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        array_path = os.path.join(tmp_dir, "save_numpy.npy")
        expected = np.array([[1, 2, 4], [2, 5, 3]])
        np.save(array_path, expected)

        loaded = utils.internal._read_numpy_data(array_path, in_memory)
        assert torch.equal(torch.from_numpy(expected), loaded)

        # Drop both references before the temporary directory is cleaned up.
        expected = loaded = None
@pytest.mark.parametrize("fmt", ["torch", "numpy"])
def test_read_data(fmt):
    """Check that ``utils.read_data`` loads data stored in either format.

    The same reference array is written with ``torch.save`` or ``np.save``
    depending on ``fmt`` and must compare equal to the tensor read back.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        expected = np.array([[1, 2, 4], [2, 5, 3]])
        suffix = "npy" if fmt != "torch" else "pt"
        data_path = os.path.join(tmp_dir, f"save_data.{suffix}")

        # Write the fixture with the writer that matches the format under test.
        if fmt == "torch":
            torch.save(torch.from_numpy(expected), data_path)
        elif fmt == "numpy":
            np.save(data_path, expected)

        loaded = utils.read_data(data_path, fmt)
        assert torch.equal(torch.from_numpy(expected), loaded)
@pytest.mark.parametrize(
    "data_fmt, save_fmt, contiguous",
    [
        ("torch", "torch", True),
        ("torch", "torch", False),
        ("torch", "numpy", True),
        ("torch", "numpy", False),
        ("numpy", "torch", True),
        ("numpy", "torch", False),
        ("numpy", "numpy", True),
        ("numpy", "numpy", False),
    ],
)
def test_save_data(data_fmt, save_fmt, contiguous):
    """Check that ``utils.save_data`` writes contiguous data to disk.

    ``data_fmt`` selects whether a tensor or an ndarray is handed to
    ``save_data``, ``save_fmt`` selects the on-disk format, and ``contiguous``
    selects whether the input is left as created or converted to Fortran
    order first. The file read back must be contiguous and equal to the input.
    """
    with tempfile.TemporaryDirectory() as tmp_dir:
        array = np.array([[1, 2, 4], [2, 5, 3]])
        # The Fortran-ordered copy serves as the non-contiguous input.
        array = array if contiguous else np.asfortranarray(array)
        tensor = torch.from_numpy(array)

        suffix = "npy" if save_fmt != "torch" else "pt"
        out_path = os.path.join(tmp_dir, f"save_data.{suffix}")

        # Step 1: save, passing the input in the requested source format.
        if data_fmt == "torch":
            utils.save_data(tensor, out_path, save_fmt)
        elif data_fmt == "numpy":
            utils.save_data(array, out_path, save_fmt)

        # Step 2: load with the matching reader and verify layout and content.
        if save_fmt == "torch":
            restored = torch.load(out_path)
            assert restored.is_contiguous()
            assert torch.equal(tensor, restored)
        elif save_fmt == "numpy":
            restored = np.load(out_path)
            # The array on disk must be C-contiguous.
            assert restored.flags["C_CONTIGUOUS"]
            assert np.array_equal(tensor.numpy(), restored)

        # Drop all references before the temporary directory is cleaned up.
        array = tensor = restored = None
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment