Unverified commit 4091a49c, authored by Muhammed Fatih BALIN, committed by GitHub
Browse files

[GraphBolt][CUDA] Fix cudart destructor race in unpinning. (#7156)

parent 2d2ad71e
...@@ -76,10 +76,7 @@ class FusedCSCSamplingGraph(SamplingGraph): ...@@ -76,10 +76,7 @@ class FusedCSCSamplingGraph(SamplingGraph):
# https://github.com/pytorch/pytorch/issues/32167#issuecomment-753551842 # https://github.com/pytorch/pytorch/issues/32167#issuecomment-753551842
if hasattr(self, "_is_inplace_pinned"): if hasattr(self, "_is_inplace_pinned"):
for tensor in self._is_inplace_pinned: for tensor in self._is_inplace_pinned:
assert ( assert self._inplace_unpinner(tensor.data_ptr()) == 0
torch.cuda.cudart().cudaHostUnregister(tensor.data_ptr())
== 0
)
@property @property
def total_num_nodes(self) -> int: def total_num_nodes(self) -> int:
...@@ -1121,6 +1118,7 @@ class FusedCSCSamplingGraph(SamplingGraph): ...@@ -1121,6 +1118,7 @@ class FusedCSCSamplingGraph(SamplingGraph):
) )
self._is_inplace_pinned.add(x) self._is_inplace_pinned.add(x)
self._inplace_unpinner = cudart.cudaHostUnregister
return x return x
......
...@@ -93,9 +93,7 @@ class TorchBasedFeature(Feature): ...@@ -93,9 +93,7 @@ class TorchBasedFeature(Feature):
# cudaHostUnregister to unpin the tensor in the destructor. # cudaHostUnregister to unpin the tensor in the destructor.
# https://github.com/pytorch/pytorch/issues/32167#issuecomment-753551842 # https://github.com/pytorch/pytorch/issues/32167#issuecomment-753551842
for tensor in self._is_inplace_pinned: for tensor in self._is_inplace_pinned:
assert ( assert self._inplace_unpinner(tensor.data_ptr()) == 0
torch.cuda.cudart().cudaHostUnregister(tensor.data_ptr()) == 0
)
def read(self, ids: torch.Tensor = None): def read(self, ids: torch.Tensor = None):
"""Read the feature by index. """Read the feature by index.
...@@ -187,14 +185,16 @@ class TorchBasedFeature(Feature): ...@@ -187,14 +185,16 @@ class TorchBasedFeature(Feature):
assert ( assert (
x.is_contiguous() x.is_contiguous()
), "Tensor pinning is only supported for contiguous tensors." ), "Tensor pinning is only supported for contiguous tensors."
cudart = torch.cuda.cudart()
assert ( assert (
torch.cuda.cudart().cudaHostRegister( cudart.cudaHostRegister(
x.data_ptr(), x.numel() * x.element_size(), 0 x.data_ptr(), x.numel() * x.element_size(), 0
) )
== 0 == 0
) )
self._is_inplace_pinned.add(x) self._is_inplace_pinned.add(x)
self._inplace_unpinner = cudart.cudaHostUnregister
return self return self
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment