Merge branch 'dtk25.04' of http://developer.sourcefind.cn/codes/OpenDAS/dgl into 2.2.1

74d88bf8 · sangwz · 2a1ac588 · 314cedc1 · 74d88bf8 · 74d88bf8
Commit 74d88bf8 authored Feb 20, 2025 by sangwz
20 changed files
--- a/notebooks/stochastic_training/node_classification.ipynb
+++ b/notebooks/stochastic_training/node_classification.ipynb
--- a/python/dgl/_ffi/libinfo.py
+++ b/python/dgl/_ffi/libinfo.py
@@ -105,4 +105,5 @@ def find_lib_path(name=None, search_path=None, optional=False):
 # We use the version of the incoming release for code
 # that is under development.
 # The following line is set by dgl/python/update_version.py
 __version__ = "2.2.1"
--- a/python/dgl/_ffi/runtime_ctypes.py
+++ b/python/dgl/_ffi/runtime_ctypes.py
@@ -123,7 +123,7 @@ class DGLContext(ctypes.Structure):
        7: "vulkan",
        8: "metal",
        9: "vpi",
-        10: "rocm",
+        10: "gpu",
        11: "opengl",
        12: "ext_dev",
    }
@@ -131,9 +131,9 @@ class DGLContext(ctypes.Structure):
        "llvm": 1,
        "stackvm": 1,
        "cpu": 1,
-        "gpu": 2,
+        "gpu": 10,
-        "cuda": 2,
+        "cuda": 10,
-        "nvptx": 2,
+        "nvptx": 10,
        "cl": 4,
        "opencl": 4,
        "aocl": 5,

--- a/python/dgl/backend/pytorch/tensor.py
+++ b/python/dgl/backend/pytorch/tensor.py
@@ -116,7 +116,7 @@ def to_backend_ctx(dglctx):
    dev_type = dglctx.device_type
    if dev_type == 1:
        return th.device("cpu")
-    elif dev_type == 2:
+    elif dev_type == 2 or dev_type==10:
        return th.device("cuda", dglctx.device_id)
    else:
        raise ValueError("Unsupported DGL device context:", dglctx)

--- a/python/dgl/ndarray.py
+++ b/python/dgl/ndarray.py
@@ -80,7 +80,8 @@ def gpu(dev_id=0):
    ctx : DGLContext
        The created context
    """
-    return DGLContext(2, dev_id)
+    # DCU (ROCm) devices use device type 10; NVIDIA (CUDA) devices use 2.
+    return DGLContext(10, dev_id)
 def array(arr, ctx=cpu(0)):

--- a/python/update_version.py
+++ b/python/update_version.py
@@ -16,7 +16,10 @@ import re
 # (usually "aYYMMDD")
 # The environment variable DGL_VERSION_SUFFIX is the local version label
 # suffix for indicating CPU and CUDA versions as in PEP 440 (e.g. "+cu102")
 __version__ = "2.2.1" + os.getenv("DGL_PRERELEASE", "")
 __version__ += os.getenv("DGL_VERSION_SUFFIX", "")
 print(__version__)

--- a/src/array/arith.h
+++ b/src/array/arith.h
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2019 by Contributors
 * @file array/arith.h
@@ -6,13 +7,13 @@
 #ifndef DGL_ARRAY_ARITH_H_
 #define DGL_ARRAY_ARITH_H_
-#ifdef __CUDACC__
+#ifdef __HIPCC__
-#define DGLDEVICE __device__
+#define DGLDEVICE __device__ __host__
 #define DGLINLINE __forceinline__
 #else
 #define DGLDEVICE
 #define DGLINLINE inline
-#endif  // __CUDACC__
+#endif  // __HIPCC__
 namespace dgl {
 namespace aten {

--- a/src/array/array.cc
+++ b/src/array/array.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2019-2022 by Contributors
 * @file array/array.cc
@@ -14,9 +15,9 @@
 #include <sstream>
 #include "../c_api_common.h"
-#include "./arith.h"
+#include "arith.h"
-#include "./array_op.h"
+#include "array_op.h"
-#include "./kernel_decl.h"
+#include "kernel_decl.h"
 using namespace dgl::runtime;
@@ -585,7 +586,7 @@ COOMatrix CSRRowWiseSampling(
    // prob_or_mask is pinned and rows on GPU is valid
    CHECK_VALID_CONTEXT(prob_or_mask, rows);
    ATEN_CSR_SWITCH_CUDA_UVA(mat, rows, XPU, IdType, "CSRRowWiseSampling", {
-      CHECK(!(prob_or_mask->dtype.bits == 8 && XPU == kDGLCUDA))
+      CHECK(!(prob_or_mask->dtype.bits == 8 && (XPU == kDGLCUDA || XPU == kDGLROCM)))
          << "GPU sampling with masks is currently not supported yet.";
      ATEN_FLOAT_INT8_UINT8_TYPE_SWITCH(
          prob_or_mask->dtype, FloatType, "probability or mask", {

--- a/src/array/array_arith.cc
+++ b/src/array/array_arith.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2019 by Contributors
 * @file array/array_arith.cc
@@ -8,8 +9,8 @@
 #include <dgl/runtime/ndarray.h>
 #include "../c_api_common.h"
-#include "./arith.h"
+#include "arith.h"
-#include "./array_op.h"
+#include "array_op.h"
 using namespace dgl::runtime;

--- a/src/array/cpu/array_cumsum.cc
+++ b/src/array/cpu/array_cumsum.cc
@@ -29,6 +29,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
    IdType* out_d = ret.Ptr<IdType>();
    out_d[0] = in_d[0];
    for (int64_t i = 1; i < len; ++i) out_d[i] = out_d[i - 1] + in_d[i];
+    std::cout << "limm cpu ret : " << ret << std::endl;
    return ret;
  }
 }

--- a/src/array/cpu/array_sort.cc
+++ b/src/array/cpu/array_sort.cc
@@ -48,7 +48,7 @@ void swap(const PairRef<V1, V2>& r1, const PairRef<V1, V2>& r2) {
 }
 template <typename V1, typename V2>
-struct PairIterator
+__host__ struct PairIterator
    : public std::iterator<
          std::random_access_iterator_tag, std::pair<V1, V2>, std::ptrdiff_t,
          std::pair<V1*, V2*>, PairRef<V1, V2>> {

--- a/src/array/cpu/gather_mm.cc
+++ b/src/array/cpu/gather_mm.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2020 by Contributors
 * @file kernel/cpu/gather_mm.cc
 * @brief GatherMM C APIs and definitions.
 */
-#include "./gather_mm.h"
+#include "gather_mm.h"
 #include <dgl/array.h>

--- a/src/array/cpu/labor_sampling.cc
+++ b/src/array/cpu/labor_sampling.cc
+// !!! This is a file automatically generated by hipify!!!
 /*!
 *   Copyright (c) 2022, NVIDIA Corporation
 *   Copyright (c) 2022, GT-TDAlab (Muhammed Fatih Balin & Umit V. Catalyurek)
@@ -18,7 +19,7 @@
 * \file array/cpu/labor_sampling.cc
 * \brief labor sampling
 */
-#include "./labor_pick.h"
+#include "labor_pick.h"
 namespace dgl {
 namespace aten {

--- a/src/array/cpu/rowwise_sampling.cc
+++ b/src/array/cpu/rowwise_sampling.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2020 by Contributors
 * @file array/cpu/rowwise_sampling.cc
@@ -7,7 +8,7 @@
 #include <numeric>
-#include "./rowwise_pick.h"
+#include "rowwise_pick.h"
 namespace dgl {
 namespace aten {

--- a/src/array/cpu/rowwise_topk.cc
+++ b/src/array/cpu/rowwise_topk.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2020 by Contributors
 * @file array/cpu/rowwise_topk.cc
@@ -6,7 +7,7 @@
 #include <algorithm>
 #include <numeric>
-#include "./rowwise_pick.h"
+#include "rowwise_pick.h"
 namespace dgl {
 namespace aten {

--- a/src/array/cpu/sddmm.cc
+++ b/src/array/cpu/sddmm.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2020 by Contributors
 * @file aten/cpu/sddmm.cc
 * @brief SDDMM C APIs and definitions.
 */
-#include "./sddmm.h"
+#include "sddmm.h"
 #include <dgl/array.h>

--- a/src/array/cpu/segment_reduce.cc
+++ b/src/array/cpu/segment_reduce.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2020 by Contributors
 * @file kernel/cpu/segment_reduce.cc
 * @brief Segment reduce C APIs and definitions.
 */
-#include "./segment_reduce.h"
+#include "segment_reduce.h"
 #include <dgl/array.h>
 #include <string>
-#include "./spmm_binary_ops.h"
+#include "spmm_binary_ops.h"
 namespace dgl {
 namespace aten {

--- a/src/array/cpu/spmm.cc
+++ b/src/array/cpu/spmm.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2020 by Contributors
 * @file kernel/cpu/spmm.cc
 * @brief SPMM C APIs and definitions.
 */
-#include "./spmm.h"
+#include "spmm.h"
 #include <dgl/array.h>

--- a/src/array/cpu/traversal.cc
+++ b/src/array/cpu/traversal.cc
+// !!! This is a file automatically generated by hipify!!!
 /**
 *  Copyright (c) 2020 by Contributors
 * @file array/cpu/traversal.cc
 * @brief Graph traversal implementation
 */
-#include "./traversal.h"
+#include "traversal.h"
 #include <dgl/graph_traversal.h>

--- a/src/array/cuda/array_cumsum.cu
+++ b/src/array/cuda/array_cumsum.cu
+// !!! This is a file automatically generated by hipify!!!
+#include "hip/hip_runtime.h"
 /**
 *  Copyright (c) 2020 by Contributors
 * @file array/cuda/array_cumsum.cu
 * @brief Array cumsum GPU implementation
 */
 #include <dgl/array.h>
+#include "../../../include/dgl/array.h"
-#include <cub/cub.cuh>
+#include <hipcub/hipcub.hpp>
 #include "../../runtime/cuda/cuda_common.h"
-#include "./utils.h"
+#include "utils.h"
 namespace dgl {
 using runtime::NDArray;
@@ -23,7 +26,7 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
                         : aten::Full(0, 1, array->dtype.bits, array->ctx);
  auto device = runtime::DeviceAPI::Get(array->ctx);
-  cudaStream_t stream = runtime::getCurrentCUDAStream();
+  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
  const IdType* in_d = array.Ptr<IdType>();
  IdArray ret;
  IdType* out_d = nullptr;
@@ -36,16 +39,15 @@ IdArray CumSum(IdArray array, bool prepend_zero) {
  }
  // Allocate workspace
  size_t workspace_size = 0;
-  CUDA_CALL(cub::DeviceScan::InclusiveSum(
+  CUDA_CALL(hipcub::DeviceScan::InclusiveSum(
      nullptr, workspace_size, in_d, out_d, len, stream));
  void* workspace = device->AllocWorkspace(array->ctx, workspace_size);
  // Compute cumsum
-  CUDA_CALL(cub::DeviceScan::InclusiveSum(
+  CUDA_CALL(hipcub::DeviceScan::InclusiveSum(
      workspace, workspace_size, in_d, out_d, len, stream));
  device->FreeWorkspace(array->ctx, workspace);
  return ret;
 }