Update warp size to 64

3befaca2 · sangwzh · 910cec0c · 3befaca2 · 3befaca2 · 3befaca2
Commit 3befaca2 authored Oct 16, 2024 by sangwzh
5 changed files
--- a/graphbolt/src/cuda/gpu_cache.h
+++ b/graphbolt/src/cuda/gpu_cache.h
@@ -20,7 +20,7 @@ namespace cuda {
 class GpuCache : public torch::CustomClassHolder {
  using key_t = long long;
  constexpr static int set_associativity = 2;
-  constexpr static int WARP_SIZE = 32;
+  constexpr static int WARP_SIZE = 64;
  constexpr static int bucket_size = WARP_SIZE * set_associativity;
  using gpu_cache_t = ::gpu_cache::gpu_cache<
      key_t, uint64_t, std::numeric_limits<key_t>::max(), set_associativity,

--- a/src/array/cuda/array_index_select.hip
+++ b/src/array/cuda/array_index_select.hip
@@ -34,7 +34,8 @@ NDArray IndexSelect(NDArray array, IdArray index) {
  DType* ret_data = static_cast<DType*>(ret->data);

  const DType* array_data = static_cast<DType*>(cuda::GetDevicePointer(array));
-  const IdType* idx_data = static_cast<IdType*>(index->data);
+  // const IdType* idx_data = static_cast<IdType*>(index->data);
+  const IdType* idx_data = static_cast<IdType*>(cuda::GetDevicePointer(index));

  hipStream_t stream = runtime::getCurrentHIPStreamMasqueradingAsCUDA();
  if (num_feat == 1) {

--- a/src/runtime/cuda/gpu_cache.hip
+++ b/src/runtime/cuda/gpu_cache.hip
@@ -41,7 +41,7 @@ namespace cuda {
 template <typename key_t>
 class GpuCache : public runtime::Object {
  constexpr static int set_associativity = 2;
-  constexpr static int WARP_SIZE = 32;
+  constexpr static int WARP_SIZE = 64;
  constexpr static int bucket_size = WARP_SIZE * set_associativity;
  using gpu_cache_t = gpu_cache::gpu_cache<
      key_t, uint64_t, std::numeric_limits<key_t>::max(), set_associativity,

--- a/third_party/HugeCTR/gpu_cache/include/nv_gpu_cache.hpp
+++ b/third_party/HugeCTR/gpu_cache/include/nv_gpu_cache.hpp
@@ -30,7 +30,7 @@
 #endif

 #define SET_ASSOCIATIVITY 2
-#define SLAB_SIZE 32
+#define SLAB_SIZE 64
 #define TASK_PER_WARP_TILE_MACRO 1

 namespace gpu_cache {

--- a/third_party/HugeCTR/gpu_cache/src/nv_gpu_cache.hip
+++ b/third_party/HugeCTR/gpu_cache/src/nv_gpu_cache.hip
@@ -1251,7 +1251,7 @@ gpu_cache<key_type, ref_counter_type, empty_key, set_associativity, warp_size, s
    return;
  }
  if (warp_size != 1 && warp_size != 2 && warp_size != 4 && warp_size != 8 && warp_size != 16 &&
-      warp_size != 32) {
+      warp_size != 32 &&warp_size != 64) {
    printf("Error: Invalid value for warp_size.\n");
    return;
  }
@@ -1299,7 +1299,7 @@ gpu_cache<key_type, ref_counter_type, empty_key, set_associativity, warp_size, s
    return;
  }
  if (warp_size != 1 && warp_size != 2 && warp_size != 4 && warp_size != 8 && warp_size != 16 &&
-      warp_size != 32) {
+      warp_size != 32 && warp_size != 64) {
    printf("Error: Invalid value for warp_size.\n");
    return;
  }