device_query.cpp 2.31 KB
Newer Older
one's avatar
one committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <vector>

#if defined(BACKEND_CUDA)
#include <cuda_runtime_api.h>
#elif defined(BACKEND_HIP)
#include <hip/hip_runtime_api.h>
#else
#error "BACKEND_CUDA or BACKEND_HIP must be defined"
#endif

namespace {

#if defined(BACKEND_CUDA)
/// Name of the runtime call being timed; printed in the CSV `api` column.
const char *api_name() {
  return "cudaGetDevice";
}

/// Forwards to cudaSetDevice and returns its status converted to int.
int set_device(int device) {
  const auto status = cudaSetDevice(device);
  return static_cast<int>(status);
}

/// Forwards to cudaGetDevice, writing the result through `device`, and
/// returns the call's status converted to int.
int get_device(int *device) {
  const auto status = cudaGetDevice(device);
  return static_cast<int>(status);
}
#else
/// Name of the runtime call being timed; printed in the CSV `api` column.
const char *api_name() {
  return "hipGetDevice";
}

/// Forwards to hipSetDevice and returns its status converted to int.
int set_device(int device) {
  const auto status = hipSetDevice(device);
  return static_cast<int>(status);
}

/// Forwards to hipGetDevice, writing the result through `device`, and
/// returns the call's status converted to int.
int get_device(int *device) {
  const auto status = hipGetDevice(device);
  return static_cast<int>(status);
}
#endif

/// Returns the median of `values`.
///
/// The vector is taken by value so it can be sorted without touching the
/// caller's data. For an odd number of elements the middle element is
/// returned; for an even number, the average of the two middle elements.
/// An empty input yields 0.0 instead of indexing out of bounds.
double median(std::vector<double> values) {
  if (values.empty()) {
    return 0.0;
  }
  std::sort(values.begin(), values.end());
  const auto mid = values.size() / 2;
  if (values.size() % 2 != 0) {
    return values[mid];
  }
  // Even count: there is no single middle element, so average the two
  // that straddle the midpoint.
  return 0.5 * (values[mid - 1] + values[mid]);
}

/// Returns the arithmetic mean of `values`, summing in iteration order.
///
/// An empty input yields 0.0 rather than dividing zero by zero.
double mean(const std::vector<double> &values) {
  if (values.empty()) {
    return 0.0;
  }
  double total = 0.0;
  for (const double value : values) {
    total += value;
  }
  return total / static_cast<double>(values.size());
}

} // namespace

/// Microbenchmark driver: times back-to-back get_device() calls and prints
/// one CSV header line plus one CSV result row to stdout.
///
/// Usage: <prog> [device] [loops] [rounds] [warmup]
///   device  ordinal passed to set_device()             (default 0)
///   loops   get_device() calls timed per round, > 0    (default 1000000)
///   rounds  number of timed rounds, > 0                (default 7)
///   warmup  untimed get_device() calls before timing   (default 10000)
///
/// Returns 0 on success. Returns EXIT_FAILURE, after printing a usage message
/// to stderr, when an argument is not a valid base-10 int or when loops or
/// rounds is not positive (either would otherwise divide by zero or leave no
/// samples to summarise).
int main(int argc, char **argv) {
  // Reads argv[index] as a base-10 int into *out, or stores `fallback` when
  // the argument was not supplied. Returns false for empty, non-numeric,
  // trailing-garbage, or out-of-int-range input.
  const auto parse_arg = [&](int index, int fallback, int *out) -> bool {
    if (argc <= index) {
      *out = fallback;
      return true;
    }
    char *end = nullptr;
    const long parsed = std::strtol(argv[index], &end, 10);
    const int narrowed = static_cast<int>(parsed);
    // `narrowed != parsed` catches values that do not fit in an int.
    if (end == argv[index] || *end != '\0' || narrowed != parsed) {
      return false;
    }
    *out = narrowed;
    return true;
  };

  int device = 0;
  int loops = 0;
  int rounds = 0;
  int warmup = 0;
  const bool parsed_ok = parse_arg(1, 0, &device) &&
                         parse_arg(2, 1000000, &loops) &&
                         parse_arg(3, 7, &rounds) &&
                         parse_arg(4, 10000, &warmup);
  if (!parsed_ok || loops <= 0 || rounds <= 0) {
    std::fprintf(stderr, "usage: %s [device] [loops>0] [rounds>0] [warmup]\n",
                 (argc > 0 && argv[0] != nullptr) ? argv[0] : "device_query");
    return EXIT_FAILURE;
  }

  // Every status code and queried ordinal is folded into this volatile
  // accumulator and printed in the `sink` column, so the calls' results are
  // observably used.
  volatile int sink = 0;
  const int set_status = set_device(device);
  sink += set_status;
  if (set_status != 0) {
    // Both runtimes define 0 as success (cudaSuccess / hipSuccess). Keep
    // going so the CSV row is still produced, but make the failure visible.
    std::fprintf(stderr, "warning: set_device(%d) returned status %d\n",
                 device, set_status);
  }

  // Untimed warm-up calls before measurement starts.
  int current = 0;
  for (int i = 0; i < warmup; ++i) {
    sink += get_device(&current);
    sink += current;
  }

  // One sample per round: average microseconds per get_device() call.
  std::vector<double> samples;
  samples.reserve(static_cast<std::size_t>(rounds));
  for (int round = 0; round < rounds; ++round) {
    const auto start = std::chrono::steady_clock::now();
    for (int i = 0; i < loops; ++i) {
      sink += get_device(&current);
      sink += current;
    }
    const auto stop = std::chrono::steady_clock::now();
    const double total_us =
        std::chrono::duration<double, std::micro>(stop - start).count();
    samples.push_back(total_us / static_cast<double>(loops));
  }

  // rounds > 0 guarantees `samples` is non-empty, so both iterators are
  // dereferenceable.
  const auto minmax = std::minmax_element(samples.begin(), samples.end());
  std::printf("section,api,loops,warmup,rounds,median_us,mean_us,min_us,max_us,sink\n");
  std::printf("device_query,%s,%d,%d,%d,%.9f,%.9f,%.9f,%.9f,%d\n",
              api_name(), loops, warmup, rounds, median(samples),
              mean(samples), *minmax.first, *minmax.second,
              static_cast<int>(sink));
  return 0;
}