#include #include #include #include #include #if defined(BACKEND_CUDA) #include #elif defined(BACKEND_HIP) #include #else #error "BACKEND_CUDA or BACKEND_HIP must be defined" #endif namespace { #if defined(BACKEND_CUDA) const char *api_name() { return "cudaGetDevice"; } int set_device(int device) { return static_cast(cudaSetDevice(device)); } int get_device(int *device) { return static_cast(cudaGetDevice(device)); } #else const char *api_name() { return "hipGetDevice"; } int set_device(int device) { return static_cast(hipSetDevice(device)); } int get_device(int *device) { return static_cast(hipGetDevice(device)); } #endif double median(std::vector values) { std::sort(values.begin(), values.end()); return values[values.size() / 2]; } double mean(const std::vector &values) { double total = 0.0; for (double value : values) { total += value; } return total / static_cast(values.size()); } } // namespace int main(int argc, char **argv) { const int device = argc > 1 ? std::atoi(argv[1]) : 0; const int loops = argc > 2 ? std::atoi(argv[2]) : 1000000; const int rounds = argc > 3 ? std::atoi(argv[3]) : 7; const int warmup = argc > 4 ? std::atoi(argv[4]) : 10000; volatile int sink = 0; sink += set_device(device); int current = 0; for (int i = 0; i < warmup; ++i) { sink += get_device(¤t); sink += current; } std::vector samples; for (int round = 0; round < rounds; ++round) { auto start = std::chrono::steady_clock::now(); for (int i = 0; i < loops; ++i) { sink += get_device(¤t); sink += current; } auto stop = std::chrono::steady_clock::now(); double total_us = std::chrono::duration(stop - start).count(); samples.push_back(total_us / static_cast(loops)); } auto minmax = std::minmax_element(samples.begin(), samples.end()); std::printf("section,api,loops,warmup,rounds,median_us,mean_us,min_us,max_us,sink\n"); std::printf("device_query,%s,%d,%d,%d,%.9f,%.9f,%.9f,%.9f,%d\n", api_name(), loops, warmup, rounds, median(samples), mean(samples), *minmax.first, *minmax.second, static_cast(sink)); return 0; }