/************************************************************************* * Copyright (c) Microsoft Corporation. * Licensed under the MIT License. ************************************************************************/ #include #include #include #include #include #include #include #include //cerr #include #define HIP_IPC_MEM_MIN_SIZE 2097152UL #define NUM_LOOPS_WARMUP 2000 #define NUM_LOOPS_RUN 10000 #define PING_MODE 0 #define PONG_MODE 1 __global__ void PingKernel(uint64_t* local_flag, uint64_t* remote_flag, uint64_t* time_delta) { #pragma unroll for (uint32_t i = 1; i < NUM_LOOPS_WARMUP; i++) { __atomic_store_n(remote_flag, i, __ATOMIC_RELAXED); while (__atomic_load_n(local_flag, __ATOMIC_RELAXED) != i); } uint64_t start_time = wall_clock64(); #pragma unroll for (uint32_t i = NUM_LOOPS_WARMUP; i <= NUM_LOOPS_WARMUP + NUM_LOOPS_RUN; i++) { __atomic_store_n(remote_flag, i, __ATOMIC_RELAXED); while (__atomic_load_n(local_flag, __ATOMIC_RELAXED) != i); } uint64_t end_time = wall_clock64(); *time_delta = end_time - start_time; } __global__ void PongKernel(uint64_t* local_flag, uint64_t* remote_flag, uint64_t* time_delta) { #pragma unroll for (uint32_t i = 1; i < NUM_LOOPS_WARMUP; i++) { while (__atomic_load_n(local_flag, __ATOMIC_RELAXED) != i); __atomic_store_n(remote_flag, i, __ATOMIC_RELAXED); } uint64_t start_time = wall_clock64(); #pragma unroll for (uint32_t i = NUM_LOOPS_WARMUP; i <= NUM_LOOPS_WARMUP + NUM_LOOPS_RUN; i++) { while (__atomic_load_n(local_flag, __ATOMIC_RELAXED) != i); __atomic_store_n(remote_flag, i, __ATOMIC_RELAXED); } uint64_t end_time = wall_clock64(); *time_delta = end_time - start_time; } #define HIPCHECK(cmd) \ do { \ hipError_t error = (cmd); \ if (error != hipSuccess) \ { \ std::cerr << "Encountered HIP error (" << error << ") at line " \ << __LINE__ << " in file " << __FILE__ << "\n"; \ exit(-1); \ } \ } while (0) int main(int argc, char** argv) { hipStream_t stream[2]; hipError_t err = hipSuccess; int device_id[2]; hipDeviceProp_t prop[2]; if (argc != 3) { fprintf(stderr, "Usage: ./p2p_latency_test ping_dev_id pong_dev_id\n"); return -1; } device_id[0] = atoi(argv[1]); device_id[1] = atoi(argv[2]); fprintf(stdout, "Using devices %d %d\n", device_id[0], device_id[1]); uint64_t *flag[2]; uint64_t *time_delta[2]; HIPCHECK(hipSetDevice(device_id[0])); HIPCHECK(hipStreamCreateWithFlags(&stream[0], hipStreamNonBlocking)); HIPCHECK(hipDeviceEnablePeerAccess(device_id[1], 0)); HIPCHECK(hipGetDeviceProperties(&prop[0], device_id[0])); HIPCHECK(hipExtMallocWithFlags((void**)&flag[0], HIP_IPC_MEM_MIN_SIZE, prop[0].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); HIPCHECK(hipMalloc((void**)&time_delta[0], HIP_IPC_MEM_MIN_SIZE)); HIPCHECK(hipMemsetAsync(flag[0], 0, HIP_IPC_MEM_MIN_SIZE, stream[0])); HIPCHECK(hipStreamSynchronize(stream[0])); HIPCHECK(hipSetDevice(device_id[1])); HIPCHECK(hipStreamCreateWithFlags(&stream[1], hipStreamNonBlocking)); HIPCHECK(hipDeviceEnablePeerAccess(device_id[0], 0)); HIPCHECK(hipGetDeviceProperties(&prop[1], device_id[1])); HIPCHECK(hipExtMallocWithFlags((void**)&flag[1], HIP_IPC_MEM_MIN_SIZE, prop[1].gcnArch / 10 == 94 ? hipDeviceMallocUncached : hipDeviceMallocFinegrained)); HIPCHECK(hipMalloc((void**)&time_delta[1], HIP_IPC_MEM_MIN_SIZE)); HIPCHECK(hipMemsetAsync(flag[1], 0, HIP_IPC_MEM_MIN_SIZE, stream[1])); HIPCHECK(hipStreamSynchronize(stream[1])); HIPCHECK(hipSetDevice(device_id[0])); PingKernel<<<1, 1, 0, stream[0]>>>(flag[0], flag[1], time_delta[0]); HIPCHECK(hipSetDevice(device_id[1])); PongKernel<<<1, 1, 0, stream[1]>>>(flag[1], flag[0], time_delta[1]); double vega_gpu_rtc_freq; HIPCHECK(hipStreamSynchronize(stream[0])); vega_gpu_rtc_freq = (prop[0].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7; fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[0]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2); HIPCHECK(hipStreamSynchronize(stream[1])); vega_gpu_rtc_freq = (prop[1].gcnArch / 10 == 94) ? 1.0E8 : 2.5E7; fprintf(stdout, "One-way latency in us: %g\n", double(*time_delta[1]) * 1e6 / NUM_LOOPS_RUN / vega_gpu_rtc_freq / 2); HIPCHECK(hipFree(flag[0])); HIPCHECK(hipFree(time_delta[0])); HIPCHECK(hipFree(flag[1])); HIPCHECK(hipFree(time_delta[1])); return 0; }