printf("[ERROR] Duplicate executor type '%c' specified for sweep executor\n",ch);
exit(1);
}
}
}
// Display info on the env vars that can be used
...
...
@@ -179,7 +270,9 @@ public:
printf(" BLOCK_BYTES=B - Each CU (except the last) receives a multiple of BLOCK_BYTES to copy\n");
printf(" BYTE_OFFSET - Initial byte-offset for memory allocations. Must be multiple of 4. Defaults to 0\n");
printf(" FILL_PATTERN=STR - Fill input buffer with pattern specified in hex digits (0-9,a-f,A-F). Must be even number of digits, (byte-level big-endian)\n");
printf(" NUM_CPU_DEVICES=X - Restrict number of CPUs to X. May not be greater than # detected NUMA nodes\n");
printf(" NUM_CPU_PER_TRANSFER=C - Use C threads per Transfer for CPU-executed copies\n");
printf(" NUM_GPU_DEVICES=X - Restrict number of GPUs to X. May not be greater than # detected HIP devices\n");
printf(" NUM_ITERATIONS=I - Perform I timed iteration(s) per test\n");
printf(" NUM_WARMUPS=W - Perform W untimed warmup iteration(s) per test\n");
printf(" OUTPUT_TO_CSV - Outputs to CSV format if set\n");
...
...
@@ -207,8 +300,10 @@ public:
else
printf("Pseudo-random: (Element i = i modulo 383 + 31)");
printf("\n");
printf("%-20s = %12d : Using %d CPU thread(s) per CPU-based-copy Transfer\n","NUM_CPU_PER_TRANSFER",numCpuPerTransfer,numCpuPerTransfer);
printf("%-20s = %12d : Running %d %s per topology\n","NUM_ITERATIONS",numIterations,
printf("%-20s = %12d : Using %d CPU devices\n","NUM_CPU_DEVICES",numCpuDevices,numCpuDevices);
printf("%-20s = %12d : Using %d CPU thread(s) per CPU-executed Transfer\n","NUM_CPU_PER_TRANSFER",numCpuPerTransfer,numCpuPerTransfer);
printf("%-20s = %12d : Using %d GPU devices\n","NUM_GPU_DEVICES",numGpuDevices,numGpuDevices);
printf("%-20s = %12d : Running %d %s per test\n","NUM_ITERATIONS",numIterations,