// Read USE_GPU_DMA via GetEnvVar; 0 is passed as the second argument
// (presumably the value used when the variable is unset - confirm against GetEnvVar).
// NOTE(review): the statement appears twice in a row; the second assignment repeats the first.
useDmaCopy=GetEnvVar("USE_GPU_DMA",0);// Needed for numGpuSubExec
useDmaCopy=GetEnvVar("USE_GPU_DMA",0);// Needed for numGpuSubExec
...
@@ -221,6 +226,7 @@ public:
...
@@ -221,6 +226,7 @@ public:
// A2A Benchmark related
// A2A Benchmark related
// A2A_DIRECT is read with 1 as GetEnvVar's second argument
// (presumably the value used when the variable is unset - confirm against GetEnvVar).
a2aDirect=GetEnvVar("A2A_DIRECT",1);
a2aDirect=GetEnvVar("A2A_DIRECT",1);
// A2A_MODE is read with 0 as the second argument; the range check later in this
// file only accepts values 0 through 2.
a2aMode=GetEnvVar("A2A_MODE",0);
// Determine random seed
// Determine random seed
// getenv returns NULL when SWEEP_SEED is not set, so sweepSeedStr must be
// null-checked before it is dereferenced.
char*sweepSeedStr=getenv("SWEEP_SEED");
char*sweepSeedStr=getenv("SWEEP_SEED");
...
@@ -401,14 +407,14 @@ public:
...
@@ -401,14 +407,14 @@ public:
printf("[ERROR] Number of GPUs to use (%d) cannot exceed number of detected GPUs (%d)\n",numGpuDevices,numDetectedGpus);
printf("[ERROR] Number of GPUs to use (%d) cannot exceed number of detected GPUs (%d)\n",numGpuDevices,numDetectedGpus);
exit(1);
exit(1);
}
}
// Reject a block size that is not a multiple of 64: a non-zero remainder prints
// an [ERROR] line and terminates the process with exit status 1.
// NOTE(review): the blockSize / gfxBlockSize lines come in adjacent pairs that look
// like two revisions of the same check shown together - confirm which one is live.
if(blockSize%64)
if(gfxBlockSize%64)
{
{
printf("[ERROR] BLOCK_SIZE (%d) must be a multiple of 64\n",blockSize);
printf("[ERROR] GFX_BLOCK_SIZE (%d) must be a multiple of 64\n",gfxBlockSize);
exit(1);
exit(1);
}
}
// Upper-bound check on the block size. The comparison is a strict '>', so a value
// equal to MAX_BLOCKSIZE is accepted; the messages say "cannot exceed" to match
// that boundary. A violation prints an [ERROR] line and exits with status 1.
// NOTE(review): the blockSize / gfxBlockSize lines come in adjacent pairs that look
// like two revisions of the same check shown together - confirm which one is live.
if(blockSize>MAX_BLOCKSIZE)
if(gfxBlockSize>MAX_BLOCKSIZE)
{
{
printf("[ERROR] BLOCK_SIZE (%d) cannot exceed %d\n",blockSize,MAX_BLOCKSIZE);
// Report the setting that is actually tested (gfxBlockSize), using the same
// GFX_BLOCK_SIZE name as the multiple-of-64 check.
printf("[ERROR] GFX_BLOCK_SIZE (%d) cannot exceed %d\n",gfxBlockSize,MAX_BLOCKSIZE);
exit(1);
exit(1);
}
}
if(byteOffset%sizeof(float))
if(byteOffset%sizeof(float))
...
@@ -494,9 +500,22 @@ public:
...
@@ -494,9 +500,22 @@ public:
exit(1);
exit(1);
}
}
}
}
// Range checks on kernel-related settings. Each failing check prints an [ERROR]
// line and terminates the process with exit status 1.
// NOTE(review): the gpuKernel condition directly below has no body of its own here;
// its "GPU kernel must be between ..." message appears further down, next to the
// wave-order message. This looks like two revisions interleaved - confirm.
if(gpuKernel<0||gpuKernel>NUM_GPU_KERNELS)
// a2aMode is valid only for the values 0, 1 and 2.
if(a2aMode<0||a2aMode>2)
{
printf("[ERROR] a2aMode must be between 0 and 2\n");
exit(1);
}
// gfxUnroll is valid from 1 up to and including MAX_UNROLL.
if(gfxUnroll<1||gfxUnroll>MAX_UNROLL)
{
printf("[ERROR] GFX kernel unroll factor must be between 1 and %d\n",MAX_UNROLL);
exit(1);
}
// gfxWaveOrder is valid for 0 through 5 (six values).
if(gfxWaveOrder<0||gfxWaveOrder>=6)
{
{
printf("[ERROR] GPU kernel must be between 0 and %d\n",NUM_GPU_KERNELS);
printf("[ERROR] GFX wave order must be between 0 and 5\n");
exit(1);
exit(1);
}
}
...
@@ -533,6 +552,12 @@ public:
...
@@ -533,6 +552,12 @@ public:
exit(1);
exit(1);
}
}
// GPU_KERNEL is no longer accepted: if it is set to anything at all, report it and stop.
// The run is aborted with exit status 1, so the message carries the [ERROR] tag used by
// the other fatal checks in this file instead of [WARN].
if(getenv("GPU_KERNEL"))
{
printf("[ERROR] GPU_KERNEL has been deprecated and replaced by GFX_KERNEL and GFX_UNROLL\n");
exit(1);
}
// Look up HSA_ENABLE_SDMA; getenv returns NULL when the variable is not set, which is
// why the test that follows checks the pointer before comparing the string against "0".
// NOTE(review): the declaration appears twice in a row - confirm the repetition is an
// artifact of how this text was produced, since a second declaration would not compile.
char*enableSdma=getenv("HSA_ENABLE_SDMA");
char*enableSdma=getenv("HSA_ENABLE_SDMA");
if(enableSdma&&!strcmp(enableSdma,"0"))
if(enableSdma&&!strcmp(enableSdma,"0"))
{
{
...
@@ -553,6 +578,9 @@ public:
...
@@ -553,6 +578,9 @@ public:
// Help text: one printed line per environment variable with a short description.
// NOTE(review): each line appears twice in a row except the three GFX_* entries - confirm
// whether the repetition is an artifact of how this text was produced.
printf(" CONTINUE_ON_ERROR - Continue tests even after mismatch detected\n");
printf(" CONTINUE_ON_ERROR - Continue tests even after mismatch detected\n");
// Hex digit range corrected from "0-0" to "0-9", matching the FILL_PATTERN entry below.
printf(" CU_MASK - CU mask for streams specified in hex digits (0-9,a-f,A-F)\n");
printf(" CU_MASK - CU mask for streams specified in hex digits (0-9,a-f,A-F)\n");
printf(" FILL_PATTERN=STR - Fill input buffer with pattern specified in hex digits (0-9,a-f,A-F). Must be even number of digits, (byte-level big-endian)\n");
printf(" FILL_PATTERN=STR - Fill input buffer with pattern specified in hex digits (0-9,a-f,A-F). Must be even number of digits, (byte-level big-endian)\n");
// The range check in this file rejects only values above MAX_UNROLL, so the limit itself
// is allowed; the wording reflects that.
// NOTE(review): that same check also rejects values below 1 - confirm that "0=auto" is
// resolved to a concrete factor before the check runs.
printf(" GFX_UNROLL - Unroll factor for GFX kernel (0=auto), may not be greater than %d\n",MAX_UNROLL);
printf(" GFX_SINGLE_TEAM - Have subexecutors work together on full array instead of working on individual disjoint subarrays\n");
printf(" GFX_WAVE_ORDER - Stride pattern for GFX kernel (0=UWC,1=UCW,2=WUC,3=WCU,4=CUW,5=CWU)\n");
printf(" HIDE_ENV - Hide environment variable value listing\n");
printf(" HIDE_ENV - Hide environment variable value listing\n");
printf(" NUM_CPU_DEVICES=X - Restrict number of CPUs to X. May not be greater than # detected NUMA nodes\n");
printf(" NUM_CPU_DEVICES=X - Restrict number of CPUs to X. May not be greater than # detected NUMA nodes\n");
printf(" NUM_GPU_DEVICES=X - Restrict number of GPUs to X. May not be greater than # detected HIP devices\n");
printf(" NUM_GPU_DEVICES=X - Restrict number of GPUs to X. May not be greater than # detected HIP devices\n");