Unverified Commit 33a5435c authored by gilbertlee-amd's avatar gilbertlee-amd Committed by GitHub
Browse files

v1.44 Adding rwrite preset benchmark (#77)

parent 30f1c584
......@@ -3,6 +3,12 @@
Documentation for TransferBench is available at
[https://rocm.docs.amd.com/projects/TransferBench](https://rocm.docs.amd.com/projects/TransferBench).
## v1.44
### Additions
* Adding rwrite preset to benchmark remote parallel writes
* Usage: ./TransferBench rwrite <numBytes=64M> <#CUs=8> <srcGpu=0> <minGpus=1> <maxGpus=3>
## v1.43
### Changes
......
......@@ -149,6 +149,32 @@ int main(int argc, char **argv)
} while (curr < N * 2);
}
}
else if (strcmp(argv[1], "rwrite") == 0)
{
  // Remote-write preset: needs at least one GPU besides the writer
  if (ev.numGpuDevices < 2)
  {
    printf("[ERROR] Remote write benchmark requires at least 2 GPUs\n");
    exit(1);
  }
  ev.DisplayRemoteWriteEnvVars();

  // Optional positional arguments (argv[3] .. argv[6]); defaults apply when absent
  int numSubExecs = 8;
  int srcIdx      = 0;
  int minGpus     = 1;
  int maxGpus     = std::min(ev.numGpuDevices - 1, 3);
  if (argc > 3) numSubExecs = atoi(argv[3]);
  if (argc > 4) srcIdx      = atoi(argv[4]);
  if (argc > 5) minGpus     = atoi(argv[5]);
  if (argc > 6) maxGpus     = atoi(argv[6]);

  // numBytesPerTransfer == 0 sweeps sizes (in floats) from 256 up through the 2^27 octave;
  // any other value runs exactly one benchmark of that size and then exits
  for (int N = 256; N <= (1<<27); N *= 2)
  {
    int const delta = std::max(1, N / ev.samplingFactor);

    int curr;
    if (numBytesPerTransfer == 0)
      curr = N;
    else
      curr = numBytesPerTransfer / sizeof(float);

    do
    {
      RunRemoteWriteBenchmark(ev, curr * sizeof(float), numSubExecs, srcIdx, minGpus, maxGpus);
      if (numBytesPerTransfer != 0)
        exit(0);
      curr += delta;
    } while (curr < N * 2);
  }
}
else if (!strcmp(argv[1], "cmdline"))
{
// Print environment variables and CSV header
......@@ -2393,6 +2419,72 @@ void RunSchmooBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int
}
}
// Remote write preset benchmark.
//
// For every p in [minGpus, maxGpus], enumerates each set of p GPUs that excludes srcIdx,
// and has GPU srcIdx execute one destination-only Transfer (numSrcs = 0) per GPU in the set,
// all submitted together through a single ExecuteTransfers call. One table row is printed
// per set: the bandwidth reported for each destination GPU (blank for GPUs not in the set),
// followed by a description of each Transfer.
//
// ev                  - Environment settings (numGpuDevices and useFineGrain are read here)
// numBytesPerTransfer - Number of bytes assigned to each Transfer
// numSubExecs         - Number of sub-executors assigned to each Transfer
// srcIdx              - Index of the GPU executing the writes; must be in [0, ev.numGpuDevices)
// minGpus / maxGpus   - Inclusive range of how many destination GPUs are written in parallel
//
// Exits with status 1 if srcIdx is out of range.
void RunRemoteWriteBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int numSubExecs, int const srcIdx, int minGpus, int maxGpus)
{
  // srcIdx is used as a shift amount and as the executor index below; shifting by a negative
  // or too-large amount is undefined behaviour, so reject bad indices up front
  if (srcIdx < 0 || srcIdx >= ev.numGpuDevices)
  {
    printf("[ERROR] Remote write source GPU index %d is out of range (expected 0 to %d)\n", srcIdx, ev.numGpuDevices - 1);
    exit(1);
  }

  // %zu is the portable conversion for size_t
  printf("Bytes to write: %zu from GPU %d using %d CUs [Sweeping %d to %d parallel writes]\n", numBytesPerTransfer, srcIdx, numSubExecs, minGpus, maxGpus);

  // Table header: one column per GPU other than the source
  for (int i = 0; i < ev.numGpuDevices; i++)
  {
    if (i == srcIdx) continue;
    printf(" GPU %3d ", i);
  }
  printf("\n");
  for (int i = 0; i < ev.numGpuDevices-1; i++)
  {
    printf("-------------");
  }
  printf("\n");

  int const srcBit   = 1 << srcIdx;
  int const numMasks = 1 << ev.numGpuDevices;

  for (int p = minGpus; p <= maxGpus; p++)
  {
    // Each bitmask selects a set of destination GPUs; only sets of exactly p GPUs
    // that do not contain the source GPU are benchmarked
    for (int bitmask = 0; bitmask < numMasks; bitmask++)
    {
      if (bitmask & srcBit) continue;
      if (__builtin_popcount(bitmask) != p) continue;

      // One Transfer per selected destination GPU, in ascending GPU index order
      std::vector<Transfer> transfers;
      if (p > 0) transfers.reserve(p);
      for (int i = 0; i < ev.numGpuDevices; i++)
      {
        if (!(bitmask & (1<<i))) continue;

        Transfer t;
        t.dstType.resize(1);
        t.dstIndex.resize(1);
        t.exeType     = EXE_GPU_GFX;
        t.exeIndex    = srcIdx;
        t.exeSubIndex = -1;
        t.numSubExecs = numSubExecs;
        t.numBytes    = numBytesPerTransfer;
        t.numSrcs     = 0;
        t.numDsts     = 1;
        t.dstType[0]  = (ev.useFineGrain ? MEM_GPU_FINE : MEM_GPU);
        t.dstIndex[0] = i;
        transfers.push_back(t);
      }
      ExecuteTransfers(ev, 0, 0, transfers, false);

      // Transfers were appended in ascending GPU order, so walking the GPUs again in the
      // same order lines each bandwidth up with its column
      size_t column = 0;
      for (int i = 0; i < ev.numGpuDevices; i++)
      {
        if (bitmask & (1<<i))
          printf(" %8.3f ", transfers[column++].transferBandwidth);
        else if (i != srcIdx)
          printf(" ");
      }

      // Describe each Transfer that made up this row
      for (Transfer const& t : transfers)
      {
        printf(" (N0 G%d %c%d)", srcIdx, MemTypeStr[t.dstType[0]], t.dstIndex[0]);
      }
      printf("\n");
    }
    printf("\n");
  }
}
void RunSweepPreset(EnvVars const& ev, size_t const numBytesPerTransfer, int const numGpuSubExecs, int const numCpuSubExecs, bool const isRandom)
{
......
......@@ -29,7 +29,7 @@ THE SOFTWARE.
#include "Compatibility.hpp"
#include "Kernels.hpp"
#define TB_VERSION "1.43"
#define TB_VERSION "1.44"
extern char const MemTypeStr[];
extern char const ExeTypeStr[];
......@@ -41,7 +41,8 @@ enum ConfigModeEnum
CFG_SWEEP = 2,
CFG_SCALE = 3,
CFG_A2A = 4,
CFG_SCHMOO = 5
CFG_SCHMOO = 5,
CFG_RWRITE = 6
};
enum BlockOrderEnum
......@@ -739,6 +740,17 @@ public:
std::string("Using ") + (useFineGrain ? "fine" : "coarse") + "-grained memory");
}
// Calls DisplayEnvVars(), then (unless hideEnv is set) reports the settings used by the remote-write preset
void DisplayRemoteWriteEnvVars() const
{
  DisplayEnvVars();

  if (!hideEnv)
  {
    // The section heading is omitted when output is in CSV form
    if (!outputToCsv)
    {
      printf("[Remote-Write Related]\n");
    }
    PRINT_EV("USE_FINE_GRAIN", useFineGrain,
             std::string("Using ") + (useFineGrain ? "fine" : "coarse") + "-grained memory");
  }
}
// Helper function that parses an environment variable, or falls back to the default value
static int GetEnvVar(std::string const& varname, int defaultValue)
{
......
......@@ -194,6 +194,7 @@ void RunScalingBenchmark(EnvVars const& ev, size_t N, int const exeIndex, int co
void RunSweepPreset(EnvVars const& ev, size_t const numBytesPerTransfer, int const numGpuSubExec, int const numCpuSubExec, bool const isRandom);
void RunAllToAllBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int const numSubExecs);
void RunSchmooBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int const localIdx, int const remoteIdx, int const maxSubExecs);
// Remote write preset: GPU srcIdx writes numBytesPerTransfer bytes to each GPU of every set of minGpus..maxGpus other GPUs
void RunRemoteWriteBenchmark(EnvVars const& ev, size_t const numBytesPerTransfer, int numSubExecs, int const srcIdx, int minGpus, int maxGpus);
std::string GetLinkTypeDesc(uint32_t linkType, uint32_t hopCount);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment