Unverified Commit af4d18de authored by Yuting Jiang's avatar Yuting Jiang Committed by GitHub
Browse files

Benchmarks: Add benchmark - Add source code of DirectxGPUMemBw microbenchmark (#487)



**Description**
Add source code of DirectxGPUMemBw microbenchmark.

---------
Co-authored-by: default avatarv-junlinlv <v-junlinlv@microsoft.com>
parent ed027e4c
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#include <iostream>
#include <sstream>
#include <string>
#include "../directx_utils/Options.h"
#include "GPUMemRwBw.h"
// Kind of memory traffic generated by the benchmark kernel.
// The numeric values are used as indices into MemtypeString below.
enum Memtype {
    Read = 0,
    Write = 1,
    ReadWrite = 2,
};
// Printable name of every Memtype, indexed by the enumerator value.
const std::string MemtypeString[] = {"Read", "Write", "ReadWrite"};
class BenchmarkOptions : public Options {
public:
// Number of warm up rounds.
int num_warm_up = 0;
// Number of loop rounds of dispatch to measure the performance.
int num_loop = 0;
// Size of data for GPU mem access.
unsigned long long size;
// Run size from min_size to max_size for GPU mem access.
unsigned long long min_size = 0;
// Run size from min_size to max_size for GPU mem access.
unsigned long long max_size = 0;
// Whether check data correctness.
bool check_data = false;
// Memory operation type.
Memtype mem_type = Memtype::Write;
// Number of threads to launch.
UInt3 num_threads;
/**
* @brief Construct a new BenchmarkOptions object.
*/
BenchmarkOptions(int argc, char *argv[]) : Options(argc, argv) {}
/**
* @brief Get the option usage.
*/
void get_option_usage() override {
std::cout << "Usage: " << std::endl;
std::cout << " --num_warm_up <num_warm_up> : Number of warm up rounds." << std::endl;
std::cout << " --num_loop <num_loop> : Number of loop times to measure the performance." << std::endl;
std::cout << " --minbytes <minbytes> : Lower data size bound to test." << std::endl;
std::cout << " --maxbytes <maxbytes> : Upper data size bound to test." << std::endl;
std::cout << " --check_data <check_data> : Whether check data correctness." << std::endl;
std::cout << " --read : Memory operation type is read." << std::endl;
std::cout << " --write : Memory operation type is write." << std::endl;
std::cout << " --readwrite : Memory operation type is readwrite." << std::endl;
std::cout << " --numthreads <x>,<y>,<z> : Number of threads in 3 dimenstions to launch." << std::endl;
std::cout << " --help : Print help message." << std::endl;
}
/**
* @brief Parse the arguments.
*/
virtual void parse_arguments() override {
num_warm_up = get_cmd_line_argument_int("--num_warm_up", 0);
num_loop = get_cmd_line_argument_int("--num_loop", 1);
size = get_cmd_line_argument_ulonglong("--size", -1);
min_size = get_cmd_line_argument_int("--minbytes", 4 * 1024);
max_size =
get_cmd_line_argument_ulonglong("--maxbytes", static_cast<unsigned long long>(1LL * 1024 * 1024 * 1024));
check_data = get_cmd_line_argument_bool("--check");
if (get_cmd_line_argument_bool("--read")) {
mem_type = Memtype::Read;
}
if (get_cmd_line_argument_bool("--write")) {
mem_type = Memtype::Write;
}
if (get_cmd_line_argument_bool("--readwrite")) {
mem_type = Memtype::ReadWrite;
}
num_threads = get_cmd_line_argument_uint3("--numthreads", {256, 1, 1});
}
};
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <fstream>
#include <iostream>
#include <tchar.h>
#include <vector>
#include "GPUMemRwBw.h"
/**
 * @brief Run the benchmark once: set up the pipeline, buffers and shader, time the
 * dispatches and print the resulting bandwidth to stdout.
 */
void GPUMemRwBw::Run() {
    // Device, queue, command list and fence come first; everything else needs them.
    CreatePipeline();
    // CPU data plus the GPU buffers it is uploaded into.
    PrepareDataAndBuffer(this->m_num_elements);
    // Root signature, shader and pipeline state.
    LoadAssets();

    // Timed section.
    const double elapsedMs = MemReadWriteBench(this->m_num_elements, opts->num_loop, opts->num_warm_up);

    // bytes / ms / 1e6 == GB/s.
    const double bandwidth = this->m_num_elements * sizeof(float) * opts->num_loop / elapsedMs / 1e6;

    // Report the result.
    const std::string modeName = MemtypeString[static_cast<int>(opts->mem_type)];
    cout << "GPUMemBw: " << modeName << " " << opts->size << " " << bandwidth << " GB/s" << endl;
}
/**
 * @brief Allocate resources on both CPU side and GPU side and fill the shader parameter buffer.
 * @param numElement the length of the data array (in floats).
 * @throws std::runtime_error if the dispatch/thread layout contains no threads, i.e. the data is
 *         smaller than one thread group.
 */
void GPUMemRwBw::PrepareDataAndBuffer(SIZE_T numElement) {
    // Calculate total number of threads. Widen before multiplying so the product cannot wrap in
    // 32 bits, and reject an empty layout up front: it would otherwise be used as a divisor below.
    const SIZE_T totalGroupNum = static_cast<SIZE_T>(m_num_dispatch.x) * m_num_dispatch.y * m_num_dispatch.z;
    const SIZE_T groupThreadNum = static_cast<SIZE_T>(m_num_thread.x) * m_num_thread.y * m_num_thread.z;
    const SIZE_T totalThreadNum = totalGroupNum * groupThreadNum;
    if (totalThreadNum == 0) {
        throw std::runtime_error("Error: data size is too small for the requested number of threads.");
    }
    // Prepare CPU side data.
    std::vector<float> dataA(numElement);
    for (SIZE_T i = 0; i < numElement; i++) {
        dataA[i] = static_cast<float>(i % 256);
    }
    // Allocate resources on GPU side to take those data.
    UINT64 byteSize = dataA.size() * sizeof(float);
    if (opts->mem_type == Memtype::Write || opts->mem_type == Memtype::ReadWrite) {
        m_inputBuffer =
            CreateDefaultBuffer(m_device.Get(), m_commandList.Get(), dataA.data(), byteSize, m_uploadBuffer);
    }
    // Allocate the output buffer in the default heap; the shaders access it as an unordered access view.
    ThrowIfFailed(m_device->CreateCommittedResource(
        get_rvalue_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_DEFAULT)), D3D12_HEAP_FLAG_NONE,
        get_rvalue_ptr(CD3DX12_RESOURCE_DESC::Buffer(byteSize, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS)),
        D3D12_RESOURCE_STATE_UNORDERED_ACCESS, nullptr, IID_PPV_ARGS(&m_outputBuffer)));
    // Allocate readback buffer if needed.
    if (opts->check_data && opts->mem_type != Memtype::Read) {
        // Allocate readback buffer to check result correctness.
        ThrowIfFailed(m_device->CreateCommittedResource(
            get_rvalue_ptr(CD3DX12_HEAP_PROPERTIES(D3D12_HEAP_TYPE_READBACK)), D3D12_HEAP_FLAG_NONE,
            get_rvalue_ptr(CD3DX12_RESOURCE_DESC::Buffer(byteSize)), D3D12_RESOURCE_STATE_COPY_DEST, nullptr,
            IID_PPV_ARGS(&m_readbackBuffer)));
    }
    // Prepare the parameter buffer of shader.
    CD3DX12_HEAP_PROPERTIES heapProperties(D3D12_HEAP_TYPE_UPLOAD);
    CD3DX12_RESOURCE_DESC bufferDesc = CD3DX12_RESOURCE_DESC::Buffer(sizeof(ParameterBuffer));
    ThrowIfFailed(m_device->CreateCommittedResource(&heapProperties, D3D12_HEAP_FLAG_NONE, &bufferDesc,
                                                    D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
                                                    IID_PPV_ARGS(&m_constantBuffer)));
    // Fill the constant buffer to pass parameters to GPU.
    ParameterBuffer param;
    // Every thread handles numLoop elements.
    param.numLoop = static_cast<int>(numElement / totalThreadNum);
    param.numThread = m_num_thread;
    param.numDispatch = m_num_dispatch;
    // Upload constant buffer.
    UINT8 *pCBDataBegin = nullptr;
    ThrowIfFailed(m_constantBuffer->Map(0, nullptr, reinterpret_cast<void **>(&pCBDataBegin)));
    memcpy(pCBDataBegin, &param, sizeof(param));
    m_constantBuffer->Unmap(0, nullptr);
    // Commit resource allocation command list.
    ExecuteWaitForCommandQueue();
}
/**
 * @brief Check result correctness by copying the output buffer back to the CPU and comparing
 * every element against the expected pattern (index % 256).
 * @param numElement the length of data array.
 * @return true if result is correct, false if a mismatch was found (the first one is printed).
 */
bool GPUMemRwBw::CheckData(SIZE_T numElement) {
    // Readback result to check correctness.
    m_commandList->ResourceBarrier(
        1, get_rvalue_ptr(CD3DX12_RESOURCE_BARRIER::Transition(m_outputBuffer.Get(), D3D12_RESOURCE_STATE_COMMON,
                                                               D3D12_RESOURCE_STATE_COPY_SOURCE)));
    m_commandList->CopyResource(m_readbackBuffer.Get(), m_outputBuffer.Get());
    m_commandList->ResourceBarrier(
        1, get_rvalue_ptr(CD3DX12_RESOURCE_BARRIER::Transition(m_outputBuffer.Get(), D3D12_RESOURCE_STATE_COPY_SOURCE,
                                                               D3D12_RESOURCE_STATE_COMMON)));
    // Execute copy back and sync.
    ExecuteWaitForCommandQueue();
    // Access from CPU.
    float *mappedData = nullptr;
    ThrowIfFailed(m_readbackBuffer->Map(0, nullptr, reinterpret_cast<void **>(&mappedData)));
    bool isCorrect = true;
    // Index with SIZE_T so the comparison against numElement is not signed/unsigned mixed and
    // cannot overflow for large buffers.
    for (SIZE_T i = 0; i < numElement; ++i) {
        const int expected = static_cast<int>(i % 256);
        const int actual = static_cast<int>(mappedData[i]);
        if (actual != expected) {
            cout << "Error: check data failed - index " << i << " should be " << expected << " but got " << actual
                 << endl;
            // Report the failure to the caller instead of always claiming success.
            isCorrect = false;
            break;
        }
    }
    m_readbackBuffer->Unmap(0, nullptr);
    return isCorrect;
}
/**
 * @brief Memory read write benchmark: record warm up and measured dispatches into the command
 * list, execute them and return the GPU time of the measured part.
 * @param numElem the length of data array, only used for the optional data check.
 * @param loops the number of dispatches that are timed.
 * @param numWarmUp the number of dispatches issued before the timer starts.
 * @return double the time elapsed in ms.
 */
double GPUMemRwBw::MemReadWriteBench(SIZE_T numElem, int loops, int numWarmUp) {
    m_gpuTimer.init(m_device.Get(), m_commandQueue.Get(), 1, D3D12::QueueType::compute);

    // Warm up and measured rounds share one loop; the start timestamp is recorded right
    // before the first measured dispatch.
    const int totalRounds = loops + numWarmUp;
    for (int round = 0; round < totalRounds; ++round) {
        if (round == numWarmUp) {
            m_gpuTimer.start(m_commandList.Get(), 0);
        }
        const UInt3 groups = m_num_dispatch;
        m_commandList->Dispatch(groups.x, groups.y, groups.z);
    }

    // Record the stop timestamp and make both timestamps readable from the CPU.
    m_gpuTimer.stop(m_commandList.Get(), 0);
    m_gpuTimer.resolveQueryToCPU(m_commandList.Get(), 0);

    // Submit everything recorded above and block until the GPU is done.
    ExecuteWaitForCommandQueue();

    const double elapsedMs = m_gpuTimer.getElapsedMsByTimestampPair(0);

    // The read kernel produces no output worth verifying, so it is never checked.
    const bool shouldVerify = opts->check_data && opts->mem_type != Memtype::Read;
    if (shouldVerify) {
        CheckData(numElem);
    }
    return elapsedMs;
}
/**
* @brief Create pipeline including
* create device object, command list, command queue
* and synchronization objects.
*/
void GPUMemRwBw::CreatePipeline() {
UINT dxgiFactoryFlags = 0;
#if _DEBUG
// Enable the debug layer (requires the Graphics Tools "optional feature").
// NOTE: Enabling the debug layer after device creation will invalidate the active device.
{
ComPtr<ID3D12Debug> debugController;
if (SUCCEEDED(D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)))) {
debugController->EnableDebugLayer();
// Enable additional debug layers.
dxgiFactoryFlags |= DXGI_CREATE_FACTORY_DEBUG;
}
}
#endif
ComPtr<IDXGIFactory4> factory;
ThrowIfFailed(CreateDXGIFactory2(dxgiFactoryFlags, IID_PPV_ARGS(&factory)));
ComPtr<IDXGIAdapter1> hardwareAdapter;
GetHardwareAdapter(factory.Get(), &hardwareAdapter);
ThrowIfFailed(D3D12CreateDevice(hardwareAdapter.Get(), D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&m_device)));
D3D12_COMMAND_QUEUE_DESC cqd3 = {};
cqd3.Type = D3D12_COMMAND_LIST_TYPE_DIRECT;
ThrowIfFailed(m_device->CreateCommandQueue(&cqd3, IID_PPV_ARGS(&m_commandQueue)));
ThrowIfFailed(m_device->CreateCommandAllocator(D3D12_COMMAND_LIST_TYPE_DIRECT, IID_PPV_ARGS(&m_commandAllocator)));
// Create the command list.
ThrowIfFailed(m_device->CreateCommandList(0, D3D12_COMMAND_LIST_TYPE_DIRECT, m_commandAllocator.Get(), nullptr,
IID_PPV_ARGS(&m_commandList)));
// Create synchronization objects.
ThrowIfFailed(m_device->CreateFence(0, D3D12_FENCE_FLAG_NONE, IID_PPV_ARGS(&m_fence)));
m_fenceValue = 1;
// Create an event handle to use for GPU synchronization.
m_eventHandle = CreateEvent(0, false, false, 0);
}
/**
 * @brief Setup GPU pipeline resource including creating root signature, pipeline state and compile shader.
 * Throws runtime_error if the root signature cannot be serialized; exits the process on an
 * unknown memory operation type.
 */
void GPUMemRwBw::LoadAssets() {
    // Prepare root signature, root parameter can be a table, root descriptor or root constants.
    const int nParamter = 3;
    CD3DX12_ROOT_PARAMETER slotRootParameter[nParamter];
    // Bind the SRV, CBV and UAV as root descriptors (register t0, b0 and u0).
    slotRootParameter[0].InitAsShaderResourceView(0);
    slotRootParameter[1].InitAsConstantBufferView(0);
    slotRootParameter[2].InitAsUnorderedAccessView(0);
    // Create the root signature.
    // A root signature is an array of root parameters.
    CD3DX12_ROOT_SIGNATURE_DESC rootSigDesc(nParamter, slotRootParameter, 0, nullptr, D3D12_ROOT_SIGNATURE_FLAG_NONE);
    ComPtr<ID3DBlob> serializedRootSig = nullptr;
    ComPtr<ID3DBlob> errorBlob = nullptr;
    HRESULT hr = D3D12SerializeRootSignature(&rootSigDesc, D3D_ROOT_SIGNATURE_VERSION_1,
                                             serializedRootSig.GetAddressOf(), errorBlob.GetAddressOf());
    if (hr != S_OK || errorBlob != nullptr) {
        // The error blob is optional: a failing HRESULT does not guarantee it is filled in,
        // so only print it when it is actually there.
        if (errorBlob != nullptr) {
            std::cout << "Error: " << static_cast<const char *>(errorBlob->GetBufferPointer()) << std::endl;
        }
        throw runtime_error("Error: D3D12SerializeRootSignature failed.");
    }
    ThrowIfFailed(m_device->CreateRootSignature(0, serializedRootSig->GetBufferPointer(),
                                                serializedRootSig->GetBufferSize(),
                                                IID_PPV_ARGS(m_rootSignature.GetAddressOf())));
    // Define the number of threads per thread group.
    // LPCSTR pointer obtained from myString.c_str() is only valid as long as the myString object exists.
    std::string x_str = std::to_string(m_num_thread.x);
    LPCSTR x_val = x_str.c_str();
    std::string y_str = std::to_string(m_num_thread.y);
    LPCSTR y_val = y_str.c_str();
    std::string z_str = std::to_string(m_num_thread.z);
    LPCSTR z_val = z_str.c_str();
    D3D_SHADER_MACRO defines[] = {
        {"X", x_val},
        {"Y", y_val},
        {"Z", z_val},
        {nullptr, nullptr} // The last entry must be nullptr to indicate the end of the array
    };
    // Load and compile the shader entry point that matches the user specified memory type.
    switch (opts->mem_type) {
    case Memtype::Read:
        m_shader = CompileShader(L"ReadWrite.hlsl", defines, "Read", "cs_5_0");
        break;
    case Memtype::Write:
        m_shader = CompileShader(L"ReadWrite.hlsl", defines, "Write", "cs_5_0");
        break;
    case Memtype::ReadWrite:
        m_shader = CompileShader(L"ReadWrite.hlsl", defines, "ReadWrite", "cs_5_0");
        break;
    default:
        std::cout << "Error: Invalid memory type." << std::endl;
        exit(1);
    }
    // Describe and create the compute pipeline state object (PSO).
    D3D12_COMPUTE_PIPELINE_STATE_DESC computePsoDesc = {};
    computePsoDesc.pRootSignature = m_rootSignature.Get();
    computePsoDesc.CS = {reinterpret_cast<BYTE *>(m_shader->GetBufferPointer()), m_shader->GetBufferSize()};
    computePsoDesc.Flags = D3D12_PIPELINE_STATE_FLAG_NONE;
    ThrowIfFailed(m_device->CreateComputePipelineState(&computePsoDesc, IID_PPV_ARGS(&m_PSO)));
    ExecuteWaitForCommandQueue();
    // Setup root signature for pipeline. This is recorded after the wait above because that call
    // resets the command list.
    m_commandList->SetPipelineState(m_PSO.Get());
    m_commandList->SetComputeRootSignature(m_rootSignature.Get());
    // The input buffer only exists for the write and readwrite types.
    if (opts->mem_type == Memtype::Write || opts->mem_type == Memtype::ReadWrite) {
        m_commandList->SetComputeRootShaderResourceView(0, m_inputBuffer->GetGPUVirtualAddress());
    }
    m_commandList->SetComputeRootConstantBufferView(1, m_constantBuffer->GetGPUVirtualAddress());
    m_commandList->SetComputeRootUnorderedAccessView(2, m_outputBuffer->GetGPUVirtualAddress());
}
/**
 * @brief Create a buffer in the default heap and record the commands that fill it with the
 * given data through an intermediate upload buffer.
 * @param device the GPU device object.
 * @param cmdList the GPU command list object the copy is recorded into.
 * @param initData the data that need to upload.
 * @param byteSize the size of data that need to upload.
 * @param uploadBuffer receives the intermediate upload buffer; the copy is only recorded here,
 *        not executed, so the caller has to keep it alive until the command list has run.
 * @return the default heap buffer, transitioned to the generic read state by the recorded commands.
 */
Microsoft::WRL::ComPtr<ID3D12Resource>
GPUMemRwBw::CreateDefaultBuffer(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList, const void *initData,
                                UINT64 byteSize, Microsoft::WRL::ComPtr<ID3D12Resource> &uploadBuffer) {
    // Destination buffer in the default heap.
    ComPtr<ID3D12Resource> gpuBuffer;
    CD3DX12_HEAP_PROPERTIES gpuHeapProps(D3D12_HEAP_TYPE_DEFAULT);
    CD3DX12_RESOURCE_DESC gpuBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(byteSize);
    ThrowIfFailed(device->CreateCommittedResource(&gpuHeapProps, D3D12_HEAP_FLAG_NONE, &gpuBufferDesc,
                                                  D3D12_RESOURCE_STATE_COMMON, nullptr,
                                                  IID_PPV_ARGS(gpuBuffer.GetAddressOf())));

    // Staging buffer of the same size in the upload heap.
    CD3DX12_HEAP_PROPERTIES stagingHeapProps(D3D12_HEAP_TYPE_UPLOAD);
    CD3DX12_RESOURCE_DESC stagingBufferDesc = CD3DX12_RESOURCE_DESC::Buffer(byteSize);
    ThrowIfFailed(device->CreateCommittedResource(&stagingHeapProps, D3D12_HEAP_FLAG_NONE, &stagingBufferDesc,
                                                  D3D12_RESOURCE_STATE_GENERIC_READ, nullptr,
                                                  IID_PPV_ARGS(uploadBuffer.GetAddressOf())));

    // Describe the source data as a single row of byteSize bytes.
    D3D12_SUBRESOURCE_DATA sourceData = {};
    sourceData.pData = initData;
    sourceData.RowPitch = byteSize;
    sourceData.SlicePitch = sourceData.RowPitch;

    // Record: common -> copy destination, copy through the staging buffer, copy destination -> generic read.
    CD3DX12_RESOURCE_BARRIER toCopyDest = CD3DX12_RESOURCE_BARRIER::Transition(
        gpuBuffer.Get(), D3D12_RESOURCE_STATE_COMMON, D3D12_RESOURCE_STATE_COPY_DEST);
    cmdList->ResourceBarrier(1, &toCopyDest);

    UpdateSubresources<1>(cmdList, gpuBuffer.Get(), uploadBuffer.Get(), 0, 0, 1, &sourceData);

    CD3DX12_RESOURCE_BARRIER toGenericRead = CD3DX12_RESOURCE_BARRIER::Transition(
        gpuBuffer.Get(), D3D12_RESOURCE_STATE_COPY_DEST, D3D12_RESOURCE_STATE_GENERIC_READ);
    cmdList->ResourceBarrier(1, &toGenericRead);

    return gpuBuffer;
}
/**
 * @brief Execute the recorded commands, wait until they completed and reset the command list
 * so it can record again.
 * @param dwMilliseconds the maximum time to wait for the GPU, in milliseconds.
 * @throws std::runtime_error if the wait times out or fails.
 */
void GPUMemRwBw::ExecuteWaitForCommandQueue(DWORD dwMilliseconds) {
    // Close and execute the command list.
    ThrowIfFailed(m_commandList->Close());
    ID3D12CommandList *listsToExecute[] = {m_commandList.Get()};
    m_commandQueue->ExecuteCommandLists(ARRAYSIZE(listsToExecute), listsToExecute);
    // Signal and increment the fence value.
    const UINT64 fenceL = m_fenceValue;
    ThrowIfFailed(m_commandQueue->Signal(m_fence.Get(), fenceL));
    m_fenceValue++;
    // Wait until command queue is done.
    if (m_fence->GetCompletedValue() < fenceL) {
        ThrowIfFailed(m_fence->SetEventOnCompletion(fenceL, m_eventHandle));
        const DWORD waitResult = WaitForSingleObject(m_eventHandle, dwMilliseconds);
        if (waitResult != WAIT_OBJECT_0) {
            // The GPU may still be executing the list; resetting the allocator now would be
            // invalid, so stop here instead of continuing with a half finished submission.
            throw std::runtime_error("Error: timed out or failed while waiting for the GPU command queue.");
        }
    }
    // Reset the command allocator and command list.
    ID3D12CommandAllocator *activeAllocator = m_commandAllocator.Get();
    ThrowIfFailed(activeAllocator->Reset());
    ThrowIfFailed(m_commandList->Reset(activeAllocator, nullptr));
}
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#pragma once
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers.
#endif
#include <algorithm>
#include <chrono>
#include <random>
#include <stdexcept>
#include <string>
#include <vector>
#include <wrl.h>
#include <D3Dcompiler.h>
#include <DirectXMath.h>
#include <d3d12.h>
#include <d3d12shader.h>
#include <dxgi1_6.h>
#include "../directx_third_party/DXSampleHelper.h"
#include "../directx_third_party/d3dx12.h"
#include "../directx_utils/D3D12Timer.h"
#include "BenchmarkOptions.h"
// linker
#pragma comment(lib, "dxguid.lib")
#pragma comment(lib, "dxgi.lib")
#pragma comment(lib, "d3d12.lib")
#pragma comment(lib, "d3dcompiler.lib")
#if defined(_DEBUG)
#include <dxgidebug.h>
#endif
using namespace DirectX;
// Note that while ComPtr is used to manage the lifetime of resources on the CPU,
// it has no understanding of the lifetime of resources on the GPU. Apps must account
// for the GPU lifetime of resources to avoid destroying objects that may still be
// referenced by the GPU.
// An example of this can be found in the class method: OnDestroy().
using Microsoft::WRL::ComPtr;
using namespace std;
// Parameters handed to the shader through the constant buffer. PrepareDataAndBuffer copies this
// struct byte for byte (memcpy) into the mapped constant buffer, so member order and types must
// stay in sync with the cbuffer declared in the shader.
struct ParameterBuffer {
// Number of elements every GPU thread processes (element count / total thread count).
int numLoop;
// Number of threads per thread group in x, y and z.
UInt3 numThread;
// Number of thread groups per dispatch in x, y and z.
UInt3 numDispatch;
};
/**
 * @brief Take the address of a temporary so it can be passed to an API that expects a pointer.
 * @param v the temporary object.
 * @return T* pointer to the temporary; only valid until the end of the full expression that
 *         created the temporary.
 */
template <typename T>
auto get_rvalue_ptr(T &&v) -> T * {
    return &v;
}
class GPUMemRwBw {
public:
/**
* @brief Constructor, initialize the options.
* @param opts, Options for construct.
* @param usize, the byte size of data array.
*/
GPUMemRwBw(BenchmarkOptions *opts) : opts(opts) {
// The setting of num_thread need be consistent with the the shader file.
m_num_thread = opts->num_threads;
m_num_elements = opts->size / sizeof(float);
uint32_t numThreadGroup = m_num_elements / (m_num_thread.x * m_num_thread.y * m_num_thread.z);
m_num_dispatch = {numThreadGroup, 1, 1};
}
/**
* @brief Destructor, release the fence.
*/
~GPUMemRwBw() {}
/**
* @brief Start and run the benchmark.
*/
void Run();
/**
* @brief Memory read write benchmark.
* @param numElem the length of data array.
* @param loops the number of dispatch tiems for measuring the performance.
* @param numWarmUp the number of warm up dispatch times.
* @return double the time elapsed in ms.
*/
double MemReadWriteBench(SIZE_T numElem, int loops, int numWarmUp);
/**
* @brief Create pipeline including
* create device object, command list, command queue
* and synchronization objects.
*/
void CreatePipeline();
/**
* @brief Setup GPU pipeline resource including creating root signature, pipeline state and compile shader.
*/
void LoadAssets();
/**
* @brief Allocate resouce on both CPU side and GPU side and construct a array of buffers with given length.
* @param numElement the length of data array.
*/
void PrepareDataAndBuffer(SIZE_T numElement);
/**
* @brief Create a default buffer and upload data with the upload buffer.
* @param device the GPU device object.
* @param cmdList the GPU command list object.
* @param initData the data that need to upload.
* @param byteSize the size of data that need to upload.
* @param UploadBuffer the upload that use for upload data.
* @return a constant buffer object.
*/
Microsoft::WRL::ComPtr<ID3D12Resource> CreateDefaultBuffer(ID3D12Device *device, ID3D12GraphicsCommandList *cmdList,
const void *initData, UINT64 byteSize,
Microsoft::WRL::ComPtr<ID3D12Resource> &uploadBuffer);
/**
* @brief Execute the commands and wait until command completed.
*/
void ExecuteWaitForCommandQueue(DWORD dwMilliseconds = 30000);
/**
* @brief Check result correctness.
* @param numElement the length of data array.
* @return true if result is correct.
*/
bool CheckData(SIZE_T numElement);
private:
// Dispatch layout of command.
UInt3 m_num_dispatch;
// Number of elements in data buffer.
uint32_t m_num_elements = 0;
// Number of threads each group.
UInt3 m_num_thread;
// Pipeline objects.
ComPtr<ID3D12Device> m_device = nullptr;
ComPtr<ID3D12CommandAllocator> m_commandAllocator = nullptr;
ComPtr<ID3D12CommandQueue> m_commandQueue = nullptr;
ComPtr<ID3D12GraphicsCommandList> m_commandList = nullptr;
// Upload buffer to upload data from CPU to GPU.
ComPtr<ID3D12Resource> m_uploadBuffer = nullptr;
// Input buffer to pass data into GPU.
ComPtr<ID3D12Resource> m_inputBuffer = nullptr;
// Readback buffer to copy data from GPU to CPU for data check.
ComPtr<ID3D12Resource> m_readbackBuffer = nullptr;
// Output buffer.
ComPtr<ID3D12Resource> m_outputBuffer = nullptr;
// Constant buffer.
ComPtr<ID3D12Resource> m_constantBuffer = nullptr;
// Root signature of GPU pipeline.
ComPtr<ID3D12RootSignature> m_rootSignature = nullptr;
// Pipeline object to execute.
ComPtr<ID3D12PipelineState> m_PSO = nullptr;
// Shader objects that loaded.
ComPtr<ID3DBlob> m_shader = nullptr;
// Synchronization objects.
ComPtr<ID3D12Fence1> m_fence = nullptr;
HANDLE m_eventHandle = nullptr;
UINT64 m_fenceValue = 0;
// GPU timer.
D3D12::D3D12Timer m_gpuTimer;
// User options.
BenchmarkOptions *opts;
};
<?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|x64">
<Configuration>Debug</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
<ProjectConfiguration Include="Release|x64">
<Configuration>Release</Configuration>
<Platform>x64</Platform>
</ProjectConfiguration>
</ItemGroup>
<PropertyGroup Label="Globals">
<VCProjectVersion>16.0</VCProjectVersion>
<Keyword>Win32Proj</Keyword>
<ProjectGuid>{7880ced5-0e93-4003-9f9b-2ed29bc4bd0f}</ProjectGuid>
<RootNamespace>GPUMemRwBw</RootNamespace>
<WindowsTargetPlatformVersion>10.0</WindowsTargetPlatformVersion>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>true</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
<ConfigurationType>Application</ConfigurationType>
<UseDebugLibraries>false</UseDebugLibraries>
<PlatformToolset>v143</PlatformToolset>
<WholeProgramOptimization>true</WholeProgramOptimization>
<CharacterSet>Unicode</CharacterSet>
</PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
<ImportGroup Label="ExtensionSettings">
</ImportGroup>
<ImportGroup Label="Shared">
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
</ImportGroup>
<PropertyGroup Label="UserMacros" />
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<WarningLevel>Level3</WarningLevel>
<FunctionLevelLinking>true</FunctionLevelLinking>
<IntrinsicFunctions>true</IntrinsicFunctions>
<SDLCheck>true</SDLCheck>
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<ConformanceMode>true</ConformanceMode>
</ClCompile>
<Link>
<SubSystem>Console</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<OptimizeReferences>true</OptimizeReferences>
<GenerateDebugInformation>true</GenerateDebugInformation>
</Link>
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="../directx_utils/D3D12Timer.cpp"/>
<ClCompile Include="Main.cpp"/>
<ClCompile Include="GPUMemRwBw.cpp"/>
</ItemGroup>
<ItemGroup>
<ClInclude Include="../directx_utils/D3D12Timer.h"/>
<ClInclude Include="../directx_utils/Options.h"/>
<ClInclude Include="../directx_third_party/d3dx12.h"/>
<ClInclude Include="../directx_third_party/DXSampleHelper.h"/>
<ClInclude Include="GPUMemRwBw.h"/>
<ClInclude Include="BenchmarkOptions.h"/>
</ItemGroup>
<ItemGroup>
<!-- ReadWrite.hlsl is not compiled by the build: the custom build command below only copies it
     to $(OutDir). GPUMemRwBw.cpp passes the file name to CompileShader at run time. -->
<CustomBuild Include="ReadWrite.hlsl">
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">4.0</ShaderModel>
<ShaderType Condition="'$(Configuration)|$(Platform)'=='Release|x64'">Compute</ShaderType>
<ShaderModel Condition="'$(Configuration)|$(Platform)'=='Release|x64'">4.0</ShaderModel>
<FileType>Document</FileType>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</ExcludedFromBuild>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">copy %(Identity) "$(OutDir)" &gt; NUL</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">$(OutDir)\%(Identity)</Outputs>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release|x64'">false</ExcludedFromBuild>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">copy %(Identity) "$(OutDir)" &gt; NUL</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">$(OutDir)\%(Identity)</Outputs>
</CustomBuild>
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">
</ImportGroup>
</Project>
\ No newline at end of file
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
#include <iostream>
#include <sstream>
#include "GPUMemRwBw.h"
/**
 * @brief Entry point: run the benchmark for the single size given with --size, or for every
 * size from --minbytes to --maxbytes, doubling each step.
 * @return 0 on success, 1 if the options are invalid or the benchmark throws.
 */
int main(int argc, char *argv[]) {
    BenchmarkOptions option(argc, argv);
    option.init();
    // --size defaults to -1, which is stored as all-ones in the unsigned member.
    const unsigned long long sizeNotSet = static_cast<unsigned long long>(-1);
    try {
        if (option.size != sizeNotSet) {
            // Run only one size
            GPUMemRwBw benchmark(&option);
            benchmark.Run();
            return 0;
        }
        // Doubling zero never makes progress, so an empty lower bound would loop forever.
        if (option.min_size == 0) {
            std::cout << "Error: --minbytes must be greater than 0." << std::endl;
            return 1;
        }
        // Run all sizes
        for (unsigned long long usize = option.min_size; usize <= option.max_size; usize += usize) {
            option.size = usize;
            GPUMemRwBw benchmark(&option);
            benchmark.Run();
            // The next size would exceed max_size; stop here so that doubling cannot wrap
            // around when max_size is close to the largest representable value.
            if (usize > option.max_size / 2) {
                break;
            }
        }
    } catch (const std::exception &e) {
        // Report the failure instead of terminating on an unhandled exception.
        std::cout << "Error: " << e.what() << std::endl;
        return 1;
    }
    return 0;
}
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
// Read-only input buffer bound to register t0; in this file only the ReadWrite kernel reads it.
StructuredBuffer<float> gInputA : register(t0);
// Read/write buffer bound to register u0; every kernel in this file accesses it.
RWStructuredBuffer<float> gOutput : register(u0);
// Launch parameters bound to constant buffer register b0.
cbuffer ParamBuffer : register(b0) {
// Number of consecutive elements each thread processes.
int numLoop;
// Used by the kernels as the thread count per group in x/y/z when flattening the thread id.
uint3 numThreads;
// Used by the kernels as the group count in x/y/z when flattening the thread id.
uint3 numDispatch;
};
// Read kernel: every thread loads numLoop consecutive floats from gOutput.
// X, Y and Z are not defined in this file and have to be supplied as macros at compile time.
[numthreads(X, Y, Z)]
void Read(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID, uint3 dispatchId : SV_DispatchThreadID)
{
// Flatten the 3D dispatch thread id into one linear thread index.
uint idStart = dispatchId.x +
dispatchId.y * numDispatch.x * numThreads.x +
dispatchId.z * numDispatch.x * numThreads.x * numDispatch.y * numThreads.y;
// This thread owns the element range [start, end).
uint start = idStart * numLoop;
uint end = start + numLoop;
for (uint i = start; i < end; i++)
{
float c = gOutput[i];
if (c == -1)
{
// This branch is not expected to be taken (it assumes gOutput never holds -1).
// It only exists so that the compiler cannot optimize the load away.
gOutput[i] = 0;
}
}
}
// Write kernel: every thread stores the pattern (index % 256) into numLoop consecutive
// elements of gOutput.
// X, Y and Z are not defined in this file and have to be supplied as macros at compile time.
[numthreads(X, Y, Z)]
void Write(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID, uint3 dispatchId : SV_DispatchThreadID)
{
// Flatten the 3D dispatch thread id into one linear thread index.
uint idStart = dispatchId.x +
dispatchId.y * numDispatch.x * numThreads.x +
dispatchId.z * numDispatch.x * numThreads.x * numDispatch.y * numThreads.y;
// This thread owns the element range [start, end).
uint start = idStart * numLoop;
uint end = start + numLoop;
for (uint i = start; i < end; i++)
{
gOutput[i] = i % 256;
}
}
// ReadWrite kernel: every thread copies numLoop consecutive elements from gInputA to gOutput.
// X, Y and Z are not defined in this file and have to be supplied as macros at compile time.
[numthreads(X, Y, Z)]
void ReadWrite(uint threadID : SV_GroupIndex, uint3 groupID : SV_GroupID, uint3 dispatchId : SV_DispatchThreadID)
{
// Flatten the 3D dispatch thread id into one linear thread index.
uint idStart = dispatchId.x +
dispatchId.y * numDispatch.x * numThreads.x +
dispatchId.z * numDispatch.x * numThreads.x * numDispatch.y * numThreads.y;
// This thread owns the element range [start, end).
uint start = idStart * numLoop;
uint end = start + numLoop;
for (uint i = start; i < end; i++)
{
gOutput[i] = gInputA[i];
}
}
......@@ -7,6 +7,12 @@
#include <sstream>
#include <string>
// Plain aggregate of three unsigned components (x, y, z).
struct UInt3 {
    unsigned x;
    unsigned y;
    unsigned z;
};
class Options {
protected:
char **begin;
......@@ -43,6 +49,51 @@ class Options {
return defaults;
}
/**
 * @brief Split a comma separated list and convert every item to unsigned int.
 * @param str the comma separated list, e.g. "256,1,1".
 * @return std::vector<unsigned int> the converted values in input order.
 * @throws std::invalid_argument if an item is empty, negative, not a number or followed by
 *         extra characters.
 * @throws std::out_of_range if an item does not fit into unsigned int.
 */
std::vector<unsigned int> splitAndConvertToInt(const std::string &str) {
    std::vector<unsigned int> result;
    std::stringstream ss(str);
    std::string token;
    while (std::getline(ss, token, ',')) {
        // std::stoul accepts a minus sign and wraps the value around, so reject it up front.
        if (token.find('-') != std::string::npos) {
            throw std::invalid_argument("Invalid argument: " + token + " - negative value");
        }
        std::size_t consumed = 0;
        unsigned long parsed = 0;
        try {
            parsed = std::stoul(token, &consumed);
        } catch (const std::invalid_argument &e) {
            throw std::invalid_argument("Invalid argument: " + token + " - " + e.what());
        }
        // std::stoul stops at the first non-digit; anything left over means the item is malformed.
        if (consumed != token.size()) {
            throw std::invalid_argument("Invalid argument: " + token + " - unexpected trailing characters");
        }
        // unsigned long may be wider than unsigned int; make sure the narrowing loses nothing.
        const unsigned int narrowed = static_cast<unsigned int>(parsed);
        if (narrowed != parsed) {
            throw std::out_of_range("Invalid argument: " + token + " - value does not fit into unsigned int");
        }
        result.push_back(narrowed);
    }
    return result;
}
/**
 * @brief Get the UInt3 value of a cmd line argument given as "<x>,<y>,<z>".
 * Prints an error and exits the process with code 1 if the argument is present but is not
 * made of exactly three convertible items.
 * @param option the cmd line argument.
 * @param defaults the default value.
 * @return UInt3 the parsed value of cmd line argument 'option', or 'defaults' if it is not given.
 */
UInt3 get_cmd_line_argument_uint3(const std::string &option, const UInt3 &defaults) {
    char *rawValue = get_cmd_option(option);
    if (rawValue == nullptr) {
        return defaults;
    }
    try {
        const std::vector<unsigned int> parts = splitAndConvertToInt(rawValue);
        if (parts.size() == 3) {
            return {parts[0], parts[1], parts[2]};
        }
        std::cout << "Error: Invalid argument - " << option << " should be unsigned int3" << '\n';
        exit(1);
    } catch (const std::exception &e) {
        std::cout << "Error: Invalid argument - " << option << " should be unsigned int3" << e.what() << '\n';
        exit(1);
    }
    // Not reached: both paths above either return or exit.
    return defaults;
}
/**
* @brief Get the string type value of cmd line argument.
* @param option the cmd line argument.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment