Unverified Commit 068c5ce1 authored by Pedram Alizadeh's avatar Pedram Alizadeh Committed by GitHub
Browse files

Merge pull request #13 from PedramAlizadeh/documentation

Added documentation, pthread linking, and Hex value printing for better debugging
parents a438ef95 125dfffc
# Changelog for TransferBench # Changelog for TransferBench
## v1.14
### Added
- Added documentation
- Added pthread linking in src/Makefile and CMakeLists.txt
- Added printing of the hex value of the floats for output and reference
## v1.13 ## v1.13
### Added ### Added
- Added support for cmake - Added support for cmake
......
...@@ -6,9 +6,9 @@ else() ...@@ -6,9 +6,9 @@ else()
endif() endif()
cmake_minimum_required(VERSION 3.5) cmake_minimum_required(VERSION 3.5)
project(TransferBench CXX) project(TransferBench CXX)
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -L${ROCM_PATH}/hsa/lib") set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3 -L${ROCM_PATH}/lib")
include_directories(${ROCM_PATH}/hsa/include) include_directories(${ROCM_PATH}/include)
link_libraries(numa hsa-runtime64) link_libraries(numa hsa-runtime64 pthread)
set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ..) set (CMAKE_RUNTIME_OUTPUT_DIRECTORY ..)
add_executable(TransferBench src/TransferBench.cpp) add_executable(TransferBench src/TransferBench.cpp)
target_include_directories(TransferBench PRIVATE src/include) target_include_directories(TransferBench PRIVATE src/include)
......
*******************************************
Welcome to TransferBench's documentation!
*******************************************
TransferBench is a simple utility capable of benchmarking simultaneous transfers between user-specified devices (CPUs/GPUs).
A Transfer is defined as a single operation where an executor reads and adds together values from source (SRC) memory locations, then writes the sum to destination (DST) memory locations. This simplifies to a simple copy operation when dealing with a single SRC and a single DST.
The user has control over the SRC and DST memory locations by indicating memory type followed by the device index. TransferBench supports coarse-grained pinned host memory, unpinned host memory, fine-grained host memory, coarse-grained global device memory, fine-grained global device memory, and null memory (for an empty transfer). In addition, the user can determine the size of the transfer (number of bytes to copy) for their tests.
The executor of the transfer can also be specified by the user. The options are CPU, kernel-based GPU, and SDMA-based GPU (DMA) executors. TransferBench also provides the option to choose the number of sub-executors. In the case of a CPU executor, this argument specifies the number of CPU threads, while for a GPU executor it defines the number of compute units (CUs). If DMA is specified as the executor, the sub-executor argument determines the number of streams to be used.
For more examples, please refer to the example.cfg file in the examples folder.
...@@ -3,12 +3,12 @@ ROCM_PATH ?= /opt/rocm ...@@ -3,12 +3,12 @@ ROCM_PATH ?= /opt/rocm
HIPCC=$(ROCM_PATH)/bin/hipcc HIPCC=$(ROCM_PATH)/bin/hipcc
EXE=TransferBench EXE=TransferBench
CXXFLAGS = -O3 -I. -Iinclude -I$(ROCM_PATH)/hsa/include -lnuma -L$(ROCM_PATH)/hsa/lib -lhsa-runtime64 CXXFLAGS = -O3 -I. -Iinclude -I$(ROCM_PATH)/include -lnuma -L$(ROCM_PATH)/lib -lhsa-runtime64
LDFLAGS += -lpthread
all: $(EXE) all: $(EXE)
$(EXE): $(EXE).cpp $(shell find -regex ".*\.\hpp") $(EXE): $(EXE).cpp $(shell find -regex ".*\.\hpp")
$(HIPCC) $(CXXFLAGS) $< -o ../$@ $(HIPCC) $(CXXFLAGS) $< -o ../$@ $(LDFLAGS)
clean: clean:
rm -f *.o ../$(EXE) rm -f *.o ../$(EXE)
...@@ -1452,8 +1452,7 @@ void Transfer::ValidateDst(EnvVars const& ev) ...@@ -1452,8 +1452,7 @@ void Transfer::ValidateDst(EnvVars const& ev)
{ {
if (reference[i] != output[i]) if (reference[i] != output[i])
{ {
printf("\n[ERROR] Destination array %d value at index %lu (%.3f) does not match expected value (%.3f)\n", printf("\n[ERROR] Destination array %d value at index %lu (%.3f) [%X] does not match expected value (%.3f) [%X]\n", dstIdx, i, output[i], *(unsigned int*)&output[i], reference[i], *(unsigned int*)&reference[i]);
dstIdx, i, output[i], reference[i]);
printf("[ERROR] Failed Transfer details: #%d: %s -> [%c%d:%d] -> %s\n", printf("[ERROR] Failed Transfer details: #%d: %s -> [%c%d:%d] -> %s\n",
this->transferIndex, this->transferIndex,
this->SrcToStr().c_str(), this->SrcToStr().c_str(),
......
...@@ -28,7 +28,7 @@ THE SOFTWARE. ...@@ -28,7 +28,7 @@ THE SOFTWARE.
#include <time.h> #include <time.h>
#include "Kernels.hpp" #include "Kernels.hpp"
#define TB_VERSION "1.13" #define TB_VERSION "1.14"
extern char const MemTypeStr[]; extern char const MemTypeStr[];
extern char const ExeTypeStr[]; extern char const ExeTypeStr[];
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment