Commit 3bb2e7a5 authored by one's avatar one
Browse files

Update gemv benchmark scripts

- Remove gemv_export.cpp
- Update Makefile and README for compiler variable changes
- Adjust run-all.sh for consistent build commands
parent 0fe0b01f
GPU_ARCH ?= gfx936
CXX ?= hipcc
CXX_COMPILER ?= hipcc
CXX_FLAGS ?= -std=c++17 -O3
TARGET := gemv_bench
SRC := main.cpp
LIB := libgemv_bf16.so
LIB_SRC := gemv_export.cpp
DEP := gemv_bf16.h gemv_utils.h hip_compat.h
IS_HIPCC := $(findstring hipcc,$(CXX))
IS_HIPCC := $(findstring hipcc,$(CXX_COMPILER))
# 根据编译器调整 Flags
ifneq (,$(IS_HIPCC))
......@@ -19,18 +17,12 @@ else
ARCH_FLAGS := -arch=$(GPU_ARCH) -x cu
endif
.PHONY: all clean lib lib.so
.PHONY: all clean
all: $(TARGET)
lib: $(LIB)
lib.so: $(LIB)
$(LIB): $(LIB_SRC) $(DEP)
$(CXX) $(CXX_FLAGS) $(ARCH_FLAGS) -shared -fPIC -o $@ $<
$(TARGET): $(SRC) $(DEP)
$(CXX) $(CXX_FLAGS) $(ARCH_FLAGS) -o $@ $<
$(CXX_COMPILER) $(CXX_FLAGS) $(ARCH_FLAGS) -o $@ $<
clean:
rm -f $(TARGET) $(LIB) *.o lib lib.so
rm -f $(TARGET) *.o
......@@ -13,10 +13,10 @@ GEMV Benchmarks
```bash
# 使用 HIPCC:
CXX=hipcc make GPU_ARCH=gfx936
make CXX_COMPILER=hipcc GPU_ARCH=gfx936
# 使用 NVCC:
CXX=nvcc make GPU_ARCH=sm_80
make CXX_COMPILER=nvcc GPU_ARCH=sm_80
```
## Run
......
#include "gemv_bf16.h"
extern "C" {
/**
 * C-linkage launcher for the gemv_bf16_TN_vec_warp_unroll kernel, instantiated
 * with USE_NTL = true and UNROLL = 4.
 *
 * Intended operation: y = alpha * A^T * x + beta * y
 *
 * The launch uses 128 threads per block, i.e. 128 / WARP_SIZE warps per block,
 * and enough blocks that the grid holds at least M warps in total. No error
 * checking is performed on the launch; callers that need it should query the
 * runtime's last-error state afterwards.
 *
 * NOTE(review): the d_ prefix suggests d_A, d_x and d_y are device pointers;
 * the kernel itself lives in gemv_bf16.h and is not visible here -- confirm
 * the expected memory space and layout against that header.
 *
 * @param d_A: input matrix A
 * @param M: number of rows of A; the call is a no-op when M <= 0
 * @param K: number of columns of A
 * @param lda: leading dimension of A
 * @param d_x: input vector x
 * @param d_y: output vector y
 * @param alpha: scaling factor for A^T * x
 * @param beta: scaling factor for y
 */
void gemv_bf16_TN_vec_warp_unroll_ntl(hip_bfloat16 *d_A, int M, int K, int lda,
                                      hip_bfloat16 *d_x, hip_bfloat16 *d_y,
                                      float alpha, float beta) {
  // A non-positive M would produce a grid of zero (or, after the implicit
  // conversion to unsigned, an enormous number of) blocks, which is not a
  // valid launch configuration. Treat it as an empty problem instead.
  if (M <= 0) {
    return;
  }

  constexpr bool USE_NTL = true;
  constexpr int UNROLL = 4;
  constexpr int BLOCK_SIZE = 128;

  const int warps_per_block = BLOCK_SIZE / WARP_SIZE;

  // Ceiling division written as (M - 1) / w + 1 so that it cannot overflow
  // int when M is close to INT_MAX (M >= 1 is guaranteed by the guard above).
  const int grid = (M - 1) / warps_per_block + 1;

  gemv_bf16_TN_vec_warp_unroll<USE_NTL, UNROLL>
      <<<dim3(grid), dim3(BLOCK_SIZE)>>>(M, K, alpha, d_A, lda, d_x, beta, d_y);
}
} // extern "C"
#!/bin/bash
set -e
# BW1000
export HIP_VISIBLE_DEVICES=1
BIND_CMD="numactl -N 1 -m 1"
# BW150
# export HIP_VISIBLE_DEVICES=1
# BIND_CMD="numactl -N 0 -m 0"
# BW1000
export HIP_VISIBLE_DEVICES=1
BIND_CMD="numactl -N 1 -m 1"
make clean
CXX=hipcc make GPU_ARCH=gfx936
# CXX=nvcc make GPU_ARCH=sm_80
make CXX_COMPILER=hipcc GPU_ARCH=gfx936
# make CXX_COMPILER=nvcc GPU_ARCH=sm_80
W1="--verify 1 -M 11264 -K 4096 --alpha 1 --beta 0 -B 128"
W2="--verify 1 -M 4096 -K 11264 --alpha 1 --beta 0 -B 128"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment