# User-tunable build settings. Each one is assigned with ?=, so a value already
# supplied by the environment or on the command line takes precedence.

# Compiler driver used to build the benchmark.
CXX_COMPILER ?= hipcc
# Language standard and optimisation level passed on every compile.
CXX_FLAGS ?= -std=c++17 -O3
# Target GPU architecture name, forwarded to the compiler's architecture flag.
GPU_ARCH ?= gfx936

# Name of the executable produced by the build.
TARGET := gemv_bench

# The source file handed to the compiler.
SRC := main.cpp

# Headers listed as prerequisites of $(TARGET) so that editing any of them
# triggers a rebuild.
DEP := gemv_bf16.h \
       gemv_utils.h \
       hip_compat.h

# Expands to "hipcc" when the compiler command contains that substring,
# and to the empty string otherwise.
IS_HIPCC := $(findstring hipcc,$(CXX_COMPILER))

# Pick the architecture flags that match the compiler in use.
ifeq ($(IS_HIPCC),)
    # Anything that is not hipcc is treated as NVCC: select the architecture
    # and pass "-x cu" alongside it.
    ARCH_FLAGS := -arch=$(GPU_ARCH) -x cu
else
    # HIPCC: select the offload target architecture.
    ARCH_FLAGS := --offload-arch=$(GPU_ARCH)
endif

# Default goal: running `make` with no arguments builds the benchmark binary.
all: $(TARGET)

# `all` and `clean` name actions rather than files, so they must run even if
# a file with the same name exists in this directory.
.PHONY: all clean

# If a recipe fails (or is interrupted) after it has started writing its
# target, delete that target so a partial binary is never mistaken for an
# up-to-date one on the next run. This setting applies to the whole makefile.
.DELETE_ON_ERROR:

# Build the benchmark executable directly from the source file.
# The headers in $(DEP) are prerequisites only so that editing one of them
# triggers a rebuild; the recipe deliberately uses $< (the first prerequisite,
# i.e. the source file) rather than $^ so the headers are not passed to the
# compiler as inputs.
$(TARGET): $(SRC) $(DEP)
	$(CXX_COMPILER) $(CXX_FLAGS) $(ARCH_FLAGS) -o $@ $<

# Remove the built executable and any object files in this directory.
# -f keeps the command quiet and successful when there is nothing to delete.
clean:
	rm -f *.o $(TARGET)
