[Kernel] Add punica dimension for Baichuan-13B (#4053)

989ae253 · Jee Li · GitHub · 0a430b4a · 989ae253 · 989ae253
Unverified commit 989ae253, authored Apr 13, 2024 by Jee Li; committed by GitHub on Apr 13, 2024.
Showing 3 changed files with 3 additions and 1 deletion

csrc/punica/bgmv/bgmv_config.h +1 -0

tests/lora/test_baichuan.py +1 -1

tests/lora/test_punica.py +1 -0

No files found.
--- a/csrc/punica/bgmv/bgmv_config.h
+++ b/csrc/punica/bgmv/bgmv_config.h
@@ -47,6 +47,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
    f(in_T, out_T, W_T, narrow, 13696) \
    f(in_T, out_T, W_T, narrow, 13824) \
    f(in_T, out_T, W_T, narrow, 14336) \
+    f(in_T, out_T, W_T, narrow, 15360) \
    f(in_T, out_T, W_T, narrow, 16384) \
    f(in_T, out_T, W_T, narrow, 20480) \
    f(in_T, out_T, W_T, narrow, 22016) \

--- a/tests/lora/test_baichuan.py
+++ b/tests/lora/test_baichuan.py
@@ -62,7 +62,7 @@ def test_baichuan_lora(baichuan_lora_files):
 @pytest.mark.skip("Requires multiple GPUs")
-def test_llama_tensor_parallel_equality(baichuan_lora_files):
+def test_baichuan_tensor_parallel_equality(baichuan_lora_files):
    # Cannot use as it will initialize torch.cuda too early...
    # if torch.cuda.device_count() < 4:
    #     pytest.skip(f"Not enough GPUs for tensor parallelism {4}")

--- a/tests/lora/test_punica.py
+++ b/tests/lora/test_punica.py
@@ -72,6 +72,7 @@ H1 = H2 = [
    11008,
    13824,
    14336,
+    15360,
    22016,
    24576,
    27392,