Add missing kernel for CodeLlama-34B on A/H100 (no tensor parallelism) when...

Add missing kernel for CodeLlama-34B on A/H100 (no tensor parallelism) when using Multi-LoRA. (#3350)

Add missing kernel for CodeLlama-34B on A/H100 (no tensor parallelism) when...
Add missing kernel for CodeLlama-34B on A/H100 (no tensor parallelism) when using Multi-LoRA. (#3350)
ae0ccb40 · Or Sharir · GitHub · 739c350c · ae0ccb40 · ae0ccb40
Unverified commit ae0ccb40, authored Mar 13, 2024 by Or Sharir; committed by GitHub on Mar 13, 2024.
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 2 additions and 1 deletion

csrc/punica/bgmv/bgmv_config.h (+1 -0)

tests/lora/test_punica.py (+1 -1)

No files found.
--- a/csrc/punica/bgmv/bgmv_config.h
+++ b/csrc/punica/bgmv/bgmv_config.h
@@ -43,6 +43,7 @@ void bgmv_kernel(out_T *__restrict__ Y, const in_T *__restrict__ X,
    f(in_T, out_T, W_T, narrow, 14336) \
    f(in_T, out_T, W_T, narrow, 16384) \
    f(in_T, out_T, W_T, narrow, 20480) \
+    f(in_T, out_T, W_T, narrow, 22016) \
    f(in_T, out_T, W_T, narrow, 24576) \
    f(in_T, out_T, W_T, narrow, 28672) \
    f(in_T, out_T, W_T, narrow, 32000) \

--- a/tests/lora/test_punica.py
+++ b/tests/lora/test_punica.py
@@ -45,7 +45,7 @@ def _lora_ref_impl(
 H1 = H2 = [
    128, 256, 512, 1024, 1280, 2048, 2560, 2752, 3072, 3456, 3584, 4096, 5120,
    5504, 5632, 6144, 6912, 7168, 8192, 9216, 10240, 11008, 13824, 14336,
-    24576, 32000, 32256, 32512, 32768, 33024
+    22016, 24576, 32000, 32256, 32512, 32768, 33024
 ]
 SEED = [0xabcdabcd987]