"tests/test_marching_cubes.py" did not exist on "9c6b58c5ad5eb809a341925add44dfc0bdf2df5e"
Unverified commit 4c3f8a70, authored by drbh and committed by GitHub

fix: allocate tmp based on sgmv kernel if available (#2345)

* fix: allocate tmp based on sgmv kernel if available

* fix: re add copy build artifacts step for punica kernels
parent 155f9c98
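The "if available" part of the fix keys off has_sgmv(), i.e. whether the compiled punica SGMV extension can actually be imported at runtime. As a rough illustration only (the module name punica_kernels and the DISABLE_SGMV override are assumptions, not taken from this commit), an import-based availability check looks roughly like this:

import os

# Assumed detection pattern (illustrative, not copied from the commit): the flag
# stays False whenever the compiled extension cannot be imported, so callers such
# as get_tmp_tensors() can fall back to an allocation path that needs no kernels.
try:
    import punica_kernels as _kernels  # assumed extension module name

    HAS_SGMV = not bool(os.environ.get("DISABLE_SGMV", ""))
except ImportError:
    _kernels = None
    HAS_SGMV = False


def has_sgmv() -> bool:
    return HAS_SGMV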
@@ -226,6 +226,8 @@ COPY --from=exllamav2-kernels-builder /usr/src/build/lib.linux-x86_64-cpython-31
 COPY --from=awq-kernels-builder /usr/src/llm-awq/awq/kernels/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
 # Copy build artifacts from eetq kernels builder
 COPY --from=eetq-kernels-builder /usr/src/eetq/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
+# Copy build artifacts from lorax punica kernels builder
+COPY --from=lorax-punica-builder /usr/src/lorax-punica/server/punica_kernels/build/lib.linux-x86_64-cpython-310 /opt/conda/lib/python3.10/site-packages
 # Copy build artifacts from fbgemm builder
 COPY --from=fbgemm-builder /usr/src/fbgemm/fbgemm_gpu/_skbuild/linux-x86_64-3.10/cmake-install /opt/conda/lib/python3.10/site-packages
 # Copy build artifacts from vllm builder
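The added COPY step re-adds the prebuilt punica kernel artifacts from the lorax-punica builder stage into the conda site-packages of the runtime image; without it, the kernels are never importable and has_sgmv() reports them as unavailable. A minimal smoke check along these lines (the module name punica_kernels is an assumption) could confirm the artifacts made it into the final image:

# Hypothetical smoke check, not part of the commit: verify that the copied
# punica kernel extension is importable from the runtime environment.
import importlib.util

spec = importlib.util.find_spec("punica_kernels")  # assumed module name
print("punica_kernels importable:", spec is not None)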
@@ -151,13 +151,17 @@ def get_tmp_expand_size(size: int) -> int:
 def get_tmp_tensors(
     nsegments: int, lora_rank: int, device: torch.device
 ) -> Tuple[torch.Tensor, torch.Tensor]:
-    if use_cutlass_shrink(lora_rank) and has_sgmv():
+    use_cutlass = use_cutlass_shrink(lora_rank) and has_sgmv()
+    has_sgmv_available = has_sgmv()
+
+    if use_cutlass:
         tmp = get_tmp_tensor_for_size(nsegments, device)
         return tmp, tmp
+    elif has_sgmv_available:
+        return get_tmp_tensor(device), get_tmp_tensor_for_size(nsegments, device)
     else:
-        tmp_shrink = get_tmp_tensor(device)
-        tmp_expand = get_tmp_tensor_for_size_no_kernels(nsegments, device)
-        return tmp_shrink, tmp_expand
+        tmp = get_tmp_tensor_for_size(nsegments, device)
+        return tmp, tmp


 def lora_a_sgmv_cutlass(
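For context, a minimal call-site sketch of the updated helper; the import path and the concrete sizes are assumptions for illustration, not part of the diff. The point of the change is that the returned scratch pair is sized via a kernel query only when the SGMV kernels are actually present, and is allocated without touching the kernels otherwise:

# Hypothetical usage sketch (import path and sizes are assumptions): allocate the
# shrink/expand scratch tensors once per batch and pass them to the SGMV kernels.
import torch

from text_generation_server.utils.sgmv import get_tmp_tensors, has_sgmv

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
nsegments, lora_rank = 4, 16

tmp_shrink, tmp_expand = get_tmp_tensors(nsegments, lora_rank, device)
print("sgmv kernels available:", has_sgmv())
print("tmp_shrink:", tuple(tmp_shrink.shape), "tmp_expand:", tuple(tmp_expand.shape))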