Unverified commit 21b44473, authored by Dmitry Tokarev, committed by GitHub
Browse files

feat: lmcache on CUDA 13 and ARM (#7534)


Signed-off-by: Dmitry Tokarev <dtokarev@nvidia.com>
parent 2263defc
......@@ -231,7 +231,7 @@ echo "\n=== Installing LMCache from source ==="
# (undefined symbol: c10::cuda::c10_cuda_check_implementation).
# Build from source AFTER vLLM so c_ops.so compiles against the installed PyTorch.
# Ref: https://docs.lmcache.ai/getting_started/installation.html#install-latest-lmcache-from-source
if [ "$DEVICE" = "cuda" ] && [[ "$CUDA_VERSION_MAJOR" == "12" ]] && [ "$ARCH" = "amd64" ]; then
if [ "$DEVICE" = "cuda" ]; then
git clone --depth 1 --branch v${LMCACHE_REF} https://github.com/LMCache/LMCache.git ${INSTALLATION_DIR}/lmcache
cd ${INSTALLATION_DIR}/lmcache
uv pip install -r requirements/build.txt
......@@ -256,7 +256,7 @@ elif [ "$DEVICE" = "xpu" ] && [ "$ARCH" = "amd64" ]; then
uv pip install lmcache==${LMCACHE_REF}
echo "✓ LMCache ${LMCACHE_REF} installed from PyPI (XPU)"
else
echo "⚠ Skipping LMCache (ARM64 or CUDA 13 not supported)"
echo "⚠ Skipping LMCache for DEVICE=${DEVICE} ARCH=${ARCH} (not supported)"
fi
if [ "$DEVICE" = "cuda" ]; then
......
......@@ -148,10 +148,6 @@ vllm_configs = {
), # KV cache cap (2x safety over min=559_693_824)
pytest.mark.timeout(360), # ~7x observed 49.0s; old value before profiling
pytest.mark.pre_merge,
pytest.mark.skipif(
_is_cuda13(),
reason="lmcache does not support CUDA 13 as of v0.3.11",
),
],
model="Qwen/Qwen3-0.6B",
request_payloads=[
......@@ -174,10 +170,6 @@ vllm_configs = {
), # KV cache cap (2x safety over min=559_693_824)
pytest.mark.timeout(360), # ~7x observed 49.3s; old value before profiling
pytest.mark.pre_merge,
pytest.mark.skipif(
_is_cuda13(),
reason="lmcache does not support CUDA 13 as of v0.3.11",
),
],
model="Qwen/Qwen3-0.6B",
env={
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment