Unverified commit f7244d1c, authored by jthomson04 and committed by GitHub
Browse files

fix: Fix flaky KVBM disagg accuracy test (#4135)


Signed-off-by: jthomson04 <jwillthomson19@gmail.com>
parent 3d036fc4
......@@ -213,6 +213,21 @@ class LLMServerManager:
# Give frontend time to start up
time.sleep(5)
model = os.environ.get(
"KVBM_MODEL_ID", "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
)
# Try to download the model.
print("Attempting model download...")
try:
subprocess.run(
f"pip install hf_transfer && HF_HUB_ENABLE_HF_TRANSFER=1 hf download {model}",
check=True,
shell=True,
)
except subprocess.CalledProcessError:
print("Model download failed. Is this a locally stored model?")
# Launch decoder
self.process_decoder = subprocess.Popen(
self.decoder_cmd,
......@@ -223,11 +238,6 @@ class LLMServerManager:
)
print(f"Decoder process started with PID: {self.process_decoder.pid}")
# The prefiller and decoder cannot download the model simultaneously,
# because the Hugging Face Rust library (invoked by fetch_llm) needs to hold an exclusive lock on the model files.
print("Sleeping for 60 seconds to allow the decoder to download the model. ")
time.sleep(60)
# Launch prefiller
self.process_prefiller = subprocess.Popen(
self.prefiller_cmd,
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment