Unverified commit 01f98730, authored by Lifu Huang, committed by GitHub
Browse files

Fix CI test OOM issue. (#7799)

parent 199d6218
...@@ -38,6 +38,8 @@ PROMPTS = [ ...@@ -38,6 +38,8 @@ PROMPTS = [
"What are the main components of a computer?", "What are the main components of a computer?",
] ]
MEM_FRACTION_STATIC = 0.8
class OperationType(Enum): class OperationType(Enum):
LOAD = "load" LOAD = "load"
...@@ -339,6 +341,7 @@ class LoRAUpdateEngineTestSession(LoRAUpdateTestSessionBase): ...@@ -339,6 +341,7 @@ class LoRAUpdateEngineTestSession(LoRAUpdateTestSessionBase):
lora_paths=self.lora_paths, lora_paths=self.lora_paths,
lora_backend=self.lora_backend, lora_backend=self.lora_backend,
torch_dtype=torch.float16, torch_dtype=torch.float16,
mem_fraction_static=MEM_FRACTION_STATIC,
max_loras_per_batch=self.max_loras_per_batch, max_loras_per_batch=self.max_loras_per_batch,
disable_cuda_graph=self.disable_cuda_graph, disable_cuda_graph=self.disable_cuda_graph,
cuda_graph_max_bs=self.cuda_graph_max_bs, cuda_graph_max_bs=self.cuda_graph_max_bs,
...@@ -440,6 +443,8 @@ class LoRAUpdateServerTestSession(LoRAUpdateTestSessionBase): ...@@ -440,6 +443,8 @@ class LoRAUpdateServerTestSession(LoRAUpdateTestSessionBase):
"42", "42",
"--max-running-request", "--max-running-request",
"1", "1",
"--mem-fraction-static",
str(MEM_FRACTION_STATIC),
] ]
if self.disable_cuda_graph: if self.disable_cuda_graph:
other_args.append("--disable-cuda-graph") other_args.append("--disable-cuda-graph")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment