Unverified commit 4cb53ecd, authored by Yineng Zhang, committed by GitHub
Browse files

fix: log warning when disable cuda graph (#5209)

parent 456b008b
...@@ -269,10 +269,10 @@ class CudaGraphRunner: ...@@ -269,10 +269,10 @@ class CudaGraphRunner:
raise Exception( raise Exception(
f"Capture cuda graph failed: {e}\n" f"Capture cuda graph failed: {e}\n"
"Possible solutions:\n" "Possible solutions:\n"
"1. disable cuda graph by --disable-cuda-graph\n" "1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
"2. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n" "2. set --cuda-graph-max-bs to a smaller value (e.g., 32)\n"
"3. disable torch compile by not using --enable-torch-compile\n" "3. disable torch compile by not using --enable-torch-compile\n"
"4. set --cuda-graph-max-bs to a smaller value (e.g., 32)\n" "4. disable cuda graph by --disable-cuda-graph\n"
"Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n" "Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
) )
......
...@@ -924,6 +924,12 @@ class ModelRunner: ...@@ -924,6 +924,12 @@ class ModelRunner:
return return
if self.server_args.disable_cuda_graph: if self.server_args.disable_cuda_graph:
logger.warning(
"\n\nCUDA Graph is DISABLED.\n"
"This will cause significant performance degradation.\n"
"CUDA Graph should almost never be disabled in most usage scenarios.\n"
"If you encounter OOM issues, please try setting --mem-fraction-static to a lower value (such as 0.8 or 0.7) instead of disabling CUDA Graph.\n"
)
return return
tic = time.time() tic = time.time()
......
...@@ -84,10 +84,10 @@ class EAGLEDraftCudaGraphRunner: ...@@ -84,10 +84,10 @@ class EAGLEDraftCudaGraphRunner:
raise Exception( raise Exception(
f"Capture cuda graph failed: {e}\n" f"Capture cuda graph failed: {e}\n"
"Possible solutions:\n" "Possible solutions:\n"
"1. disable cuda graph by --disable-cuda-graph\n" "1. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n"
"2. set --mem-fraction-static to a smaller value (e.g., 0.8 or 0.7)\n" "2. disable torch compile by not using --enable-torch-compile\n"
"3. disable torch compile by not using --enable-torch-compile\n" "3. specify --dtype to the same dtype (e.g. bfloat16)\n"
"4. specify --dtype to the same dtype (e.g. bfloat16)\n" "4. disable cuda graph by --disable-cuda-graph\n"
"Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n" "Open an issue on GitHub https://github.com/sgl-project/sglang/issues/new/choose \n"
) )
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment