Unverified commit 62e2e99d, authored by Xinyuan Tong, committed by GitHub
Browse files

fix: make inference deterministic for large TP (#10930)


Co-authored-by: yhyang201 <yhyang201@gmail.com>
Co-authored-by: Yangmin Li <yangminl@nvidia.com>
Co-authored-by: Yuan Luo <yuan.luo@hotmail.com>
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
parent 8ebf72fe
......@@ -1166,15 +1166,12 @@ class ServerArgs:
# Check TP size
if self.tp_size > 1:
raise ValueError(
"Currently only TP size 1 is supported for deterministic inference."
os.environ["NCCL_ALGO"] = "allreduce:tree"
self.disable_custom_all_reduce = True
logger.warning(
"NCCL_ALGO is set to 'allreduce:tree' and custom all reduce is disabled for deterministic inference when TP size > 1."
)
# Warnings on MoE models
logger.warning(
"Currently deterministic inference is only tested on dense models. Please be cautious when using it on MoE models."
)
def _handle_other_validations(self):
pass
......
......@@ -19,7 +19,7 @@ from sglang.profiler import run_profile
PROMPT_1 = "Tell me about Richard Feynman: "
PROMPT_2 = "Generate 1000 random numbers. Go directly into it, don't say Sure and don't say here are numbers. Just start with a number."
dirpath = os.path.dirname(__file__)
with open("python/sglang/test/long_prompt.txt", "r") as f:
with open(os.path.join(dirpath, "long_prompt.txt"), "r") as f:
LONG_PROMPT = f.read()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment