"vscode:/vscode.git/clone" did not exist on "d03c097677929e4e1cd6563875389bd66564ef09"
Unverified Commit 02199790 authored by ishandhanani's avatar ishandhanani Committed by GitHub
Browse files

chore: update low latency fp8 (#4499)

parent c0c664a9
......@@ -83,6 +83,7 @@ if [ "$mode" = "prefill" ]; then
if [[ "${USE_INIT_LOCATIONS,,}" == "true" ]]; then command_suffix="--init-expert-location /configs/prefill_dsr1-0528_in1000out1000_num40000.json"; fi
if [[ -n "${DUMP_CONFIG_PATH}" ]]; then command_suffix="${command_suffix} --dump-config-to ${DUMP_CONFIG_PATH}"; fi
SGLANG_ENABLE_JIT_DEEPGEMM=false \
DYN_SKIP_SGLANG_LOG_FORMATTING=1 \
MC_TE_METRIC=true \
SGLANG_ENABLE_FLASHINFER_GEMM=1 \
......@@ -140,6 +141,7 @@ elif [ "$mode" = "decode" ]; then
if [[ "${USE_INIT_LOCATIONS,,}" == "true" ]]; then command_suffix="--init-expert-location /configs/decode_dsr1-0528_loadgen_in1024out1024_num2000_2p12d.json"; fi
if [[ -n "${DUMP_CONFIG_PATH}" ]]; then command_suffix="${command_suffix} --dump-config-to ${DUMP_CONFIG_PATH}"; fi
SGLANG_ENABLE_JIT_DEEPGEMM=false \
DYN_SKIP_SGLANG_LOG_FORMATTING=1 \
MC_TE_METRIC=true \
SGLANG_ENABLE_FLASHINFER_GEMM=1 \
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment