Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
dcb5624a
Commit
dcb5624a
authored
Apr 29, 2025
by
zhuwenwen
Browse files
Merge tag 'v0.8.5' into v0.8.5-dev
parents
55880ca2
ba41cc90
Changes
690
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
24 additions
and
4 deletions
+24
-4
.buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml
...ldkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml
.../configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml
...te/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
...a-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml
...s/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
...figs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml
...lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
...eta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
...igs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
...ta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml
...ite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml
+2
-1
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
...s/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Minitron-4B-Base-FP8.yaml
.buildkite/lm-eval-harness/configs/Minitron-4B-Base-FP8.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml
...ness/configs/Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1-FP8.yaml
...-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1-FP8.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1.yaml
...e/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1.yaml
+2
-1
.buildkite/lm-eval-harness/configs/Qwen1.5-MoE-W4A16-compressed-tensors.yaml
...harness/configs/Qwen1.5-MoE-W4A16-compressed-tensors.yaml
+3
-2
.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-FP8W8.yaml
...te/lm-eval-harness/configs/Qwen2-1.5B-Instruct-FP8W8.yaml
+1
-0
.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml
.../configs/Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml
+1
-0
No files found.
.buildkite/lm-eval-harness/configs/DeepSeek-V2-Lite-Chat.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m deepseek-ai/DeepSeek-V2-Lite-Chat -b "auto" -l 1000 -f 5 -t 2
model_name
:
"
deepseek-ai/DeepSeek-V2-Lite-Chat"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform.yaml
View file @
dcb5624a
# For hf script, without -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5
model_name
:
"
nm-testing/Meta-Llama-3-70B-Instruct-FBGEMM-nonuniform"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-70B-Instruct.yaml
View file @
dcb5624a
# For hf script, without -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-70B-Instruct -b 32 -l 250 -f 5
model_name
:
"
meta-llama/Meta-Llama-3-70B-Instruct"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-Channelwise-compressed-tensors.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors -b auto -l 1000 -f 5 -t 1
model_name
:
"
nm-testing/Meta-Llama-3-8B-Instruct-W8A8-FP8-Channelwise-compressed-tensors"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform -b auto -l 1000 -f 5 -t 1
model_name
:
"
nm-testing/Meta-Llama-3-8B-Instruct-FBGEMM-nonuniform"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8-compressed-tensors.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test -b 32 -l 1000 -f 5 -t 1
model_name
:
"
nm-testing/Meta-Llama-3-8B-FP8-compressed-tensors-test"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-FP8.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Meta-Llama-3-8B-Instruct-FP8 -b 32 -l 250 -f 5 -t 1
model_name
:
"
neuralmagic/Meta-Llama-3-8B-Instruct-FP8"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors-asym.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Asym-Per-Token-Test -b "auto" -l 250 -f 5 -t 1
model_name
:
"
nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Asym-Per-Token-Test"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-INT8-compressed-tensors.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Per-Token-Test -b "auto" -l 250 -f 5 -t 1
model_name
:
"
nm-testing/Meta-Llama-3-8B-Instruct-W8-Channel-A8-Dynamic-Per-Token-Test"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct-nonuniform-compressed-tensors.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Meta-Llama-3-8B-Instruct-nonuniform-test -b auto -l 1000 -f 5 -t 1
model_name
:
"
nm-testing/Meta-Llama-3-8B-Instruct-nonuniform-test"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-Instruct.yaml
View file @
dcb5624a
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-8B-Instruct -b 32 -l 250 -f 5 -t 1
# For hf script, without -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m meta-llama/Meta-Llama-3-8B-Instruct -b 32 -l 250 -f 5
model_name
:
"
meta-llama/Meta-Llama-3-8B-Instruct"
tasks
:
-
name
:
"
gsm8k"
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3-8B-QQQ.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m HandH1998/QQQ-Llama-3-8b-g128 -b 32 -l 1000 -f 5 -t 1
model_name
:
"
HandH1998/QQQ-Llama-3-8b-g128"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Meta-Llama-3.2-1B-Instruct-INT8-compressed-tensors.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1
model_name
:
"
neuralmagic/Llama-3.2-1B-Instruct-quantized.w8a8"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Minitron-4B-Base-FP8.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m mgoin/Minitron-4B-Base-FP8 -b auto -l 1000 -f 5 -t 1
model_name
:
"
mgoin/Minitron-4B-Base-FP8"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Mixtral-8x22B-Instruct-v0.1-FP8-Dynamic.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Mixtral-8x22B-Instruct-v0.1-FP8-dynamic -b "auto" -l 250 -f 5 -t 8
model_name
:
"
neuralmagic/Mixtral-8x22B-Instruct-v0.1-FP8-dynamic"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1-FP8.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash ./run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8 -b "auto" -l 250 -f 5 -t 4
model_name
:
"
neuralmagic/Mixtral-8x7B-Instruct-v0.1-FP8"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Mixtral-8x7B-Instruct-v0.1.yaml
View file @
dcb5624a
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m neuralmagic/Mixtral-8x7B-Instruct-v0.1 -b 32 -l 250 -f 5 -t 4
# For hf script, without -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-hf-baseline.sh -m neuralmagic/Mixtral-8x7B-Instruct-v0.1 -b 32 -l 250 -f 5
model_name
:
"
mistralai/Mixtral-8x7B-Instruct-v0.1"
tasks
:
-
name
:
"
gsm8k"
...
...
.buildkite/lm-eval-harness/configs/Qwen1.5-MoE-W4A16-compressed-tensors.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16 -b auto -l 1319 -f 5 -t 1
model_name
:
"
nm-testing/Qwen1.5-MoE-A2.7B-Chat-quantized.w4a16"
tasks
:
-
name
:
"
gsm8k"
metrics
:
-
name
:
"
exact_match,strict-match"
value
:
0.3
1
value
:
0.3
0
-
name
:
"
exact_match,flexible-extract"
value
:
0.4
7
value
:
0.4
65
limit
:
1319
num_fewshot
:
5
.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-FP8W8.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m nm-testing/Qwen2-1.5B-Instruct-FP8W8 -b auto -l 1000 -f 5 -t 1
model_name
:
"
nm-testing/Qwen2-1.5B-Instruct-FP8W8"
tasks
:
...
...
.buildkite/lm-eval-harness/configs/Qwen2-1.5B-Instruct-INT8-compressed-tensors.yaml
View file @
dcb5624a
# For vllm script, with -t option (tensor parallel size).
# bash .buildkite/lm-eval-harness/run-lm-eval-gsm-vllm-baseline.sh -m neuralmagic/Qwen2-1.5B-Instruct-quantized.w8a8 -b "auto" -l 1000 -f 5 -t 1
model_name
:
"
neuralmagic/Qwen2-1.5B-Instruct-quantized.w8a8"
tasks
:
...
...
Prev
1
2
3
4
5
…
35
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment