Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
d74674bb
"vscode:/vscode.git/clone" did not exist on "e0919f331d12dc5dbdefd0775bb6f94dd2fab4e2"
Unverified
Commit
d74674bb
authored
Jun 15, 2024
by
Allen.Dou
Committed by
GitHub
Jun 14, 2024
Browse files
[Misc] Fix arg names (#5524)
parent
703475f6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
6 additions
and
6 deletions
+6
-6
benchmarks/kernels/benchmark_paged_attention.py
benchmarks/kernels/benchmark_paged_attention.py
+1
-1
examples/aqlm_example.py
examples/aqlm_example.py
+1
-1
examples/fp8/extract_scales.py
examples/fp8/extract_scales.py
+4
-4
No files found.
benchmarks/kernels/benchmark_paged_attention.py
View file @
d74674bb
...
@@ -165,7 +165,7 @@ if __name__ == '__main__':
...
@@ -165,7 +165,7 @@ if __name__ == '__main__':
choices
=
[
"v1"
,
"v2"
],
choices
=
[
"v1"
,
"v2"
],
default
=
"v2"
)
default
=
"v2"
)
parser
.
add_argument
(
"--batch-size"
,
type
=
int
,
default
=
8
)
parser
.
add_argument
(
"--batch-size"
,
type
=
int
,
default
=
8
)
parser
.
add_argument
(
"--seq
_
len"
,
type
=
int
,
default
=
4096
)
parser
.
add_argument
(
"--seq
-
len"
,
type
=
int
,
default
=
4096
)
parser
.
add_argument
(
"--num-query-heads"
,
type
=
int
,
default
=
64
)
parser
.
add_argument
(
"--num-query-heads"
,
type
=
int
,
default
=
64
)
parser
.
add_argument
(
"--num-kv-heads"
,
type
=
int
,
default
=
8
)
parser
.
add_argument
(
"--num-kv-heads"
,
type
=
int
,
default
=
8
)
parser
.
add_argument
(
"--head-size"
,
parser
.
add_argument
(
"--head-size"
,
...
...
examples/aqlm_example.py
View file @
d74674bb
...
@@ -17,7 +17,7 @@ def main():
...
@@ -17,7 +17,7 @@ def main():
type
=
int
,
type
=
int
,
default
=
0
,
default
=
0
,
help
=
'known good models by index, [0-4]'
)
help
=
'known good models by index, [0-4]'
)
parser
.
add_argument
(
'--tensor
_
parallel
_
size'
,
parser
.
add_argument
(
'--tensor
-
parallel
-
size'
,
'-t'
,
'-t'
,
type
=
int
,
type
=
int
,
default
=
1
,
default
=
1
,
...
...
examples/fp8/extract_scales.py
View file @
d74674bb
...
@@ -327,7 +327,7 @@ if __name__ == "__main__":
...
@@ -327,7 +327,7 @@ if __name__ == "__main__":
"--quantization-param-path <filename>). This is only used "
"--quantization-param-path <filename>). This is only used "
"if the KV cache dtype is FP8 and on ROCm (AMD GPU)."
)
"if the KV cache dtype is FP8 and on ROCm (AMD GPU)."
)
parser
.
add_argument
(
parser
.
add_argument
(
"--quantized
_
model"
,
"--quantized
-
model"
,
help
=
"Specify the directory containing a single quantized HF model. "
help
=
"Specify the directory containing a single quantized HF model. "
"It is expected that the quantization format is FP8_E4M3, for use "
"It is expected that the quantization format is FP8_E4M3, for use "
"on ROCm (AMD GPU)."
,
"on ROCm (AMD GPU)."
,
...
@@ -339,18 +339,18 @@ if __name__ == "__main__":
...
@@ -339,18 +339,18 @@ if __name__ == "__main__":
choices
=
[
"auto"
,
"safetensors"
,
"npz"
,
"pt"
],
choices
=
[
"auto"
,
"safetensors"
,
"npz"
,
"pt"
],
default
=
"auto"
)
default
=
"auto"
)
parser
.
add_argument
(
parser
.
add_argument
(
"--output
_
dir"
,
"--output
-
dir"
,
help
=
"Optionally specify the output directory. By default the "
help
=
"Optionally specify the output directory. By default the "
"KV cache scaling factors will be saved in the model directory, "
"KV cache scaling factors will be saved in the model directory, "
"however you can override this behavior here."
,
"however you can override this behavior here."
,
default
=
None
)
default
=
None
)
parser
.
add_argument
(
parser
.
add_argument
(
"--output
_
name"
,
"--output
-
name"
,
help
=
"Optionally specify the output filename."
,
help
=
"Optionally specify the output filename."
,
# TODO: Change this once additional scaling factors are enabled
# TODO: Change this once additional scaling factors are enabled
default
=
"kv_cache_scales.json"
)
default
=
"kv_cache_scales.json"
)
parser
.
add_argument
(
parser
.
add_argument
(
"--tp
_
size"
,
"--tp
-
size"
,
help
=
"Optionally specify the tensor-parallel (TP) size that the "
help
=
"Optionally specify the tensor-parallel (TP) size that the "
"quantized model should correspond to. If specified, during KV "
"quantized model should correspond to. If specified, during KV "
"cache scaling factor extraction the observed TP size will be "
"cache scaling factor extraction the observed TP size will be "
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment