Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
55d6361b
Unverified
Commit
55d6361b
authored
Jun 14, 2024
by
Allen.Dou
Committed by
GitHub
Jun 13, 2024
Browse files
[Misc] Fix arg names in quantizer script (#5507)
parent
cd9c0d65
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
8 additions
and
8 deletions
+8
-8
examples/fp8/quantizer/quantize.py
examples/fp8/quantizer/quantize.py
+8
-8
No files found.
examples/fp8/quantizer/quantize.py
View file @
55d6361b
...
...
@@ -332,7 +332,7 @@ def main(args):
if
__name__
==
"__main__"
:
parser
=
argparse
.
ArgumentParser
(
description
=
__doc__
)
parser
.
add_argument
(
"--model
_
dir"
,
parser
.
add_argument
(
"--model
-
dir"
,
help
=
"Specify where the HuggingFace model is"
,
required
=
True
)
parser
.
add_argument
(
"--device"
,
default
=
"cuda"
)
...
...
@@ -346,19 +346,19 @@ if __name__ == "__main__":
"full_prec"
],
)
parser
.
add_argument
(
"--batch
_
size"
,
parser
.
add_argument
(
"--batch
-
size"
,
help
=
"Batch size for calibration."
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
"--calib
_
size"
,
parser
.
add_argument
(
"--calib
-
size"
,
help
=
"Number of samples for calibration."
,
type
=
int
,
default
=
512
)
parser
.
add_argument
(
"--output
_
dir"
,
default
=
"exported_model"
)
parser
.
add_argument
(
"--tp
_
size"
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
"--pp
_
size"
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
"--awq
_
block
_
size"
,
type
=
int
,
default
=
128
)
parser
.
add_argument
(
"--kv
_
cache
_
dtype"
,
parser
.
add_argument
(
"--output
-
dir"
,
default
=
"exported_model"
)
parser
.
add_argument
(
"--tp
-
size"
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
"--pp
-
size"
,
type
=
int
,
default
=
1
)
parser
.
add_argument
(
"--awq
-
block
-
size"
,
type
=
int
,
default
=
128
)
parser
.
add_argument
(
"--kv
-
cache
-
dtype"
,
help
=
"KV Cache dtype."
,
default
=
None
,
choices
=
[
"int8"
,
"fp8"
,
None
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment