Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
e0dd4d35
Unverified
Commit
e0dd4d35
authored
Apr 04, 2024
by
Cade Daniel
Committed by
GitHub
Apr 04, 2024
Browse files
[Misc] Fix linter issues in examples/fp8/quantizer/quantize.py (#3864)
parent
e5043a3e
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
14 additions
and
16 deletions
+14
-16
examples/fp8/quantizer/quantize.py
examples/fp8/quantizer/quantize.py
+14
-16
No files found.
examples/fp8/quantizer/quantize.py
View file @
e0dd4d35
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# noqa: E501
# SPDX-License-Identifier: Apache-2.0
# SPDX-License-Identifier: Apache-2.0
#
#
# Licensed under the Apache License, Version 2.0 (the "License");
# Licensed under the Apache License, Version 2.0 (the "License");
...
@@ -131,7 +131,8 @@ def get_tokenizer(ckpt_path, max_seq_len=MAX_SEQ_LEN, model_type=None):
...
@@ -131,7 +131,8 @@ def get_tokenizer(ckpt_path, max_seq_len=MAX_SEQ_LEN, model_type=None):
tokenizer
.
pad_token
=
tokenizer
.
eos_token
tokenizer
.
pad_token
=
tokenizer
.
eos_token
if
tokenizer
.
pad_token
is
None
:
if
tokenizer
.
pad_token
is
None
:
tokenizer
.
pad_token
=
tokenizer
.
eos_token
tokenizer
.
pad_token
=
tokenizer
.
eos_token
assert
tokenizer
.
pad_token
is
not
None
,
f
"Pad token for
{
model_type
}
cannot be set!"
assert
(
tokenizer
.
pad_token
is
not
None
),
f
"Pad token for
{
model_type
}
cannot be set!"
return
tokenizer
return
tokenizer
...
@@ -158,9 +159,9 @@ def get_model(ckpt_path, dtype="fp16", device="cuda"):
...
@@ -158,9 +159,9 @@ def get_model(ckpt_path, dtype="fp16", device="cuda"):
model_dtype
=
next
(
model
.
parameters
()).
dtype
model_dtype
=
next
(
model
.
parameters
()).
dtype
if
dtype
!=
model_dtype
:
if
dtype
!=
model_dtype
:
print
(
print
(
"[TensorRT-LLM][WARNING] The manually set model data type is "
f
"[TensorRT-LLM][WARNING] The manually set model data type is
{
dtype
}
,
"
f
"
{
dtype
}
, but the data type of the HuggingFace model is
"
f
"
but the data type of the HuggingFace model is
{
model_dtype
}
."
)
f
"
{
model_dtype
}
."
)
return
model
return
model
...
@@ -244,15 +245,13 @@ def main(args):
...
@@ -244,15 +245,13 @@ def main(args):
else
:
else
:
if
"awq"
in
args
.
qformat
:
if
"awq"
in
args
.
qformat
:
if
args
.
calib_size
>
32
:
if
args
.
calib_size
>
32
:
print
(
print
(
"AWQ calibration could take longer with calib_size = "
f
"AWQ calibration could take longer with calib_size =
{
args
.
calib_size
}
, Using"
f
"
{
args
.
calib_size
}
, Using calib_size=32 instead"
)
" calib_size=32 instead"
)
args
.
calib_size
=
32
args
.
calib_size
=
32
print
(
print
(
"
\n
AWQ calibration could take longer than other calibration "
"
\n
AWQ calibration could take longer than other calibration methods. Please"
"methods. Please increase the batch size to speed up the "
" increase the batch size to speed up the calibration process. Batch size can be"
"calibration process. Batch size can be set by adding the "
" set by adding the argument --batch_size <batch_size> to the command line.
\n
"
"argument --batch_size <batch_size> to the command line.
\n
"
)
)
calib_dataloader
=
get_calib_dataloader
(
calib_dataloader
=
get_calib_dataloader
(
tokenizer
=
tokenizer
,
tokenizer
=
tokenizer
,
...
@@ -287,9 +286,8 @@ def main(args):
...
@@ -287,9 +286,8 @@ def main(args):
with
torch
.
inference_mode
():
with
torch
.
inference_mode
():
if
model_type
is
None
:
if
model_type
is
None
:
print
(
print
(
f
"Unknown model type
{
type
(
model
).
__name__
}
. Continue "
f
"Unknown model type
{
type
(
model
).
__name__
}
. Continue exporting..."
"exporting..."
)
)
model_type
=
f
"unknown:
{
type
(
model
).
__name__
}
"
model_type
=
f
"unknown:
{
type
(
model
).
__name__
}
"
export_path
=
args
.
output_dir
export_path
=
args
.
output_dir
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment