Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
715c2d85
Unverified
Commit
715c2d85
authored
Apr 30, 2024
by
Alpay Ariyak
Committed by
GitHub
Apr 30, 2024
Browse files
[Frontend] [Core] Tensorizer: support dynamic `num_readers`, update version (#4467)
parent
a4941404
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
12 additions
and
9 deletions
+12
-9
requirements-dev.txt
requirements-dev.txt
+1
-1
setup.py
setup.py
+1
-1
vllm/model_executor/model_loader/tensorizer.py
vllm/model_executor/model_loader/tensorizer.py
+10
-7
No files found.
requirements-dev.txt
View file @
715c2d85
...
@@ -14,7 +14,7 @@ types-setuptools
...
@@ -14,7 +14,7 @@ types-setuptools
# testing
# testing
pytest
pytest
tensorizer==2.9.0
a0
tensorizer==2.9.0
pytest-forked
pytest-forked
pytest-asyncio
pytest-asyncio
pytest-rerunfailures
pytest-rerunfailures
...
...
setup.py
View file @
715c2d85
...
@@ -408,7 +408,7 @@ setup(
...
@@ -408,7 +408,7 @@ setup(
install_requires
=
get_requirements
(),
install_requires
=
get_requirements
(),
ext_modules
=
ext_modules
,
ext_modules
=
ext_modules
,
extras_require
=
{
extras_require
=
{
"tensorizer"
:
[
"tensorizer==2.9.0
a1
"
],
"tensorizer"
:
[
"tensorizer==2.9.0"
],
},
},
cmdclass
=
{
"build_ext"
:
cmake_build_ext
}
if
not
_is_neuron
()
else
{},
cmdclass
=
{
"build_ext"
:
cmake_build_ext
}
if
not
_is_neuron
()
else
{},
package_data
=
package_data
,
package_data
=
package_data
,
...
...
vllm/model_executor/model_loader/tensorizer.py
View file @
715c2d85
...
@@ -44,7 +44,7 @@ class TensorizerConfig:
...
@@ -44,7 +44,7 @@ class TensorizerConfig:
str
,
bytes
,
os
.
PathLike
,
int
]
str
,
bytes
,
os
.
PathLike
,
int
]
vllm_tensorized
:
bool
vllm_tensorized
:
bool
verify_hash
:
Optional
[
bool
]
=
False
verify_hash
:
Optional
[
bool
]
=
False
num_readers
:
Optional
[
int
]
=
1
num_readers
:
Optional
[
int
]
=
None
encryption_keyfile
:
Optional
[
str
]
=
None
encryption_keyfile
:
Optional
[
str
]
=
None
s3_access_key_id
:
Optional
[
str
]
=
None
s3_access_key_id
:
Optional
[
str
]
=
None
s3_secret_access_key
:
Optional
[
str
]
=
None
s3_secret_access_key
:
Optional
[
str
]
=
None
...
@@ -104,7 +104,7 @@ class TensorizerArgs:
...
@@ -104,7 +104,7 @@ class TensorizerArgs:
str
,
bytes
,
os
.
PathLike
,
int
]
str
,
bytes
,
os
.
PathLike
,
int
]
vllm_tensorized
:
bool
vllm_tensorized
:
bool
verify_hash
:
Optional
[
bool
]
=
False
verify_hash
:
Optional
[
bool
]
=
False
num_readers
:
Optional
[
int
]
=
1
num_readers
:
Optional
[
int
]
=
None
encryption_keyfile
:
Optional
[
str
]
=
None
encryption_keyfile
:
Optional
[
str
]
=
None
s3_access_key_id
:
Optional
[
str
]
=
None
s3_access_key_id
:
Optional
[
str
]
=
None
s3_secret_access_key
:
Optional
[
str
]
=
None
s3_secret_access_key
:
Optional
[
str
]
=
None
...
@@ -125,8 +125,9 @@ class TensorizerArgs:
...
@@ -125,8 +125,9 @@ class TensorizerArgs:
the hashes stored in the metadata. A `HashMismatchError` will be
the hashes stored in the metadata. A `HashMismatchError` will be
raised if any of the hashes do not match.
raised if any of the hashes do not match.
num_readers: Controls how many threads are allowed to read concurrently
num_readers: Controls how many threads are allowed to read concurrently
from the source file. Default is 1. This greatly increases
from the source file. Default is `None`, which will dynamically set
performance.
the number of readers based on the number of available
resources and model size. This greatly increases performance.
encryption_keyfile: File path to a binary file containing a
encryption_keyfile: File path to a binary file containing a
binary key to use for decryption. `None` (the default) means
binary key to use for decryption. `None` (the default) means
no decryption. See the example script in
no decryption. See the example script in
...
@@ -199,10 +200,12 @@ class TensorizerArgs:
...
@@ -199,10 +200,12 @@ class TensorizerArgs:
"use for decryption. Can be a file path or S3 network URI."
)
"use for decryption. Can be a file path or S3 network URI."
)
group
.
add_argument
(
group
.
add_argument
(
"--num-readers"
,
"--num-readers"
,
default
=
1
,
default
=
None
,
type
=
int
,
type
=
int
,
help
=
"Controls how many threads are allowed to read concurrently "
help
=
"Controls how many threads are allowed to read concurrently "
"from the source file."
)
"from the source file. Default is `None`, which will dynamically "
"set the number of readers based on the available resources "
"and model size. This greatly increases performance."
)
group
.
add_argument
(
group
.
add_argument
(
"--s3-access-key-id"
,
"--s3-access-key-id"
,
default
=
None
,
default
=
None
,
...
@@ -337,7 +340,7 @@ class TensorizerAgent:
...
@@ -337,7 +340,7 @@ class TensorizerAgent:
per_second
=
convert_bytes
(
deserializer
.
total_tensor_bytes
/
duration
)
per_second
=
convert_bytes
(
deserializer
.
total_tensor_bytes
/
duration
)
after_mem
=
get_mem_usage
()
after_mem
=
get_mem_usage
()
deserializer
.
close
()
deserializer
.
close
()
logger
.
info
(
"Deserialized %s in %0.2fs, %
f
/s"
,
total_bytes_str
,
logger
.
info
(
"Deserialized %s in %0.2fs, %
s
/s"
,
total_bytes_str
,
end
-
start
,
per_second
)
end
-
start
,
per_second
)
logger
.
info
(
"Memory usage before: %s"
,
before_mem
)
logger
.
info
(
"Memory usage before: %s"
,
before_mem
)
logger
.
info
(
"Memory usage after: %s"
,
after_mem
)
logger
.
info
(
"Memory usage after: %s"
,
after_mem
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment