Unverified commit 715c2d85, authored by Alpay Ariyak and committed by GitHub
Browse files

[Frontend] [Core] Tensorizer: support dynamic `num_readers`, update version (#4467)

parent a4941404
......@@ -14,7 +14,7 @@ types-setuptools
# testing
pytest
tensorizer==2.9.0a0
tensorizer==2.9.0
pytest-forked
pytest-asyncio
pytest-rerunfailures
......
......@@ -408,7 +408,7 @@ setup(
install_requires=get_requirements(),
ext_modules=ext_modules,
extras_require={
"tensorizer": ["tensorizer==2.9.0a1"],
"tensorizer": ["tensorizer==2.9.0"],
},
cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {},
package_data=package_data,
......
......@@ -44,7 +44,7 @@ class TensorizerConfig:
str, bytes, os.PathLike, int]
vllm_tensorized: bool
verify_hash: Optional[bool] = False
num_readers: Optional[int] = 1
num_readers: Optional[int] = None
encryption_keyfile: Optional[str] = None
s3_access_key_id: Optional[str] = None
s3_secret_access_key: Optional[str] = None
......@@ -104,7 +104,7 @@ class TensorizerArgs:
str, bytes, os.PathLike, int]
vllm_tensorized: bool
verify_hash: Optional[bool] = False
num_readers: Optional[int] = 1
num_readers: Optional[int] = None
encryption_keyfile: Optional[str] = None
s3_access_key_id: Optional[str] = None
s3_secret_access_key: Optional[str] = None
......@@ -125,8 +125,9 @@ class TensorizerArgs:
the hashes stored in the metadata. A `HashMismatchError` will be
raised if any of the hashes do not match.
num_readers: Controls how many threads are allowed to read concurrently
from the source file. Default is 1. This greatly increases
performance.
from the source file. Default is `None`, which will dynamically set
the number of readers based on the number of available
resources and model size. This greatly increases performance.
encryption_keyfile: File path to a binary file containing a
binary key to use for decryption. `None` (the default) means
no decryption. See the example script in
......@@ -199,10 +200,12 @@ class TensorizerArgs:
"use for decryption. Can be a file path or S3 network URI.")
group.add_argument(
"--num-readers",
default=1,
default=None,
type=int,
help="Controls how many threads are allowed to read concurrently "
"from the source file.")
"from the source file. Default is `None`, which will dynamically "
"set the number of readers based on the available resources "
"and model size. This greatly increases performance.")
group.add_argument(
"--s3-access-key-id",
default=None,
......@@ -337,7 +340,7 @@ class TensorizerAgent:
per_second = convert_bytes(deserializer.total_tensor_bytes / duration)
after_mem = get_mem_usage()
deserializer.close()
logger.info("Deserialized %s in %0.2fs, %f/s", total_bytes_str,
logger.info("Deserialized %s in %0.2fs, %s/s", total_bytes_str,
end - start, per_second)
logger.info("Memory usage before: %s", before_mem)
logger.info("Memory usage after: %s", after_mem)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment