[Frontend] [Core] Tensorizer: support dynamic `num_readers`, update version (#4467)

715c2d85 · Alpay Ariyak · GitHub · a4941404 · 715c2d85 · 715c2d85
Unverified commit 715c2d85, authored by Alpay Ariyak on Apr 30, 2024 and committed by GitHub on Apr 30, 2024.
Hide whitespace changes
Inline Side-by-side

Showing with 12 additions and 9 deletions

requirements-dev.txt +1 -1

setup.py +1 -1

vllm/model_executor/model_loader/tensorizer.py +10 -7

No files found.
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -14,7 +14,7 @@ types-setuptools
 # testing
 pytest
-tensorizer==2.9.0a0
+tensorizer==2.9.0
 pytest-forked
 pytest-asyncio
 pytest-rerunfailures

--- a/setup.py
+++ b/setup.py
@@ -408,7 +408,7 @@ setup(
    install_requires=get_requirements(),
    ext_modules=ext_modules,
    extras_require={
-        "tensorizer": ["tensorizer==2.9.0a1"],
+        "tensorizer": ["tensorizer==2.9.0"],
    },
    cmdclass={"build_ext": cmake_build_ext} if not _is_neuron() else {},
    package_data=package_data,

--- a/vllm/model_executor/model_loader/tensorizer.py
+++ b/vllm/model_executor/model_loader/tensorizer.py
@@ -44,7 +44,7 @@ class TensorizerConfig:
                          str, bytes, os.PathLike, int]
    vllm_tensorized: bool
    verify_hash: Optional[bool] = False
-    num_readers: Optional[int] = 1
+    num_readers: Optional[int] = None
    encryption_keyfile: Optional[str] = None
    s3_access_key_id: Optional[str] = None
    s3_secret_access_key: Optional[str] = None
@@ -104,7 +104,7 @@ class TensorizerArgs:
                          str, bytes, os.PathLike, int]
    vllm_tensorized: bool
    verify_hash: Optional[bool] = False
-    num_readers: Optional[int] = 1
+    num_readers: Optional[int] = None
    encryption_keyfile: Optional[str] = None
    s3_access_key_id: Optional[str] = None
    s3_secret_access_key: Optional[str] = None
@@ -125,8 +125,9 @@ class TensorizerArgs:
          the hashes stored in the metadata. A `HashMismatchError` will be 
          raised if any of the hashes do not match.
      num_readers: Controls how many threads are allowed to read concurrently
-          from the source file. Default is 1. This greatly increases
+          from the source file. Default is `None`, which will dynamically set
-          performance.
+          the number of readers based on the available resources and
+          model size. This greatly increases performance.
      encryption_keyfile: File path to a binary file containing a  
          binary key to use for decryption. `None` (the default) means 
          no decryption. See the example script in 
@@ -199,10 +200,12 @@ class TensorizerArgs:
            "use for decryption. Can be a file path or S3 network URI.")
        group.add_argument(
            "--num-readers",
-            default=1,
+            default=None,
            type=int,
            help="Controls how many threads are allowed to read concurrently "
-            "from the source file.")
+            "from the source file. Default is `None`, which will dynamically "
+            "set the number of readers based on the available resources "
+            "and model size. This greatly increases performance.")
        group.add_argument(
            "--s3-access-key-id",
            default=None,
@@ -337,7 +340,7 @@ class TensorizerAgent:
        per_second = convert_bytes(deserializer.total_tensor_bytes / duration)
        after_mem = get_mem_usage()
        deserializer.close()
-        logger.info("Deserialized %s in %0.2fs, %f/s", total_bytes_str,
+        logger.info("Deserialized %s in %0.2fs, %s/s", total_bytes_str,
                    end - start, per_second)
        logger.info("Memory usage before: %s", before_mem)
        logger.info("Memory usage after: %s", after_mem)