[Docker] Adding number of nvcc_threads during build as envar (#1893)

24f60a54 · AguirreNicolas · GitHub · 42c02f58 · 24f60a54 · 24f60a54
Unverified Commit 24f60a54 authored Dec 07, 2023 by AguirreNicolas Committed by GitHub Dec 07, 2023
Hide whitespace changes
Inline Side-by-side

Showing with 6 additions and 2 deletions

Dockerfile Dockerfile +3 -0

docs/source/serving/deploying_with_docker.rst docs/source/serving/deploying_with_docker.rst +1 -1

setup.py setup.py +2 -1

No files found.
--- a/Dockerfile
+++ b/Dockerfile
@@ -32,6 +32,9 @@ COPY vllm/__init__.py vllm/__init__.py
 # max jobs used by Ninja to build extensions
 ENV MAX_JOBS=$max_jobs
+# number of threads used by nvcc
+ARG nvcc_threads=8
+ENV NVCC_THREADS=$nvcc_threads
 RUN python3 setup.py build_ext --inplace
 # image to run unit testing suite

--- a/docs/source/serving/deploying_with_docker.rst
+++ b/docs/source/serving/deploying_with_docker.rst
@@ -29,7 +29,7 @@ You can build and run vLLM from source via the provided dockerfile. To build vLL
 .. code-block:: console
-    $ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai --build-arg max_jobs=8
+    $ DOCKER_BUILDKIT=1 docker build . --target vllm-openai --tag vllm/vllm-openai # optionally specify: --build-arg max_jobs=8 --build-arg nvcc_threads=2
 To run vLLM:

--- a/setup.py
+++ b/setup.py
@@ -138,7 +138,8 @@ for capability in compute_capabilities:
 # Use NVCC threads to parallelize the build.
 if nvcc_cuda_version >= Version("11.2"):
-    num_threads = min(os.cpu_count(), 8)
+    nvcc_threads = int(os.getenv("NVCC_THREADS"), 8)
+    num_threads = min(os.cpu_count(), nvcc_threads)
    NVCC_FLAGS += ["--threads", str(num_threads)]
 ext_modules = []