Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
dynamo
Commits
4426e937
Unverified
Commit
4426e937
authored
May 28, 2025
by
Neelay Shah
Committed by
GitHub
May 28, 2025
Browse files
chore: remove pa build (#1231)
parent
cd7a301b
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
60 deletions
+11
-60
container/Dockerfile.sglang
container/Dockerfile.sglang
+3
-28
container/Dockerfile.tensorrt_llm
container/Dockerfile.tensorrt_llm
+5
-3
container/Dockerfile.vllm
container/Dockerfile.vllm
+3
-28
examples/llm/benchmarks/perf.sh
examples/llm/benchmarks/perf.sh
+0
-1
No files found.
container/Dockerfile.sglang
View file @
4426e937
...
@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
...
@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# TODO: Move to published pypi tags
ARG GENAI_PERF_VERSION=0.0.13
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH: Used for package suffixes (e.g., amd64, arm64)
...
@@ -363,39 +362,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
...
@@ -363,39 +362,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer
ARG GENAI_PERF_TAG
WORKDIR /workspace
# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin && \
cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
########################################
########################################
########## Development Image ###########
########## Development Image ###########
########################################
########################################
FROM ci_minimum AS dev
FROM ci_minimum AS dev
ARG GENAI_PERF_TAG
ARG GENAI_PERF_VERSION
COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"
# Install genai-perf for benchmarking
# Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip install genai-perf==$GENAI_PERF_VERSION
RUN uv pip uninstall tritonclient
COPY . /workspace
COPY . /workspace
...
...
container/Dockerfile.tensorrt_llm
View file @
4426e937
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE_TAG="25.04-py3"
ARG BASE_IMAGE_TAG="25.04-py3"
ARG RELEASE_BUILD
ARG RELEASE_BUILD
ARG GENAI_PERF_VERSION=0.0.13
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH: Used for package suffixes (e.g., amd64, arm64)
...
@@ -162,10 +163,11 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
...
@@ -162,10 +163,11 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
"${TENSORRTLLM_PIP_WHEEL}" ; \
"${TENSORRTLLM_PIP_WHEEL}" ; \
fi
fi
ARG GENAI_PERF_VERSION
# Install genai-perf for benchmarking
# Install genai-perf for benchmarking
# TODO: Move to published pypi tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
RUN uv pip install genai-perf==$GENAI_PERF_VERSION
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
# Install test dependencies
# Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
...
...
container/Dockerfile.vllm
View file @
4426e937
...
@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
...
@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD
ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# TODO: Move to published pypi tags
ARG GENAI_PERF_VERSION=0.0.13
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64)
# ARCH: Used for package suffixes (e.g., amd64, arm64)
...
@@ -436,39 +435,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
...
@@ -436,39 +435,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer
ARG GENAI_PERF_TAG
WORKDIR /workspace
# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin && \
cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
########################################
########################################
########## Development Image ###########
########## Development Image ###########
########################################
########################################
FROM ci_minimum AS dev
FROM ci_minimum AS dev
ARG GENAI_PERF_TAG
ARG GENAI_PERF_VERSION
COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"
# Install genai-perf for benchmarking
# Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
RUN uv pip install genai-perf==$GENAI_PERF_VERSION
RUN uv pip uninstall tritonclient
COPY . /workspace
COPY . /workspace
...
...
examples/llm/benchmarks/perf.sh
View file @
4426e937
...
@@ -30,7 +30,6 @@ for concurrency in 1 2 4 8 16 32 64 128 256; do
...
@@ -30,7 +30,6 @@ for concurrency in 1 2 4 8 16 32 64 128 256; do
genai-perf profile
\
genai-perf profile
\
--model
${
model
}
\
--model
${
model
}
\
--tokenizer
${
model
}
\
--tokenizer
${
model
}
\
--service-kind
openai
\
--endpoint-type
chat
\
--endpoint-type
chat
\
--endpoint
/v1/chat/completions
\
--endpoint
/v1/chat/completions
\
--streaming
\
--streaming
\
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment