chore: remove pa build (#1231)

4426e937 · Neelay Shah · GitHub · cd7a301b · 4426e937 · 4426e937
Unverified commit 4426e937, authored May 28, 2025 by Neelay Shah; committed by GitHub on May 28, 2025.
4 changed files
--- a/container/Dockerfile.sglang
+++ b/container/Dockerfile.sglang
@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
 ARG RELEASE_BUILD
 ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
 ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
-# TODO: Move to published pypi tags
+ARG GENAI_PERF_VERSION="0.0.13"
-ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
 # Define general architecture ARGs for supporting both x86 and aarch64 builds.
 #   ARCH: Used for package suffixes (e.g., amd64, arm64)
@@ -363,39 +362,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
 ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
-##########################################
-########## Perf Analyzer Image ###########
-##########################################
-FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer
-ARG GENAI_PERF_TAG
-WORKDIR /workspace
-# Build and install Perf Analyzer for benchmarking
-RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
-RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
-RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
-RUN mkdir perf_analyzer/build
-RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
-RUN cmake --build perf_analyzer/build -- -j8
-RUN mkdir bin &&  \
-    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
 ########################################
 ########## Development Image ###########
 ########################################
 FROM ci_minimum AS dev
-ARG GENAI_PERF_TAG
+ARG GENAI_PERF_VERSION
-COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
-COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
-ENV PATH="/perf/bin:${PATH}"
 # Install genai-perf for benchmarking
-RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
+RUN uv pip install "genai-perf==${GENAI_PERF_VERSION}"
-RUN uv pip uninstall tritonclient
 COPY . /workspace

--- a/container/Dockerfile.tensorrt_llm
+++ b/container/Dockerfile.tensorrt_llm
@@ -16,6 +16,7 @@
 ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
 ARG BASE_IMAGE_TAG="25.04-py3"
 ARG RELEASE_BUILD
+ARG GENAI_PERF_VERSION="0.0.13"
 # Define general architecture ARGs for supporting both x86 and aarch64 builds.
 #   ARCH: Used for package suffixes (e.g., amd64, arm64)
@@ -162,10 +163,11 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
         "${TENSORRTLLM_PIP_WHEEL}" ; \
    fi
+ARG GENAI_PERF_VERSION
 # Install genai-perf for benchmarking
-# TODO: Move to published pypi tags
-ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
+RUN uv pip install "genai-perf==${GENAI_PERF_VERSION}"
-RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
 # Install test dependencies
 RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \

--- a/container/Dockerfile.vllm
+++ b/container/Dockerfile.vllm
@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
 ARG RELEASE_BUILD
 ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
 ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
-# TODO: Move to published pypi tags
+ARG GENAI_PERF_VERSION="0.0.13"
-ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
 # Define general architecture ARGs for supporting both x86 and aarch64 builds.
 #   ARCH: Used for package suffixes (e.g., amd64, arm64)
@@ -436,39 +435,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
 ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
 ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
-##########################################
-########## Perf Analyzer Image ###########
-##########################################
-FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer
-ARG GENAI_PERF_TAG
-WORKDIR /workspace
-# Build and install Perf Analyzer for benchmarking
-RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
-RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
-RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
-RUN mkdir perf_analyzer/build
-RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
-RUN cmake --build perf_analyzer/build -- -j8
-RUN mkdir bin &&  \
-    cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
 ########################################
 ########## Development Image ###########
 ########################################
 FROM ci_minimum AS dev
-ARG GENAI_PERF_TAG
+ARG GENAI_PERF_VERSION
-COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
-COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
-ENV PATH="/perf/bin:${PATH}"
 # Install genai-perf for benchmarking
-RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
+RUN uv pip install "genai-perf==${GENAI_PERF_VERSION}"
-RUN uv pip uninstall tritonclient
 COPY . /workspace

--- a/examples/llm/benchmarks/perf.sh
+++ b/examples/llm/benchmarks/perf.sh
@@ -30,7 +30,6 @@ for concurrency in 1 2 4 8 16 32 64 128 256; do
  genai-perf profile \
    --model ${model} \
    --tokenizer ${model} \
-    --service-kind openai \
    --endpoint-type chat \
    --endpoint /v1/chat/completions \
    --streaming \