Unverified commit 4426e937, authored by Neelay Shah and committed by GitHub
Browse files

chore: remove pa build (#1231)

parent cd7a301b
...@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ...@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# TODO: Move to published pypi tags ARG GENAI_PERF_VERSION=0.0.13
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64) # ARCH: Used for package suffixes (e.g., amd64, arm64)
...@@ -363,39 +362,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la ...@@ -363,39 +362,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer
ARG GENAI_PERF_TAG
WORKDIR /workspace
# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin && \
cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
######################################## ########################################
########## Development Image ########### ########## Development Image ###########
######################################## ########################################
FROM ci_minimum AS dev FROM ci_minimum AS dev
ARG GENAI_PERF_TAG ARG GENAI_PERF_VERSION
COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"
# Install genai-perf for benchmarking # Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf" RUN uv pip install genai-perf==$GENAI_PERF_VERSION
RUN uv pip uninstall tritonclient
COPY . /workspace COPY . /workspace
......
...@@ -16,6 +16,7 @@ ...@@ -16,6 +16,7 @@
ARG BASE_IMAGE="nvcr.io/nvidia/pytorch" ARG BASE_IMAGE="nvcr.io/nvidia/pytorch"
ARG BASE_IMAGE_TAG="25.04-py3" ARG BASE_IMAGE_TAG="25.04-py3"
ARG RELEASE_BUILD ARG RELEASE_BUILD
ARG GENAI_PERF_VERSION=0.0.13
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64) # ARCH: Used for package suffixes (e.g., amd64, arm64)
...@@ -162,10 +163,11 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \ ...@@ -162,10 +163,11 @@ RUN [ -f /etc/pip/constraint.txt ] && : > /etc/pip/constraint.txt || true && \
"${TENSORRTLLM_PIP_WHEEL}" ; \ "${TENSORRTLLM_PIP_WHEEL}" ; \
fi fi
ARG GENAI_PERF_VERSION
# Install genai-perf for benchmarking # Install genai-perf for benchmarking
# TODO: Move to published pypi tags
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24" RUN uv pip install genai-perf==$GENAI_PERF_VERSION
RUN pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf"
# Install test dependencies # Install test dependencies
RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \ RUN --mount=type=bind,source=./container/deps/requirements.test.txt,target=/tmp/requirements.txt \
......
...@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04" ...@@ -10,8 +10,7 @@ ARG BASE_IMAGE_TAG="25.01-cuda12.8-devel-ubuntu24.04"
ARG RELEASE_BUILD ARG RELEASE_BUILD
ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda" ARG RUNTIME_IMAGE="nvcr.io/nvidia/cuda"
ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04" ARG RUNTIME_IMAGE_TAG="12.8.1-runtime-ubuntu24.04"
# TODO: Move to published pypi tags ARG GENAI_PERF_VERSION=0.0.13
ARG GENAI_PERF_TAG="e67e853413a07a778dd78a55e299be7fba9c9c24"
# Define general architecture ARGs for supporting both x86 and aarch64 builds. # Define general architecture ARGs for supporting both x86 and aarch64 builds.
# ARCH: Used for package suffixes (e.g., amd64, arm64) # ARCH: Used for package suffixes (e.g., amd64, arm64)
...@@ -436,39 +435,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la ...@@ -436,39 +435,15 @@ RUN --mount=type=bind,source=./container/launch_message.txt,target=/workspace/la
ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so ENV VLLM_KV_CAPI_PATH=/opt/dynamo/bindings/lib/libdynamo_llm_capi.so
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/ ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/nvidia/nvda_nixl/lib/x86_64-linux-gnu/
##########################################
########## Perf Analyzer Image ###########
##########################################
FROM ${BASE_IMAGE}:${BASE_IMAGE_TAG} AS perf_analyzer
ARG GENAI_PERF_TAG
WORKDIR /workspace
# Build and install Perf Analyzer for benchmarking
RUN apt-get update -y && apt-get -y install cmake g++ libssl-dev python3 rapidjson-dev zlib1g-dev
RUN git clone https://github.com/triton-inference-server/perf_analyzer.git
RUN git -C perf_analyzer checkout ${GENAI_PERF_TAG}
RUN mkdir perf_analyzer/build
RUN cmake -B perf_analyzer/build -S perf_analyzer -D TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON
RUN cmake --build perf_analyzer/build -- -j8
RUN mkdir bin && \
cp -r perf_analyzer/build/perf_analyzer/src/perf-analyzer-build /workspace/bin/
######################################## ########################################
########## Development Image ########### ########## Development Image ###########
######################################## ########################################
FROM ci_minimum AS dev FROM ci_minimum AS dev
ARG GENAI_PERF_TAG ARG GENAI_PERF_VERSION
COPY --from=perf_analyzer /workspace/bin/perf-analyzer-build/ /perf/bin
COPY --from=perf_analyzer /workspace/perf_analyzer /perf_analyzer
ENV PATH="/perf/bin:${PATH}"
# Install genai-perf for benchmarking # Install genai-perf for benchmarking
RUN uv pip install "git+https://github.com/triton-inference-server/perf_analyzer.git@${GENAI_PERF_TAG}#subdirectory=genai-perf" RUN uv pip install genai-perf==$GENAI_PERF_VERSION
RUN uv pip uninstall tritonclient
COPY . /workspace COPY . /workspace
......
...@@ -30,7 +30,6 @@ for concurrency in 1 2 4 8 16 32 64 128 256; do ...@@ -30,7 +30,6 @@ for concurrency in 1 2 4 8 16 32 64 128 256; do
genai-perf profile \ genai-perf profile \
--model ${model} \ --model ${model} \
--tokenizer ${model} \ --tokenizer ${model} \
--service-kind openai \
--endpoint-type chat \ --endpoint-type chat \
--endpoint /v1/chat/completions \ --endpoint /v1/chat/completions \
--streaming \ --streaming \
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment