Unverified Commit f9858193 authored by Kyle McGill's avatar Kyle McGill Committed by GitHub
Browse files

fix: Update the vllm docker image to use the cuda sampler rather than the pytorch one (#5613)

parent ca63c49d
...@@ -847,5 +847,9 @@ USER dynamo ...@@ -847,5 +847,9 @@ USER dynamo
ARG DYNAMO_COMMIT_SHA ARG DYNAMO_COMMIT_SHA
ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA ENV DYNAMO_COMMIT_SHA=$DYNAMO_COMMIT_SHA
# In vLLM 0.12 the default sampler used on the forward pass changed.
# Set this variable so the CUDA sampler is used rather than the PyTorch one.
ENV VLLM_USE_FLASHINFER_SAMPLER=1
ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"] ENTRYPOINT ["/opt/nvidia/nvidia_entrypoint.sh"]
CMD [] CMD []
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment