add more pytorch related tests for torch nightly (#17422)

Signed-off-by: Yang Wang <elainewy@meta.com>

add more pytorch related tests for torch nightly (#17422)
Signed-off-by: Yang Wang <elainewy@meta.com>
b8b0859b · Yang Wang · GitHub · d7543862 · b8b0859b · b8b0859b
Unverified commit b8b0859b, authored May 02, 2025 by Yang Wang; committed by GitHub May 02, 2025.
Showing 3 changed files with 14 additions and 4 deletions

.buildkite/test-pipeline.yaml +4 -0

requirements/nightly_torch_test.txt +7 -1

vllm/sampling_params.py +3 -3

No files found.
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -293,6 +293,7 @@ steps:
  parallelism: 4
 - label: PyTorch Compilation Unit Tests
+  torch_nightly: true
  source_file_dependencies:
    - vllm/
    - tests/compile
@@ -302,6 +303,7 @@ steps:
    - pytest -v -s compile/test_sequence_parallelism.py
 - label: PyTorch Fullgraph Smoke Test # 9min
+  torch_nightly: true
  source_file_dependencies:
  - vllm/
  - tests/compile
@@ -312,6 +314,7 @@ steps:
  - pytest -v -s compile/piecewise/test_toy_llama.py
 - label: PyTorch Fullgraph Test # 18min
+  torch_nightly: true
  source_file_dependencies:
  - vllm/
  - tests/compile
@@ -436,6 +439,7 @@ steps:
 #####  models test  #####
 - label: Basic Models Test # 24min
+  torch_nightly: true
  source_file_dependencies:
  - vllm/
  - tests/models

--- a/requirements/nightly_torch_test.txt
+++ b/requirements/nightly_torch_test.txt
@@ -23,5 +23,11 @@ runai-model-streamer-s3==0.11.0
 tensorizer>=2.9.0
 lm-eval==0.4.8
 buildkite-test-collector==0.1.9
 lm-eval[api]==0.4.8 # required for model evaluation test
+# required for quantization test
+bitsandbytes>=0.45.3
+# required for minicpmo_26 test
+vector_quantize_pytorch
+vocos
--- a/vllm/sampling_params.py
+++ b/vllm/sampling_params.py
@@ -186,9 +186,9 @@ class SamplingParams(
        logits_processors: list of functions that modify logits based on
            previously generated tokens, and optionally prompt tokens as
            a first argument.
-        truncate_prompt_tokens: If set to -1, will use the truncation size 
+        truncate_prompt_tokens: If set to -1, will use the truncation size
-            supported by the model. If set to an integer k, will use only 
+            supported by the model. If set to an integer k, will use only
-            the last k tokens from the prompt (i.e., left truncation). 
+            the last k tokens from the prompt (i.e., left truncation).
            Defaults to None (i.e., no truncation).
        guided_decoding: If provided, the engine will construct a guided
            decoding logits processor from these parameters. Defaults to None.