release opt1

1a280da1 · zhuwenwen · a098923a · 1a280da1 · 1a280da1 · 1a280da1
Commit 1a280da1 authored Jun 18, 2025 by zhuwenwen
Showing 4 changed files with 4 additions and 25 deletions

requirements/rocm.txt +1 -1

setup.py +2 -3

vllm/attention/ops/paged_attn.py +1 -1

vllm/sequence.py +0 -20

No files found.
--- a/requirements/rocm.txt
+++ b/requirements/rocm.txt
@@ -22,7 +22,7 @@ torch == 2.4.1
 triton == 3.0.0
 flash_attn == 2.6.1
 flash_mla == 1.0.0
-lmslim == 0.2.1 
+lmslim == 0.3.0
 numa
 python-multipart
 pytrie

--- a/setup.py
+++ b/setup.py
@@ -562,10 +562,10 @@ def get_version_add(sha: Optional[str] = None) -> str:
            if sha is None:
                sha = get_sha(vllm_root)
            if (major, minor) == ('2', '4'):
-                version = 'das.opt1.rc2.' + sha[:7]
+                version = 'das.opt1.' + sha[:7]
    else:
        if (major, minor) == ('2', '4'):
-            version = 'das.opt1.rc2'
+            version = 'das.opt1'
    # dtk version
@@ -784,7 +784,6 @@ if skip_vllm_build:
            "_moe_C.abi3.so",
        ]
    }
-    package_data["vllm"].append("/opt/dtk/*.so")
 else:
    package_data = {
        "vllm": [

--- a/vllm/attention/ops/paged_attn.py
+++ b/vllm/attention/ops/paged_attn.py
@@ -38,7 +38,7 @@ class PagedAttention:
    @staticmethod
    def get_supported_head_sizes() -> List[int]:
-        return [64, 80, 96, 112, 120, 128, 192, 256]
+        return [32, 64, 80, 96, 112, 120, 128, 192, 256]
    @staticmethod
    def get_kv_cache_shape(

--- a/vllm/sequence.py
+++ b/vllm/sequence.py
@@ -187,26 +187,6 @@ class SequenceData(msgspec.Struct,
    _first_step_flag: bool = True
-    @staticmethod
-    def from_prompt_token_counts(
-            *token_counts: tuple[int, int]) -> "SequenceData":
-        """
-        Construct a :class:`SequenceData` instance by concatenating
-        prompt token sequences.
-        Each tuple represents one token sequence, expressed in the form
-        :code:`(token_id, count)`.
-        """
-        if len(token_counts) == 0:
-            return SequenceData.from_seqs([])
-        prompt_token_ids_arr = reduce(
-            array.__iadd__,
-            (array_full(token_id, count) for token_id, count in token_counts),
-        )
-        return SequenceData(prompt_token_ids_arr)
    @staticmethod
    def from_prompt_token_counts(
            *token_counts: tuple[int, int]) -> "SequenceData":