Commit 1a280da1 authored by zhuwenwen
Browse files

release opt1

parent a098923a
...@@ -22,7 +22,7 @@ torch == 2.4.1 ...@@ -22,7 +22,7 @@ torch == 2.4.1
triton == 3.0.0 triton == 3.0.0
flash_attn == 2.6.1 flash_attn == 2.6.1
flash_mla == 1.0.0 flash_mla == 1.0.0
lmslim == 0.2.1 lmslim == 0.3.0
numa numa
python-multipart python-multipart
pytrie pytrie
......
...@@ -562,10 +562,10 @@ def get_version_add(sha: Optional[str] = None) -> str: ...@@ -562,10 +562,10 @@ def get_version_add(sha: Optional[str] = None) -> str:
if sha is None: if sha is None:
sha = get_sha(vllm_root) sha = get_sha(vllm_root)
if (major, minor) == ('2', '4'): if (major, minor) == ('2', '4'):
version = 'das.opt1.rc2.' + sha[:7] version = 'das.opt1.' + sha[:7]
else: else:
if (major, minor) == ('2', '4'): if (major, minor) == ('2', '4'):
version = 'das.opt1.rc2' version = 'das.opt1'
# dtk version # dtk version
...@@ -784,7 +784,6 @@ if skip_vllm_build: ...@@ -784,7 +784,6 @@ if skip_vllm_build:
"_moe_C.abi3.so", "_moe_C.abi3.so",
] ]
} }
package_data["vllm"].append("/opt/dtk/*.so")
else: else:
package_data = { package_data = {
"vllm": [ "vllm": [
......
...@@ -38,7 +38,7 @@ class PagedAttention: ...@@ -38,7 +38,7 @@ class PagedAttention:
@staticmethod @staticmethod
def get_supported_head_sizes() -> List[int]: def get_supported_head_sizes() -> List[int]:
return [64, 80, 96, 112, 120, 128, 192, 256] return [32, 64, 80, 96, 112, 120, 128, 192, 256]
@staticmethod @staticmethod
def get_kv_cache_shape( def get_kv_cache_shape(
......
...@@ -187,26 +187,6 @@ class SequenceData(msgspec.Struct, ...@@ -187,26 +187,6 @@ class SequenceData(msgspec.Struct,
_first_step_flag: bool = True _first_step_flag: bool = True
@staticmethod
def from_prompt_token_counts(
*token_counts: tuple[int, int]) -> "SequenceData":
"""
Construct a :class:`SequenceData` instance by concatenating
prompt token sequences.
Each tuple represents one token sequence, expressed in the form
:code:`(token_id, count)`.
"""
if len(token_counts) == 0:
return SequenceData.from_seqs([])
prompt_token_ids_arr = reduce(
array.__iadd__,
(array_full(token_id, count) for token_id, count in token_counts),
)
return SequenceData(prompt_token_ids_arr)
@staticmethod @staticmethod
def from_prompt_token_counts( def from_prompt_token_counts(
*token_counts: tuple[int, int]) -> "SequenceData": *token_counts: tuple[int, int]) -> "SequenceData":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment