Commit 1a280da1 authored by zhuwenwen
Browse files

release opt1

parent a098923a
......@@ -22,7 +22,7 @@ torch == 2.4.1
triton == 3.0.0
flash_attn == 2.6.1
flash_mla == 1.0.0
lmslim == 0.2.1
lmslim == 0.3.0
numa
python-multipart
pytrie
......
......@@ -562,10 +562,10 @@ def get_version_add(sha: Optional[str] = None) -> str:
if sha is None:
sha = get_sha(vllm_root)
if (major, minor) == ('2', '4'):
version = 'das.opt1.rc2.' + sha[:7]
version = 'das.opt1.' + sha[:7]
else:
if (major, minor) == ('2', '4'):
version = 'das.opt1.rc2'
version = 'das.opt1'
# dtk version
......@@ -784,7 +784,6 @@ if skip_vllm_build:
"_moe_C.abi3.so",
]
}
package_data["vllm"].append("/opt/dtk/*.so")
else:
package_data = {
"vllm": [
......
......@@ -38,7 +38,7 @@ class PagedAttention:
@staticmethod
def get_supported_head_sizes() -> List[int]:
return [64, 80, 96, 112, 120, 128, 192, 256]
return [32, 64, 80, 96, 112, 120, 128, 192, 256]
@staticmethod
def get_kv_cache_shape(
......
......@@ -187,26 +187,6 @@ class SequenceData(msgspec.Struct,
_first_step_flag: bool = True
@staticmethod
def from_prompt_token_counts(
*token_counts: tuple[int, int]) -> "SequenceData":
"""
Construct a :class:`SequenceData` instance by concatenating
prompt token sequences.
Each tuple represents one token sequence, expressed in the form
:code:`(token_id, count)`.
"""
if len(token_counts) == 0:
return SequenceData.from_seqs([])
prompt_token_ids_arr = reduce(
array.__iadd__,
(array_full(token_id, count) for token_id, count in token_counts),
)
return SequenceData(prompt_token_ids_arr)
@staticmethod
def from_prompt_token_counts(
*token_counts: tuple[int, int]) -> "SequenceData":
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment