Commit 66b809cc authored by zhuwenwen
Browse files

Merge tag 'v0.7.2' into v0.7.2-dev

parents 37b63c24 0408efc6
# SPDX-License-Identifier: Apache-2.0
import pytest
from vllm.sampling_params import SamplingParams
......
# SPDX-License-Identifier: Apache-2.0
from typing import List
import torch
......
# SPDX-License-Identifier: Apache-2.0
from typing import Dict, List, Set, Tuple
import numpy as np
......
# SPDX-License-Identifier: Apache-2.0
from setuptools import setup
setup(
......
# SPDX-License-Identifier: Apache-2.0
"""
vllm_utils is a package for vLLM testing utilities.
It does not import any vLLM modules.
......
# SPDX-License-Identifier: Apache-2.0
import contextlib
import dataclasses
import sys
......
# SPDX-License-Identifier: Apache-2.0
import contextlib
import dataclasses
import sys
......
# SPDX-License-Identifier: Apache-2.0
import os
import pytest
......
# SPDX-License-Identifier: Apache-2.0
import itertools
from typing import List
......
# SPDX-License-Identifier: Apache-2.0
import dataclasses
from typing import List, Tuple, Type
......
# SPDX-License-Identifier: Apache-2.0
from typing import List
import pytest
......@@ -25,6 +27,15 @@ def _create_model_runner(model: str, *args, **kwargs) -> ModelRunner:
return model_runner
def test_deepseek_mla_attn_backend_module():
    """Check which attention backend is picked for a DeepSeek checkpoint.

    Builds a model runner for DeepSeek-Coder-V2-Lite-Instruct with remote
    code trusted and chunked prefill turned off, then asserts that the
    runner's ``attn_backend`` is the class named ``TritonMLABackend``.
    """
    runner = _create_model_runner(
        "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct",
        trust_remote_code=True,
        enable_chunked_prefill=False,
    )
    backend_name = runner.attn_backend.__name__
    assert backend_name == "TritonMLABackend"
@pytest.mark.parametrize("batch_size", list(range(1, 257)))
def test_prepare_prompt(batch_size):
model_runner = _create_model_runner(
......
# SPDX-License-Identifier: Apache-2.0
import os
import torch
......
# SPDX-License-Identifier: Apache-2.0
import torch
import os
......
# SPDX-License-Identifier: Apache-2.0
import sys
# Exact header line that add_header() writes into files lacking one.
SPDX_HEADER = "# SPDX-License-Identifier: Apache-2.0"
# Prefix check_spdx_header() looks for; matching on the prefix alone means a
# line naming any license identifier counts as an existing header.
SPDX_HEADER_PREFIX = "# SPDX-License-Identifier:"
def check_spdx_header(file_path):
    """Tell whether ``file_path`` needs no SPDX header added.

    Args:
        file_path: Path of a UTF-8 text file to inspect.

    Returns:
        ``True`` when the file is empty, or when any line (ignoring
        surrounding whitespace) starts with ``SPDX_HEADER_PREFIX``;
        ``False`` otherwise.
    """
    with open(file_path, encoding='UTF-8') as handle:
        content = handle.readlines()

    # Empty files (for example a bare __init__.py) are accepted as they are.
    if not content:
        return True

    return any(
        row.strip().startswith(SPDX_HEADER_PREFIX) for row in content)
def add_header(file_path):
    """Insert the SPDX header line into ``file_path``, rewriting it in place.

    The header becomes the first line of the file, unless the file starts
    with a ``#!`` shebang, in which case the shebang stays first and the
    header is placed directly after it.

    Args:
        file_path: Path of a UTF-8 text file to modify.
    """
    with open(file_path, 'r+', encoding='UTF-8') as file:
        lines = file.readlines()
        file.seek(0, 0)
        if lines and lines[0].startswith("#!"):
            shebang, rest = lines[0], lines[1:]
            # A shebang-only file may lack a final newline; without one the
            # header would be appended to the shebang line itself.
            if not shebang.endswith('\n'):
                shebang += '\n'
            file.write(shebang)
            file.write(SPDX_HEADER + '\n')
            file.writelines(rest)
        else:
            file.write(SPDX_HEADER + '\n')
            file.writelines(lines)
        # Text-mode reading folds CRLF into LF, so the rewritten content can
        # be shorter than the original bytes; cut off any stale tail.
        file.truncate()
def main():
    """Check every path given on the command line and fix missing headers.

    All paths in ``sys.argv[1:]`` are checked first. If any lack an SPDX
    header, they are listed on stdout, each one gets the header added, and
    the process exits with status 1. Otherwise it exits with status 0.
    """
    missing = [
        candidate for candidate in sys.argv[1:]
        if not check_spdx_header(candidate)
    ]

    if not missing:
        sys.exit(0)

    print("The following files are missing the SPDX header:")
    for candidate in missing:
        print(f" {candidate}")
        add_header(candidate)

    # Non-zero status signals that files were modified by this run.
    sys.exit(1)


if __name__ == "__main__":
    main()
# SPDX-License-Identifier: Apache-2.0
import argparse
import json
from typing import Dict
......
# SPDX-License-Identifier: Apache-2.0
import argparse
import copy
import json
......
#!/usr/bin/env python3
# SPDX-License-Identifier: Apache-2.0
# Copyright (c) 2018 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
......
# SPDX-License-Identifier: Apache-2.0
import glob
requires_files = glob.glob('requirements*.txt')
......
# SPDX-License-Identifier: Apache-2.0
"""vLLM: a high-throughput and memory-efficient inference engine for LLMs"""
import os
......
# SPDX-License-Identifier: Apache-2.0
import contextlib
import importlib
from typing import TYPE_CHECKING, List, Optional, Tuple, Union, Type
......@@ -1369,6 +1371,15 @@ def moe_align_block_size(topk_ids: torch.Tensor, num_experts: int,
num_tokens_post_pad)
def sgl_moe_align_block_size(topk_ids: torch.Tensor, num_experts: int,
                             block_size: int, sorted_token_ids: torch.Tensor,
                             experts_ids: torch.Tensor,
                             num_tokens_post_pad: torch.Tensor) -> None:
    """Invoke the ``_moe_C.sgl_moe_align_block_size`` custom op.

    All six arguments are forwarded positionally, in signature order, and
    nothing is returned.

    NOTE(review): presumably the op writes its results into
    ``sorted_token_ids``, ``experts_ids`` and ``num_tokens_post_pad`` --
    confirm against the kernel implementation.
    """
    align_op = torch.ops._moe_C.sgl_moe_align_block_size
    align_op(topk_ids, num_experts, block_size, sorted_token_ids,
             experts_ids, num_tokens_post_pad)
def topk_softmax(topk_weights: torch.Tensor, topk_ids: torch.Tensor,
token_expert_indicies: torch.Tensor,
gating_output: float) -> None:
......@@ -1445,6 +1456,11 @@ def copy_blocks(key_caches: List[torch.Tensor],
torch.ops._C_cache_ops.copy_blocks(key_caches, value_caches, block_mapping)
def copy_blocks_mla(kv_caches: List[torch.Tensor],
                    block_mapping: torch.Tensor) -> None:
    """Invoke the ``_C_cache_ops.copy_blocks_mla`` custom op.

    ``kv_caches`` and ``block_mapping`` are passed through unchanged and
    nothing is returned.

    NOTE(review): presumably the caches are modified in place according to
    ``block_mapping`` -- confirm against the kernel implementation.
    """
    cache_ops = torch.ops._C_cache_ops
    cache_ops.copy_blocks_mla(kv_caches, block_mapping)
def swap_blocks(src: torch.Tensor, dst: torch.Tensor,
                block_mapping: torch.Tensor) -> None:
    """Invoke the ``_C_cache_ops.swap_blocks`` custom op.

    ``src``, ``dst`` and ``block_mapping`` are passed through unchanged and
    nothing is returned.

    NOTE(review): presumably ``dst`` is written in place from ``src`` as
    directed by ``block_mapping`` -- confirm against the kernel.
    """
    cache_ops = torch.ops._C_cache_ops
    cache_ops.swap_blocks(src, dst, block_mapping)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment