Commit 8279201c (unverified), authored by Gregory Shtrasberg, committed by GitHub
Browse files

[Build] Cython compilation support fix (#14296)


Signed-off-by: Gregory Shtrasberg <Gregory.Shtrasberg@amd.com>
parent 23fdab00
......@@ -40,7 +40,7 @@ ARG USE_CYTHON
RUN cd vllm \
&& python3 -m pip install -r requirements/rocm.txt \
&& python3 setup.py clean --all \
&& if [ ${USE_CYTHON} -eq "1" ]; then python3 setup_cython.py build_ext --inplace; fi \
&& if [ ${USE_CYTHON} -eq "1" ]; then python3 tests/build_cython.py build_ext --inplace; fi \
&& python3 setup.py bdist_wheel --dist-dir=dist
FROM scratch AS export_vllm
ARG COMMON_WORKDIR
......
......@@ -86,6 +86,7 @@ exclude = [
"vllm/triton_utils/**/*.py" = ["UP006", "UP035"]
"vllm/vllm_flash_attn/**/*.py" = ["UP006", "UP035"]
"vllm/worker/**/*.py" = ["UP006", "UP035"]
"vllm/utils.py" = ["UP006", "UP035"]
[tool.ruff.lint]
select = [
......
# SPDX-License-Identifier: Apache-2.0
import Cython.Compiler.Options
from Cython.Build import cythonize
from setuptools import setup
# Build script: compiles a fixed set of vLLM Python modules with Cython.
#
# example usage: python3 build_cython.py build_ext --inplace

# Switch on Cython's global "annotate" default before cythonize() runs.
# NOTE(review): the cythonize() call below passes annotate=False explicitly;
# confirm which of the two settings is intended to take effect.
Cython.Compiler.Options.annotate = True

# Python sources handed to cythonize(), in build order. The paths are
# relative -- presumably to the repository root the script is launched
# from (TODO confirm against the Dockerfile invocation).
infiles = [
    # engine, detokenizer and output modules
    "vllm/engine/llm_engine.py",
    "vllm/transformers_utils/detokenizer.py",
    "vllm/engine/output_processor/single_step.py",
    "vllm/outputs.py",
    "vllm/engine/output_processor/stop_checker.py",
    # scheduler, sequence and block-manager modules
    "vllm/core/scheduler.py",
    "vllm/sequence.py",
    "vllm/core/block_manager.py",
    # sampler, sampling-params and utils modules
    "vllm/model_executor/layers/sampler.py",
    "vllm/sampling_params.py",
    "vllm/utils.py",
]

# Compiler directives applied to every module in the list above.
directives = {
    "language_level": "3",
    "infer_types": True,
}

# force=True asks Cython to recompile even when timestamps say the
# generated files are up to date.
extensions = cythonize(
    infiles,
    annotate=False,
    force=True,
    compiler_directives=directives,
)

setup(ext_modules=extensions)
......@@ -1249,7 +1249,7 @@ class LLMEngine:
return None
def _advance_to_next_step(
self, output: List[SamplerOutput],
self, output: SamplerOutput,
seq_group_metadata_list: List[SequenceGroupMetadata],
scheduled_seq_groups: List[ScheduledSequenceGroup]) -> None:
"""Given model output from a single run, append the tokens to the
......
......@@ -1187,7 +1187,8 @@ def _build_sampler_output(
deferred_sample_results_args=deferred_sample_results_args)
def _get_next_prompt_tokens(seq_group: SequenceGroupToSample) -> List[int]:
def _get_next_prompt_tokens(
seq_group: SequenceGroupToSample) -> tuple[int, ...]:
"""Get a list of next prompt tokens to compute logprob from a
given sequence group.
......
......@@ -37,7 +37,7 @@ from collections.abc import (AsyncGenerator, Awaitable, Generator, Hashable,
from dataclasses import dataclass, field
from functools import cache, lru_cache, partial, wraps
from typing import (TYPE_CHECKING, Any, Callable, Generic, Literal, NamedTuple,
Optional, TypeVar, Union)
Optional, Type, TypeVar, Union)
from uuid import uuid4
import cloudpickle
......@@ -1544,9 +1544,9 @@ class LazyDict(Mapping[str, T], Generic[T]):
return len(self._factory)
class ClassRegistry(UserDict[type[T], _V]):
class ClassRegistry(UserDict[Type[T], _V]):
def __getitem__(self, key: type[T]) -> _V:
def __getitem__(self, key: Type[T]) -> _V:
for cls in key.mro():
if cls in self.data:
return self.data[cls]
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment