Commit 1591c68f authored by zhuwenwen's avatar zhuwenwen
Browse files

merge v0.4.2

parents 09bcf00b c7f2cf2b
......@@ -2,5 +2,5 @@
-r requirements-common.txt
# Dependencies for x86_64 CPUs
torch == 2.2.1+cpu
torch == 2.3.0+cpu
triton >= 2.2.0 # FIXME(woosuk): This is a hack to avoid import error.
\ No newline at end of file
......@@ -5,5 +5,5 @@
ray >= 2.9
nvidia-ml-py # for pynvml package
vllm-nccl-cu12>=2.18,<2.19 # for downloading nccl library
torch == 2.2.1
xformers == 0.0.25 # Requires PyTorch 2.2.1
torch == 2.3.0
xformers == 0.0.26.post1 # Requires PyTorch 2.3.0
......@@ -14,19 +14,17 @@ types-setuptools
# testing
pytest
tensorizer==2.9.0a0
tensorizer==2.9.0
pytest-forked
pytest-asyncio
pytest-rerunfailures
pytest-shard
httpx
einops # required for MPT
openai
requests
ray
peft
awscli
ai2-olmo # required for OLMo
# Benchmarking
aiohttp
......
This diff is collapsed.
......@@ -91,4 +91,6 @@ async def test_new_requests_event():
assert engine.engine.step_calls == old_step_calls + 1
engine = MockAsyncLLMEngine(worker_use_ray=True, engine_use_ray=True)
assert engine.get_model_config() is not None
assert engine.get_tokenizer() is not None
assert engine.get_decoding_config() is not None
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -55,7 +55,6 @@ def test_models(
)
vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens)
del vllm_model
print(vllm_outputs[0])
for i in range(len(example_prompts)):
hf_output_ids, hf_output_str = hf_outputs[i]
......
This diff is collapsed.
......@@ -296,6 +296,7 @@ class VllmRunner:
tensor_parallel_size: int = 1,
block_size: int = 16,
enable_chunked_prefill: bool = False,
swap_space=4,
**kwargs,
) -> None:
self.model = LLM(
......@@ -303,7 +304,7 @@ class VllmRunner:
tokenizer=tokenizer_name,
trust_remote_code=True,
dtype=dtype,
swap_space=0,
swap_space=swap_space,
disable_log_stats=disable_log_stats,
tensor_parallel_size=tensor_parallel_size,
max_model_len=max_model_len,
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment