Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
norm
vllm
Commits
1951f478
Commit
1951f478
authored
Apr 03, 2024
by
zhuwenwen
Browse files
update tests and offline_inference.py
parent
9b28ea43
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
7 additions
and
6 deletions
+7
-6
examples/offline_inference.py
examples/offline_inference.py
+1
-1
tests/kernels/test_attention.py
tests/kernels/test_attention.py
+3
-2
tests/kernels/test_cache.py
tests/kernels/test_cache.py
+1
-1
tests/samplers/test_logprobs.py
tests/samplers/test_logprobs.py
+2
-2
No files found.
examples/offline_inference.py
View file @
1951f478
...
...
@@ -11,7 +11,7 @@ prompts = [
sampling_params
=
SamplingParams
(
temperature
=
0.8
,
top_p
=
0.95
)
# Create an LLM.
llm
=
LLM
(
model
=
"facebook/opt-125m"
)
llm
=
LLM
(
model
=
"facebook/opt-125m"
,
trust_remote_code
=
True
,
dtype
=
"float16"
,
enforce_eager
=
True
)
# Generate texts from the prompts. The output is a list of RequestOutput objects
# that contain the prompt, generated text, and other information.
outputs
=
llm
.
generate
(
prompts
,
sampling_params
)
...
...
tests/kernels/test_attention.py
View file @
1951f478
...
...
@@ -21,7 +21,8 @@ NUM_BLOCKS = 4321 # Arbitrary values for testing
PARTITION_SIZE
=
512
# flshattF and tritonflashattF supported: {torch.float16, torch.bfloat16}
DTYPES
=
[
torch
.
half
,
torch
.
bfloat16
,
torch
.
float
]
if
not
is_hip
()
else
[
torch
.
half
,
torch
.
bfloat16
]
# ] if not is_hip() else [torch.half, torch.bfloat16]
]
if
not
is_hip
()
else
[
torch
.
half
]
NUM_GEN_SEQS
=
[
7
]
# Arbitrary values for testing
NUM_PREFILL_SEQS
=
[
3
]
# Arbitrary values for testing
NUM_HEADS
=
[(
40
,
40
),
(
64
,
8
)]
# Arbitrary values for testing
...
...
@@ -33,7 +34,7 @@ HEAD_SIZES = [64, 80, 96, 112, 128, 256
BLOCK_SIZES
=
[
16
,
32
]
USE_ALIBI
=
[
False
,
True
]
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8_e5m2"
]
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8_e5m2"
]
if
not
is_hip
()
else
[
"auto"
]
SEEDS
=
[
0
]
CUDA_DEVICES
=
[
f
"cuda:
{
i
}
"
for
i
in
range
(
1
if
torch
.
cuda
.
device_count
()
==
1
else
2
)
...
...
tests/kernels/test_cache.py
View file @
1951f478
...
...
@@ -24,7 +24,7 @@ SEEDS = [0]
CUDA_DEVICES
=
[
f
"cuda:
{
i
}
"
for
i
in
range
(
1
if
torch
.
cuda
.
device_count
()
==
1
else
2
)
]
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8_e5m2"
]
KV_CACHE_DTYPE
=
[
"auto"
,
"fp8_e5m2"
]
if
not
is_hip
()
else
[
"auto"
]
@
pytest
.
mark
.
parametrize
(
"num_mappings"
,
NUM_MAPPINGS
)
...
...
tests/samplers/test_logprobs.py
View file @
1951f478
...
...
@@ -52,5 +52,5 @@ def test_get_prompt_logprobs(
for
token_id
,
logprob
in
vllm_sample_logprob_dict
.
items
():
torch
.
testing
.
assert_close
(
logprob
,
hf_logprob
[
i
][
-
1
][
token_id
].
item
(),
atol
=
1e-
2
,
rtol
=
1e-
2
)
atol
=
1e-
1
,
rtol
=
1e-
1
)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment